Fix change log
[official-gcc.git] / gcc / config / sh / lib1funcs.asm
blob2f0ca16cd91937963a711b37930710552517b6d8
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2 2004, 2005, 2006, 2009
3 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
25 !! libgcc routines for the Renesas / SuperH SH CPUs.
26 !! Contributed by Steve Chamberlain.
27 !! sac@cygnus.com
29 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
30 !! recoded in assembly by Toshiyasu Morita
31 !! tm@netcom.com
33 #if defined(__ELF__) && defined(__linux__)
34 .section .note.GNU-stack,"",%progbits
35 .previous
36 #endif
38 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
39 ELF local label prefixes by J"orn Rennecke
40 amylaar@cygnus.com */
42 #include "lib1funcs.h"
44 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
45 so it is more convenient to define NO_FPSCR_VALUES here than to
46 define it on the command line. */
47 #if defined __vxworks && defined __PIC__
48 #define NO_FPSCR_VALUES
49 #endif
51 #if ! __SH5__
52 #ifdef L_ashiftrt
53 .global GLOBAL(ashiftrt_r4_0)
54 .global GLOBAL(ashiftrt_r4_1)
55 .global GLOBAL(ashiftrt_r4_2)
56 .global GLOBAL(ashiftrt_r4_3)
57 .global GLOBAL(ashiftrt_r4_4)
58 .global GLOBAL(ashiftrt_r4_5)
59 .global GLOBAL(ashiftrt_r4_6)
60 .global GLOBAL(ashiftrt_r4_7)
61 .global GLOBAL(ashiftrt_r4_8)
62 .global GLOBAL(ashiftrt_r4_9)
63 .global GLOBAL(ashiftrt_r4_10)
64 .global GLOBAL(ashiftrt_r4_11)
65 .global GLOBAL(ashiftrt_r4_12)
66 .global GLOBAL(ashiftrt_r4_13)
67 .global GLOBAL(ashiftrt_r4_14)
68 .global GLOBAL(ashiftrt_r4_15)
69 .global GLOBAL(ashiftrt_r4_16)
70 .global GLOBAL(ashiftrt_r4_17)
71 .global GLOBAL(ashiftrt_r4_18)
72 .global GLOBAL(ashiftrt_r4_19)
73 .global GLOBAL(ashiftrt_r4_20)
74 .global GLOBAL(ashiftrt_r4_21)
75 .global GLOBAL(ashiftrt_r4_22)
76 .global GLOBAL(ashiftrt_r4_23)
77 .global GLOBAL(ashiftrt_r4_24)
78 .global GLOBAL(ashiftrt_r4_25)
79 .global GLOBAL(ashiftrt_r4_26)
80 .global GLOBAL(ashiftrt_r4_27)
81 .global GLOBAL(ashiftrt_r4_28)
82 .global GLOBAL(ashiftrt_r4_29)
83 .global GLOBAL(ashiftrt_r4_30)
84 .global GLOBAL(ashiftrt_r4_31)
85 .global GLOBAL(ashiftrt_r4_32)
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
117 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
118 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
119 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
121 .align 1 ! ashiftrt_r4_N: arithmetic right shift of r4 by N, in place; entry N falls through the chain below
122 GLOBAL(ashiftrt_r4_32):
123 GLOBAL(ashiftrt_r4_31):
124 rotcl r4 ! T = sign bit of r4
125 rts
126 subc r4,r4 ! delay slot: r4 = -T, i.e. 0 or -1
128 GLOBAL(ashiftrt_r4_30):
129 shar r4
130 GLOBAL(ashiftrt_r4_29):
131 shar r4
132 GLOBAL(ashiftrt_r4_28):
133 shar r4
134 GLOBAL(ashiftrt_r4_27):
135 shar r4
136 GLOBAL(ashiftrt_r4_26):
137 shar r4
138 GLOBAL(ashiftrt_r4_25):
139 shar r4
140 GLOBAL(ashiftrt_r4_24):
141 shlr16 r4
142 shlr8 r4 ! logical shift by 24 leaves the top byte in bits 7..0
143 rts
144 exts.b r4,r4 ! delay slot: sign-extend that byte
146 GLOBAL(ashiftrt_r4_23):
147 shar r4
148 GLOBAL(ashiftrt_r4_22):
149 shar r4
150 GLOBAL(ashiftrt_r4_21):
151 shar r4
152 GLOBAL(ashiftrt_r4_20):
153 shar r4
154 GLOBAL(ashiftrt_r4_19):
155 shar r4
156 GLOBAL(ashiftrt_r4_18):
157 shar r4
158 GLOBAL(ashiftrt_r4_17):
159 shar r4
160 GLOBAL(ashiftrt_r4_16):
161 shlr16 r4 ! logical shift by 16 leaves the top half in bits 15..0
162 rts
163 exts.w r4,r4 ! delay slot: sign-extend that halfword
165 GLOBAL(ashiftrt_r4_15):
166 shar r4
167 GLOBAL(ashiftrt_r4_14):
168 shar r4
169 GLOBAL(ashiftrt_r4_13):
170 shar r4
171 GLOBAL(ashiftrt_r4_12):
172 shar r4
173 GLOBAL(ashiftrt_r4_11):
174 shar r4
175 GLOBAL(ashiftrt_r4_10):
176 shar r4
177 GLOBAL(ashiftrt_r4_9):
178 shar r4
179 GLOBAL(ashiftrt_r4_8):
180 shar r4
181 GLOBAL(ashiftrt_r4_7):
182 shar r4
183 GLOBAL(ashiftrt_r4_6):
184 shar r4
185 GLOBAL(ashiftrt_r4_5):
186 shar r4
187 GLOBAL(ashiftrt_r4_4):
188 shar r4
189 GLOBAL(ashiftrt_r4_3):
190 shar r4
191 GLOBAL(ashiftrt_r4_2):
192 shar r4
193 GLOBAL(ashiftrt_r4_1):
194 rts
195 shar r4 ! delay slot: the last one-bit shift
197 GLOBAL(ashiftrt_r4_0):
198 rts
199 nop
201 ENDFUNC(GLOBAL(ashiftrt_r4_0))
202 ENDFUNC(GLOBAL(ashiftrt_r4_1))
203 ENDFUNC(GLOBAL(ashiftrt_r4_2))
204 ENDFUNC(GLOBAL(ashiftrt_r4_3))
205 ENDFUNC(GLOBAL(ashiftrt_r4_4))
206 ENDFUNC(GLOBAL(ashiftrt_r4_5))
207 ENDFUNC(GLOBAL(ashiftrt_r4_6))
208 ENDFUNC(GLOBAL(ashiftrt_r4_7))
209 ENDFUNC(GLOBAL(ashiftrt_r4_8))
210 ENDFUNC(GLOBAL(ashiftrt_r4_9))
211 ENDFUNC(GLOBAL(ashiftrt_r4_10))
212 ENDFUNC(GLOBAL(ashiftrt_r4_11))
213 ENDFUNC(GLOBAL(ashiftrt_r4_12))
214 ENDFUNC(GLOBAL(ashiftrt_r4_13))
215 ENDFUNC(GLOBAL(ashiftrt_r4_14))
216 ENDFUNC(GLOBAL(ashiftrt_r4_15))
217 ENDFUNC(GLOBAL(ashiftrt_r4_16))
218 ENDFUNC(GLOBAL(ashiftrt_r4_17))
219 ENDFUNC(GLOBAL(ashiftrt_r4_18))
220 ENDFUNC(GLOBAL(ashiftrt_r4_19))
221 ENDFUNC(GLOBAL(ashiftrt_r4_20))
222 ENDFUNC(GLOBAL(ashiftrt_r4_21))
223 ENDFUNC(GLOBAL(ashiftrt_r4_22))
224 ENDFUNC(GLOBAL(ashiftrt_r4_23))
225 ENDFUNC(GLOBAL(ashiftrt_r4_24))
226 ENDFUNC(GLOBAL(ashiftrt_r4_25))
227 ENDFUNC(GLOBAL(ashiftrt_r4_26))
228 ENDFUNC(GLOBAL(ashiftrt_r4_27))
229 ENDFUNC(GLOBAL(ashiftrt_r4_28))
230 ENDFUNC(GLOBAL(ashiftrt_r4_29))
231 ENDFUNC(GLOBAL(ashiftrt_r4_30))
232 ENDFUNC(GLOBAL(ashiftrt_r4_31))
233 ENDFUNC(GLOBAL(ashiftrt_r4_32))
234 #endif
236 #ifdef L_ashiftrt_n
239 ! GLOBAL(ashrsi3): arithmetic right shift of r4 by a variable count
241 ! Entry:
243 ! r4: Value to shift
244 ! r5: Shifts (only the low five bits are used)
246 ! Exit:
248 ! r0: Result
250 ! Destroys:
252 ! r5 (masked, then overwritten with the jump-table entry) and the T bit
255 .global GLOBAL(ashrsi3)
256 HIDDEN_FUNC(GLOBAL(ashrsi3))
257 .align 2
258 GLOBAL(ashrsi3):
259 mov #31,r0
260 and r0,r5 ! r5 = shift count mod 32
261 mova LOCAL(ashrsi3_table),r0 ! r0 = address of the byte offset table
262 mov.b @(r0,r5),r5 ! r5 = table entry for this count: offset of its code from the table
263 #ifdef __sh1__
264 add r5,r0 ! SH1 path: form the absolute target address for jmp
265 jmp @r0
266 #else
267 braf r5 ! PC-relative branch, r5 supplies the displacement
268 #endif
269 mov r4,r0 ! delay slot: the result register starts out as the input value
271 .align 2
272 LOCAL(ashrsi3_table):
273 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
306 LOCAL(ashrsi3_31): ! each ashrsi3_N entry shifts r0 right arithmetically by N and returns
307 rotcl r0 ! T = sign bit of r0
308 rts
309 subc r0,r0 ! delay slot: r0 = -T, i.e. 0 or -1
311 LOCAL(ashrsi3_30):
312 shar r0
313 LOCAL(ashrsi3_29):
314 shar r0
315 LOCAL(ashrsi3_28):
316 shar r0
317 LOCAL(ashrsi3_27):
318 shar r0
319 LOCAL(ashrsi3_26):
320 shar r0
321 LOCAL(ashrsi3_25):
322 shar r0
323 LOCAL(ashrsi3_24):
324 shlr16 r0
325 shlr8 r0 ! logical shift by 24 leaves the top byte in bits 7..0
326 rts
327 exts.b r0,r0 ! delay slot: sign-extend that byte
329 LOCAL(ashrsi3_23):
330 shar r0
331 LOCAL(ashrsi3_22):
332 shar r0
333 LOCAL(ashrsi3_21):
334 shar r0
335 LOCAL(ashrsi3_20):
336 shar r0
337 LOCAL(ashrsi3_19):
338 shar r0
339 LOCAL(ashrsi3_18):
340 shar r0
341 LOCAL(ashrsi3_17):
342 shar r0
343 LOCAL(ashrsi3_16):
344 shlr16 r0 ! logical shift by 16 leaves the top half in bits 15..0
345 rts
346 exts.w r0,r0 ! delay slot: sign-extend that halfword
348 LOCAL(ashrsi3_15):
349 shar r0
350 LOCAL(ashrsi3_14):
351 shar r0
352 LOCAL(ashrsi3_13):
353 shar r0
354 LOCAL(ashrsi3_12):
355 shar r0
356 LOCAL(ashrsi3_11):
357 shar r0
358 LOCAL(ashrsi3_10):
359 shar r0
360 LOCAL(ashrsi3_9):
361 shar r0
362 LOCAL(ashrsi3_8):
363 shar r0
364 LOCAL(ashrsi3_7):
365 shar r0
366 LOCAL(ashrsi3_6):
367 shar r0
368 LOCAL(ashrsi3_5):
369 shar r0
370 LOCAL(ashrsi3_4):
371 shar r0
372 LOCAL(ashrsi3_3):
373 shar r0
374 LOCAL(ashrsi3_2):
375 shar r0
376 LOCAL(ashrsi3_1):
377 rts
378 shar r0 ! delay slot: the last one-bit shift
380 LOCAL(ashrsi3_0):
381 rts
382 nop
384 ENDFUNC(GLOBAL(ashrsi3))
385 #endif
387 #ifdef L_ashiftlt
390 ! GLOBAL(ashlsi3): left shift of r4 by a variable count
392 ! Entry:
394 ! r4: Value to shift
395 ! r5: Shifts (only the low five bits are used)
397 ! Exit:
399 ! r0: Result
401 ! Destroys:
403 ! r5 (masked, then overwritten with the jump-table entry); T bit on the paths that use shll
405 .global GLOBAL(ashlsi3)
406 HIDDEN_FUNC(GLOBAL(ashlsi3))
407 .align 2
408 GLOBAL(ashlsi3):
409 mov #31,r0
410 and r0,r5 ! r5 = shift count mod 32
411 mova LOCAL(ashlsi3_table),r0 ! r0 = address of the byte offset table
412 mov.b @(r0,r5),r5 ! r5 = table entry for this count: offset of its code from the table
413 #ifdef __sh1__
414 add r5,r0 ! SH1 path: form the absolute target address for jmp
415 jmp @r0
416 #else
417 braf r5 ! PC-relative branch, r5 supplies the displacement
418 #endif
419 mov r4,r0 ! delay slot: the result register starts out as the input value
421 .align 2
422 LOCAL(ashlsi3_table):
423 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
424 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
425 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
426 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
427 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
428 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
429 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
430 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
431 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
456 LOCAL(ashlsi3_6): ! each ashlsi3_N entry shifts r0 left by N and returns; groups share their tails
457 shll2 r0
458 LOCAL(ashlsi3_4):
459 shll2 r0
460 LOCAL(ashlsi3_2):
461 rts
462 shll2 r0 ! delay slot
464 LOCAL(ashlsi3_7):
465 shll2 r0
466 LOCAL(ashlsi3_5):
467 shll2 r0
468 LOCAL(ashlsi3_3):
469 shll2 r0
470 LOCAL(ashlsi3_1):
471 rts
472 shll r0 ! delay slot
474 LOCAL(ashlsi3_14):
475 shll2 r0
476 LOCAL(ashlsi3_12):
477 shll2 r0
478 LOCAL(ashlsi3_10):
479 shll2 r0
480 LOCAL(ashlsi3_8):
481 rts
482 shll8 r0 ! delay slot
484 LOCAL(ashlsi3_15):
485 shll2 r0
486 LOCAL(ashlsi3_13):
487 shll2 r0
488 LOCAL(ashlsi3_11):
489 shll2 r0
490 LOCAL(ashlsi3_9):
491 shll8 r0
492 rts
493 shll r0 ! delay slot
495 LOCAL(ashlsi3_22):
496 shll2 r0
497 LOCAL(ashlsi3_20):
498 shll2 r0
499 LOCAL(ashlsi3_18):
500 shll2 r0
501 LOCAL(ashlsi3_16):
502 rts
503 shll16 r0 ! delay slot
505 LOCAL(ashlsi3_23):
506 shll2 r0
507 LOCAL(ashlsi3_21):
508 shll2 r0
509 LOCAL(ashlsi3_19):
510 shll2 r0
511 LOCAL(ashlsi3_17):
512 shll16 r0
513 rts
514 shll r0 ! delay slot
516 LOCAL(ashlsi3_30):
517 shll2 r0
518 LOCAL(ashlsi3_28):
519 shll2 r0
520 LOCAL(ashlsi3_26):
521 shll2 r0
522 LOCAL(ashlsi3_24):
523 shll16 r0
524 rts
525 shll8 r0 ! delay slot
527 LOCAL(ashlsi3_31):
528 shll2 r0
529 LOCAL(ashlsi3_29):
530 shll2 r0
531 LOCAL(ashlsi3_27):
532 shll2 r0
533 LOCAL(ashlsi3_25):
534 shll16 r0
535 shll8 r0
536 rts
537 shll r0 ! delay slot
539 LOCAL(ashlsi3_0):
540 rts
541 nop
543 ENDFUNC(GLOBAL(ashlsi3))
544 #endif
546 #ifdef L_lshiftrt
549 ! GLOBAL(lshrsi3): logical right shift of r4 by a variable count
551 ! Entry:
553 ! r4: Value to shift
554 ! r5: Shifts (only the low five bits are used)
556 ! Exit:
558 ! r0: Result
560 ! Destroys:
562 ! r5 (masked, then overwritten with the jump-table entry); T bit on the paths that use shlr
564 .global GLOBAL(lshrsi3)
565 HIDDEN_FUNC(GLOBAL(lshrsi3))
566 .align 2
567 GLOBAL(lshrsi3):
568 mov #31,r0
569 and r0,r5 ! r5 = shift count mod 32
570 mova LOCAL(lshrsi3_table),r0 ! r0 = address of the byte offset table
571 mov.b @(r0,r5),r5 ! r5 = table entry for this count: offset of its code from the table
572 #ifdef __sh1__
573 add r5,r0 ! SH1 path: form the absolute target address for jmp
574 jmp @r0
575 #else
576 braf r5 ! PC-relative branch, r5 supplies the displacement
577 #endif
578 mov r4,r0 ! delay slot: the result register starts out as the input value
580 .align 2
581 LOCAL(lshrsi3_table):
582 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
583 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
584 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
585 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
586 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
587 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
588 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
589 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
590 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
615 LOCAL(lshrsi3_6): ! each lshrsi3_N entry shifts r0 right logically by N and returns; groups share their tails
616 shlr2 r0
617 LOCAL(lshrsi3_4):
618 shlr2 r0
619 LOCAL(lshrsi3_2):
620 rts
621 shlr2 r0 ! delay slot
623 LOCAL(lshrsi3_7):
624 shlr2 r0
625 LOCAL(lshrsi3_5):
626 shlr2 r0
627 LOCAL(lshrsi3_3):
628 shlr2 r0
629 LOCAL(lshrsi3_1):
630 rts
631 shlr r0 ! delay slot
633 LOCAL(lshrsi3_14):
634 shlr2 r0
635 LOCAL(lshrsi3_12):
636 shlr2 r0
637 LOCAL(lshrsi3_10):
638 shlr2 r0
639 LOCAL(lshrsi3_8):
640 rts
641 shlr8 r0 ! delay slot
643 LOCAL(lshrsi3_15):
644 shlr2 r0
645 LOCAL(lshrsi3_13):
646 shlr2 r0
647 LOCAL(lshrsi3_11):
648 shlr2 r0
649 LOCAL(lshrsi3_9):
650 shlr8 r0
651 rts
652 shlr r0 ! delay slot
654 LOCAL(lshrsi3_22):
655 shlr2 r0
656 LOCAL(lshrsi3_20):
657 shlr2 r0
658 LOCAL(lshrsi3_18):
659 shlr2 r0
660 LOCAL(lshrsi3_16):
661 rts
662 shlr16 r0 ! delay slot
664 LOCAL(lshrsi3_23):
665 shlr2 r0
666 LOCAL(lshrsi3_21):
667 shlr2 r0
668 LOCAL(lshrsi3_19):
669 shlr2 r0
670 LOCAL(lshrsi3_17):
671 shlr16 r0
672 rts
673 shlr r0 ! delay slot
675 LOCAL(lshrsi3_30):
676 shlr2 r0
677 LOCAL(lshrsi3_28):
678 shlr2 r0
679 LOCAL(lshrsi3_26):
680 shlr2 r0
681 LOCAL(lshrsi3_24):
682 shlr16 r0
683 rts
684 shlr8 r0 ! delay slot
686 LOCAL(lshrsi3_31):
687 shlr2 r0
688 LOCAL(lshrsi3_29):
689 shlr2 r0
690 LOCAL(lshrsi3_27):
691 shlr2 r0
692 LOCAL(lshrsi3_25):
693 shlr16 r0
694 shlr8 r0
695 rts
696 shlr r0 ! delay slot
698 LOCAL(lshrsi3_0):
699 rts
700 nop
702 ENDFUNC(GLOBAL(lshrsi3))
703 #endif
705 #ifdef L_movmem
706 .text
707 .balign 4
708 .global GLOBAL(movmem)
709 HIDDEN_FUNC(GLOBAL(movmem))
710 HIDDEN_ALIAS(movstr,movmem)
711 /* This would be a lot simpler if r6 contained the byte count
712 minus 64, and we wouldn't be called here for a byte count of 64. */
713 GLOBAL(movmem):
714 sts.l pr,@-r15 ! save the return address, the bsr below overwrites pr
715 shll2 r6 ! r6 *= 4
716 bsr GLOBAL(movmemSI52+2) ! enter the copy chain just past its first load ...
717 mov.l @(48,r5),r0 ! delay slot: ... which is performed here instead
718 .balign 4
719 LOCAL(movmem_loop): /* Reached with rts */
720 mov.l @(60,r5),r0
721 add #-64,r6 ! one more 64-byte group accounted for
722 mov.l r0,@(60,r4)
723 tst r6,r6 ! T = (r6 == 0)
724 mov.l @(56,r5),r0
725 bt LOCAL(movmem_done) ! nothing left after this group
726 mov.l r0,@(56,r4)
727 cmp/pl r6 ! T = (r6 > 0), tested by the bt below
728 mov.l @(52,r5),r0
729 add #64,r5 ! advance source and destination to the next group
730 mov.l r0,@(52,r4)
731 add #64,r4
732 bt GLOBAL(movmemSI52) ! still positive: copy words 48..0 of the next group, its rts comes back to the loop
733 ! done all the large groups, do the remainder
734 ! jump to movmem+
735 mova GLOBAL(movmemSI4)+4,r0
736 add r6,r0 ! r6 is negative here, so this steps back into the copy chain
737 jmp @r0
738 LOCAL(movmem_done): ! share slot insn, works out aligned.
739 lds.l @r15+,pr ! restore the return address (also the delay slot of the jmp above)
740 mov.l r0,@(56,r4)
741 mov.l @(52,r5),r0
742 rts
743 mov.l r0,@(52,r4) ! delay slot: store the last word
744 .balign 4
745 ! ??? We need aliases movstr* for movmem* for the older libraries. These
746 ! aliases will be removed at some point in the future.
747 .global GLOBAL(movmemSI64) ! movmemSIn copies n bytes, one word at a time, from @r5 to @r4 through r0
748 HIDDEN_FUNC(GLOBAL(movmemSI64))
749 HIDDEN_ALIAS(movstrSI64,movmemSI64)
750 GLOBAL(movmemSI64):
751 mov.l @(60,r5),r0
752 mov.l r0,@(60,r4)
753 .global GLOBAL(movmemSI60)
754 HIDDEN_FUNC(GLOBAL(movmemSI60))
755 HIDDEN_ALIAS(movstrSI60,movmemSI60)
756 GLOBAL(movmemSI60):
757 mov.l @(56,r5),r0
758 mov.l r0,@(56,r4)
759 .global GLOBAL(movmemSI56)
760 HIDDEN_FUNC(GLOBAL(movmemSI56))
761 HIDDEN_ALIAS(movstrSI56,movmemSI56)
762 GLOBAL(movmemSI56):
763 mov.l @(52,r5),r0
764 mov.l r0,@(52,r4)
765 .global GLOBAL(movmemSI52)
766 HIDDEN_FUNC(GLOBAL(movmemSI52))
767 HIDDEN_ALIAS(movstrSI52,movmemSI52)
768 GLOBAL(movmemSI52):
769 mov.l @(48,r5),r0
770 mov.l r0,@(48,r4)
771 .global GLOBAL(movmemSI48)
772 HIDDEN_FUNC(GLOBAL(movmemSI48))
773 HIDDEN_ALIAS(movstrSI48,movmemSI48)
774 GLOBAL(movmemSI48):
775 mov.l @(44,r5),r0
776 mov.l r0,@(44,r4)
777 .global GLOBAL(movmemSI44)
778 HIDDEN_FUNC(GLOBAL(movmemSI44))
779 HIDDEN_ALIAS(movstrSI44,movmemSI44)
780 GLOBAL(movmemSI44):
781 mov.l @(40,r5),r0
782 mov.l r0,@(40,r4)
783 .global GLOBAL(movmemSI40)
784 HIDDEN_FUNC(GLOBAL(movmemSI40))
785 HIDDEN_ALIAS(movstrSI40,movmemSI40)
786 GLOBAL(movmemSI40):
787 mov.l @(36,r5),r0
788 mov.l r0,@(36,r4)
789 .global GLOBAL(movmemSI36)
790 HIDDEN_FUNC(GLOBAL(movmemSI36))
791 HIDDEN_ALIAS(movstrSI36,movmemSI36)
792 GLOBAL(movmemSI36):
793 mov.l @(32,r5),r0
794 mov.l r0,@(32,r4)
795 .global GLOBAL(movmemSI32)
796 HIDDEN_FUNC(GLOBAL(movmemSI32))
797 HIDDEN_ALIAS(movstrSI32,movmemSI32)
798 GLOBAL(movmemSI32):
799 mov.l @(28,r5),r0
800 mov.l r0,@(28,r4)
801 .global GLOBAL(movmemSI28)
802 HIDDEN_FUNC(GLOBAL(movmemSI28))
803 HIDDEN_ALIAS(movstrSI28,movmemSI28)
804 GLOBAL(movmemSI28):
805 mov.l @(24,r5),r0
806 mov.l r0,@(24,r4)
807 .global GLOBAL(movmemSI24)
808 HIDDEN_FUNC(GLOBAL(movmemSI24))
809 HIDDEN_ALIAS(movstrSI24,movmemSI24)
810 GLOBAL(movmemSI24):
811 mov.l @(20,r5),r0
812 mov.l r0,@(20,r4)
813 .global GLOBAL(movmemSI20)
814 HIDDEN_FUNC(GLOBAL(movmemSI20))
815 HIDDEN_ALIAS(movstrSI20,movmemSI20)
816 GLOBAL(movmemSI20):
817 mov.l @(16,r5),r0
818 mov.l r0,@(16,r4)
819 .global GLOBAL(movmemSI16)
820 HIDDEN_FUNC(GLOBAL(movmemSI16))
821 HIDDEN_ALIAS(movstrSI16,movmemSI16)
822 GLOBAL(movmemSI16):
823 mov.l @(12,r5),r0
824 mov.l r0,@(12,r4)
825 .global GLOBAL(movmemSI12)
826 HIDDEN_FUNC(GLOBAL(movmemSI12))
827 HIDDEN_ALIAS(movstrSI12,movmemSI12)
828 GLOBAL(movmemSI12):
829 mov.l @(8,r5),r0
830 mov.l r0,@(8,r4)
831 .global GLOBAL(movmemSI8)
832 HIDDEN_FUNC(GLOBAL(movmemSI8))
833 HIDDEN_ALIAS(movstrSI8,movmemSI8)
834 GLOBAL(movmemSI8):
835 mov.l @(4,r5),r0
836 mov.l r0,@(4,r4)
837 .global GLOBAL(movmemSI4)
838 HIDDEN_FUNC(GLOBAL(movmemSI4))
839 HIDDEN_ALIAS(movstrSI4,movmemSI4)
840 GLOBAL(movmemSI4):
841 mov.l @(0,r5),r0
842 rts
843 mov.l r0,@(0,r4) ! delay slot: store the first word last, then return
845 ENDFUNC(GLOBAL(movmemSI64))
846 ENDFUNC(GLOBAL(movmemSI60))
847 ENDFUNC(GLOBAL(movmemSI56))
848 ENDFUNC(GLOBAL(movmemSI52))
849 ENDFUNC(GLOBAL(movmemSI48))
850 ENDFUNC(GLOBAL(movmemSI44))
851 ENDFUNC(GLOBAL(movmemSI40))
852 ENDFUNC(GLOBAL(movmemSI36))
853 ENDFUNC(GLOBAL(movmemSI32))
854 ENDFUNC(GLOBAL(movmemSI28))
855 ENDFUNC(GLOBAL(movmemSI24))
856 ENDFUNC(GLOBAL(movmemSI20))
857 ENDFUNC(GLOBAL(movmemSI16))
858 ENDFUNC(GLOBAL(movmemSI12))
859 ENDFUNC(GLOBAL(movmemSI8))
860 ENDFUNC(GLOBAL(movmemSI4))
861 ENDFUNC(GLOBAL(movmem))
862 #endif
864 #ifdef L_movmem_i4
865 .text
866 .global GLOBAL(movmem_i4_even)
867 .global GLOBAL(movmem_i4_odd)
868 .global GLOBAL(movmemSI12_i4)
870 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
871 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
872 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
874 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
875 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
876 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
878 .p2align 5
879 L_movmem_2mod4_end: ! exit taken from the loop when two loaded words (r0, r1) are still pending
880 mov.l r0,@(16,r4)
881 rts
882 mov.l r1,@(20,r4) ! delay slot
884 .p2align 2
886 GLOBAL(movmem_i4_even): ! copies words from @r5 (post-incremented) to @r4; r6 is the loop counter
887 mov.l @r5+,r0
888 bra L_movmem_start_even
889 mov.l @r5+,r1 ! delay slot: second word of the first pair
891 GLOBAL(movmem_i4_odd):
892 mov.l @r5+,r1
893 add #-4,r4 ! bias the destination so the store offsets below line up with the loop
894 mov.l @r5+,r2
895 mov.l @r5+,r3
896 mov.l r1,@(4,r4)
897 mov.l r2,@(8,r4)
899 L_movmem_loop:
900 mov.l r3,@(12,r4)
901 dt r6 ! r6 -= 1, T = (r6 == 0)
902 mov.l @r5+,r0
903 bt/s L_movmem_2mod4_end
904 mov.l @r5+,r1 ! delay slot, executed on both paths
905 add #16,r4
906 L_movmem_start_even:
907 mov.l @r5+,r2
908 mov.l @r5+,r3
909 mov.l r0,@r4
910 dt r6 ! r6 -= 1, T = (r6 == 0)
911 mov.l r1,@(4,r4)
912 bf/s L_movmem_loop
913 mov.l r2,@(8,r4) ! delay slot, executed on both paths
914 rts
915 mov.l r3,@(12,r4) ! delay slot: final store
917 ENDFUNC(GLOBAL(movmem_i4_even))
918 ENDFUNC(GLOBAL(movmem_i4_odd))
920 .p2align 4
921 GLOBAL(movmemSI12_i4): ! copy three words (12 bytes) from @r5 to @r4 through r0-r2
922 mov.l @r5,r0
923 mov.l @(4,r5),r1
924 mov.l @(8,r5),r2
925 mov.l r0,@r4
926 mov.l r1,@(4,r4)
927 rts
928 mov.l r2,@(8,r4) ! delay slot: last store
930 ENDFUNC(GLOBAL(movmemSI12_i4))
931 #endif
933 #ifdef L_mulsi3
936 .global GLOBAL(mulsi3)
937 HIDDEN_FUNC(GLOBAL(mulsi3))
939 ! r4 = aabb
940 ! r5 = ccdd
941 ! r0 = aabb*ccdd via partial products
943 ! if aa == 0 and cc == 0
944 ! r0 = bb*dd
946 ! else
947 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
950 GLOBAL(mulsi3):
951 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
952 mov r5,r3 ! r3 = ccdd
953 swap.w r4,r2 ! r2 = bbaa
954 xtrct r2,r3 ! r3 = aacc
955 tst r3,r3 ! msws zero ?
956 bf hiset
957 rts ! yes - then we have the answer
958 sts macl,r0
960 hiset: sts macl,r0 ! r0 = bb*dd
961 mulu.w r2,r5 ! brewing macl = aa*dd
962 sts macl,r1
963 mulu.w r3,r4 ! brewing macl = cc*bb
964 sts macl,r2
965 add r1,r2 ! r2 = aa*dd + cc*bb
966 shll16 r2 ! scale the cross terms by 65536
967 rts
968 add r2,r0 ! delay slot: add the cross terms to bb*dd
970 ENDFUNC(GLOBAL(mulsi3))
971 #endif
972 #endif /* ! __SH5__ */
973 #ifdef L_sdivsi3_i4
974 .title "SH DIVIDE"
975 !! 4 byte integer Divide code for the Renesas SH
976 #ifdef __SH4__
977 !! args in r4 and r5, result in fpul, clobber dr0, dr2
979 .global GLOBAL(sdivsi3_i4)
980 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
981 GLOBAL(sdivsi3_i4):
982 lds r4,fpul
983 float fpul,dr0 ! dr0 = (double) dividend
984 lds r5,fpul
985 float fpul,dr2 ! dr2 = (double) divisor
986 fdiv dr2,dr0 ! dr0 = dr0 / dr2
987 rts
988 ftrc dr0,fpul ! delay slot: truncate the quotient to an integer in fpul
990 ENDFUNC(GLOBAL(sdivsi3_i4))
991 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
992 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
994 #if ! __SH5__ || __SH5__ == 32
995 #if __SH5__
996 .mode SHcompact
997 #endif
998 .global GLOBAL(sdivsi3_i4)
999 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1000 GLOBAL(sdivsi3_i4):
1001 sts.l fpscr,@-r15 ! save the fpscr of the caller
1002 mov #8,r2
1003 swap.w r2,r2 ! r2 = 8 << 16 = 0x00080000
1004 lds r2,fpscr ! NOTE(review): presumably selects double precision (PR bit) - confirm against the FPSCR layout
1005 lds r4,fpul
1006 float fpul,dr0 ! dr0 = (double) dividend
1007 lds r5,fpul
1008 float fpul,dr2 ! dr2 = (double) divisor
1009 fdiv dr2,dr0 ! dr0 = dr0 / dr2
1010 ftrc dr0,fpul ! truncate the quotient to an integer in fpul
1011 rts
1012 lds.l @r15+,fpscr ! delay slot: restore the saved fpscr
1014 ENDFUNC(GLOBAL(sdivsi3_i4))
1015 #endif /* ! __SH5__ || __SH5__ == 32 */
1016 #endif /* ! __SH4__ */
1017 #endif
1019 #ifdef L_sdivsi3
1020 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1021 sh2e/sh3e code. */
1022 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1024 !! Steve Chamberlain
1025 !! sac@cygnus.com
1029 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1031 .global GLOBAL(sdivsi3)
1032 #if __SHMEDIA__
1033 #if __SH5__ == 32
1034 .section .text..SHmedia32,"ax"
1035 #else
1036 .text
1037 #endif
1038 .align 2
1039 #if 0
1040 /* The assembly code that follows is a hand-optimized version of the C
1041 code that follows. Note that the registers that are modified are
1042 exactly those listed as clobbered in the patterns divsi3_i1 and
1043 divsi3_i1_media.
1045 int __sdivsi3 (i, j)
1046 int i, j;
1047 {
1048 register unsigned long long r18 asm ("r18");
1049 register unsigned long long r19 asm ("r19");
1050 register unsigned long long r0 asm ("r0") = 0;
1051 register unsigned long long r1 asm ("r1") = 1;
1052 register int r2 asm ("r2") = i >> 31;
1053 register int r3 asm ("r3") = j >> 31;
1055 r2 = r2 ? r2 : r1;
1056 r3 = r3 ? r3 : r1;
1057 r18 = i * r2;
1058 r19 = j * r3;
1059 r2 *= r3;
1061 r19 <<= 31;
1062 r1 <<= 31;
1063 do
1064 if (r18 >= r19)
1065 r0 |= r1, r18 -= r19;
1066 while (r19 >>= 1, r1 >>= 1);
1068 return r2 * (int)r0;
1069 }
1070 */
1071 GLOBAL(sdivsi3):
1072 pt/l LOCAL(sdivsi3_dontadd), tr2
1073 pt/l LOCAL(sdivsi3_loop), tr1
1074 ptabs/l r18, tr0
1075 movi 0, r0
1076 movi 1, r1
1077 shari.l r4, 31, r2
1078 shari.l r5, 31, r3
1079 cmveq r2, r1, r2
1080 cmveq r3, r1, r3
1081 muls.l r4, r2, r18
1082 muls.l r5, r3, r19
1083 muls.l r2, r3, r2
1084 shlli r19, 31, r19
1085 shlli r1, 31, r1
1086 LOCAL(sdivsi3_loop):
1087 bgtu r19, r18, tr2
1088 or r0, r1, r0
1089 sub r18, r19, r18
1090 LOCAL(sdivsi3_dontadd):
1091 shlri r1, 1, r1
1092 shlri r19, 1, r19
1093 bnei r1, 0, tr1
1094 muls.l r0, r2, r0
1095 add.l r0, r63, r0
1096 blink tr0, r63
1097 #elif 0 /* ! 0 */
1098 // inputs: r4,r5
1099 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1100 // result in r0
1101 GLOBAL(sdivsi3):
1102 // can create absolute value without extra latency,
1103 // but dependent on proper sign extension of inputs:
1104 // shari.l r5,31,r2
1105 // xor r5,r2,r20
1106 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1107 shari.l r5,31,r2
1108 ori r2,1,r2
1109 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1110 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1111 shari.l r4,31,r3
1112 nsb r20,r0
1113 shlld r20,r0,r25
1114 shlri r25,48,r25
1115 sub r19,r25,r1
1116 mmulfx.w r1,r1,r2
1117 mshflo.w r1,r63,r1
1118 // If r4 was to be used in-place instead of r21, could use this sequence
1119 // to compute absolute:
1120 // sub r63,r4,r19 // compute absolute value of r4
1121 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1122 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1123 ori r3,1,r3
1124 mmulfx.w r25,r2,r2
1125 sub r19,r0,r0
1126 muls.l r4,r3,r21
1127 msub.w r1,r2,r2
1128 addi r2,-2,r1
1129 mulu.l r21,r1,r19
1130 mmulfx.w r2,r2,r2
1131 shlli r1,15,r1
1132 shlrd r19,r0,r19
1133 mulu.l r19,r20,r3
1134 mmacnfx.wl r25,r2,r1
1135 ptabs r18,tr0
1136 sub r21,r3,r25
1138 mulu.l r25,r1,r2
1139 addi r0,14,r0
1140 xor r4,r5,r18
1141 shlrd r2,r0,r2
1142 mulu.l r2,r20,r3
1143 add r19,r2,r19
1144 shari.l r18,31,r18
1145 sub r25,r3,r25
1147 mulu.l r25,r1,r2
1148 sub r25,r20,r25
1149 add r19,r18,r19
1150 shlrd r2,r0,r2
1151 mulu.l r2,r20,r3
1152 addi r25,1,r25
1153 add r19,r2,r19
1155 cmpgt r25,r3,r25
1156 add.l r19,r25,r0
1157 xor r0,r18,r0
1158 blink tr0,r63
1159 #else /* ! 0 && ! 0 */
1161 // inputs: r4,r5
1162 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1163 // result in r0
1164 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1165 #ifndef __pic__
1166 FUNC(GLOBAL(sdivsi3))
1167 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1168 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1169 // with the SHcompact implementation, which clobbers tr1 / tr2.
1170 .global GLOBAL(sdivsi3_1)
1171 GLOBAL(sdivsi3_1):
1172 .global GLOBAL(div_table_internal)
1173 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1174 shori GLOBAL(div_table_internal) & 65535, r20
1175 #endif
1176 .global GLOBAL(sdivsi3_2)
1177 // div_table in r20
1178 // clobbered: r1,r18,r19,r21,r25,tr0
1179 GLOBAL(sdivsi3_2):
1180 nsb r5, r1
1181 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1182 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1183 ldx.ub r20, r21, r19 // u0.8
1184 shari r25, 32, r25 // normalize to s2.30
1185 shlli r21, 1, r21
1186 muls.l r25, r19, r19 // s2.38
1187 ldx.w r20, r21, r21 // s2.14
1188 ptabs r18, tr0
1189 shari r19, 24, r19 // truncate to s2.14
1190 sub r21, r19, r19 // some 11 bit inverse in s1.14
1191 muls.l r19, r19, r21 // u0.28
1192 sub r63, r1, r1
1193 addi r1, 92, r1
1194 muls.l r25, r21, r18 // s2.58
1195 shlli r19, 45, r19 // multiply by two and convert to s2.58
1196 /* bubble */
1197 sub r19, r18, r18
1198 shari r18, 28, r18 // some 22 bit inverse in s1.30
1199 muls.l r18, r25, r0 // s2.60
1200 muls.l r18, r4, r25 // s32.30
1201 /* bubble */
1202 shari r0, 16, r19 // s-16.44
1203 muls.l r19, r18, r19 // s-16.74
1204 shari r25, 63, r0
1205 shari r4, 14, r18 // s19.-14
1206 shari r19, 30, r19 // s-16.44
1207 muls.l r19, r18, r19 // s15.30
1208 xor r21, r0, r21 // You could also use the constant 1 << 27.
1209 add r21, r25, r21
1210 sub r21, r19, r21
1211 shard r21, r1, r21
1212 sub r21, r0, r0
1213 blink tr0, r63
1214 #ifndef __pic__
1215 ENDFUNC(GLOBAL(sdivsi3))
1216 #endif
1217 ENDFUNC(GLOBAL(sdivsi3_2))
1218 #endif
1219 #elif defined __SHMEDIA__
1220 /* m5compact-nofpu: signed 32-bit divide, one quotient bit per loop pass */
1221 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1222 .mode SHmedia
1223 .section .text..SHmedia32,"ax"
1224 .align 2
1225 FUNC(GLOBAL(sdivsi3))
1226 GLOBAL(sdivsi3):
1227 pt/l LOCAL(sdivsi3_dontsub), tr0
1228 pt/l LOCAL(sdivsi3_loop), tr1
1229 ptabs/l r18,tr2 // tr2 = return target taken from r18, used by the final blink
1230 shari.l r4,31,r18 // r18 = 0 or -1 from the sign of the dividend
1231 shari.l r5,31,r19 // r19 = 0 or -1 from the sign of the divisor
1232 xor r4,r18,r20
1233 xor r5,r19,r21
1234 sub.l r20,r18,r20 // r20 = (r4 ^ sign) - sign = magnitude of the dividend
1235 sub.l r21,r19,r21 // r21 = magnitude of the divisor
1236 xor r18,r19,r19 // r19 = -1 when the operand signs differ, else 0
1237 shlli r21,32,r25 // divisor magnitude moved into the upper 32 bits
1238 addi r25,-1,r21 // minus one, so the subtraction in the loop also adds 1 to the low word
1239 addz.l r20,r63,r20 // zero-extend the dividend magnitude to 64 bits
1240 LOCAL(sdivsi3_loop):
1241 shlli r20,1,r20
1242 bgeu/u r21,r20,tr0 // skip the subtraction when r21 >= r20 (unsigned)
1243 sub r20,r21,r20
1244 LOCAL(sdivsi3_dontsub):
1245 addi.l r25,-1,r25 // r25 is reused as the loop counter and counts downwards
1246 bnei r25,-32,tr1 // loop until the counter reaches -32
1247 xor r20,r19,r20
1248 sub.l r20,r19,r0 // r0 = (r20 ^ r19) - r19: negated when the signs differed
1249 blink tr2,r63
1250 ENDFUNC(GLOBAL(sdivsi3))
1251 #else /* ! __SHMEDIA__ */
1252 FUNC(GLOBAL(sdivsi3))
1253 GLOBAL(sdivsi3):
1254 mov r4,r1 ! r1 = dividend, becomes the quotient bit by bit
1255 mov r5,r0 ! r0 = divisor
1257 tst r0,r0
1258 bt div0 ! divisor is zero: return 0
1259 mov #0,r2
1260 div0s r2,r1 ! T = sign of the dividend
1261 subc r3,r3 ! r3 = -T: sign extension of the dividend, used as the high word
1262 subc r2,r1 ! subtract the borrow from r1, adjusting a negative dividend for the step sequence
1263 div0s r0,r3 ! set up the signed division of r3:r1 by r0
1264 rotcl r1
1265 div1 r0,r3
1266 rotcl r1
1267 div1 r0,r3
1268 rotcl r1
1269 div1 r0,r3
1270 rotcl r1
1271 div1 r0,r3
1272 rotcl r1
1273 div1 r0,r3
1274 rotcl r1
1275 div1 r0,r3
1276 rotcl r1
1277 div1 r0,r3
1278 rotcl r1
1279 div1 r0,r3
1280 rotcl r1
1281 div1 r0,r3
1282 rotcl r1
1283 div1 r0,r3
1284 rotcl r1
1285 div1 r0,r3
1286 rotcl r1
1287 div1 r0,r3
1288 rotcl r1
1289 div1 r0,r3
1290 rotcl r1
1291 div1 r0,r3
1292 rotcl r1
1293 div1 r0,r3
1294 rotcl r1
1295 div1 r0,r3
1296 rotcl r1
1297 div1 r0,r3
1298 rotcl r1
1299 div1 r0,r3
1300 rotcl r1
1301 div1 r0,r3
1302 rotcl r1
1303 div1 r0,r3
1304 rotcl r1
1305 div1 r0,r3
1306 rotcl r1
1307 div1 r0,r3
1308 rotcl r1
1309 div1 r0,r3
1310 rotcl r1
1311 div1 r0,r3
1312 rotcl r1
1313 div1 r0,r3
1314 rotcl r1
1315 div1 r0,r3
1316 rotcl r1
1317 div1 r0,r3
1318 rotcl r1
1319 div1 r0,r3
1320 rotcl r1
1321 div1 r0,r3
1322 rotcl r1
1323 div1 r0,r3
1324 rotcl r1
1325 div1 r0,r3
1326 rotcl r1
1327 div1 r0,r3
1328 rotcl r1 ! shift in the last quotient bit
1329 addc r2,r1 ! r1 += T: final quotient correction
1330 rts
1331 mov r1,r0 ! delay slot: quotient to the result register
1334 div0: rts
1335 mov #0,r0
1337 ENDFUNC(GLOBAL(sdivsi3))
1338 #endif /* ! __SHMEDIA__ */
1339 #endif /* ! __SH4__ */
1340 #endif
1341 #ifdef L_udivsi3_i4
1343 .title "SH DIVIDE"
1344 !! 4 byte integer Divide code for the Renesas SH
1345 #ifdef __SH4__
1346 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1347 !! and t bit
1349 .global GLOBAL(udivsi3_i4)
1350 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1351 GLOBAL(udivsi3_i4): /* Unsigned 32-bit divide done in double-precision floating point: r4 / r5 -> fpul. */
1352 mov #1,r1
1353 cmp/hi r1,r5
1354 bf trivial /* divisor <= 1 (unsigned): just hand back the dividend */
1355 rotr r1 /* r1 = 0x80000000 */
1356 xor r1,r4 /* flip the top bit: bias the unsigned dividend by -2^31 so it fits the signed int->double conversion */
1357 lds r4,fpul
1358 mova L1,r0 /* r0 = address of the 2147483648.0 constant */
1359 #ifdef FMOVD_WORKS
1360 fmov.d @r0+,dr4
1361 #else
1362 fmov.s @r0+,DR40
1363 fmov.s @r0,DR41
1364 #endif
1365 float fpul,dr0 /* dr0 = (double) biased dividend */
1366 xor r1,r5 /* same bias for the divisor */
1367 lds r5,fpul
1368 float fpul,dr2 /* dr2 = (double) biased divisor */
1369 fadd dr4,dr0 /* add 2^31 back: dr0 = dividend as an unsigned value */
1370 fadd dr4,dr2 /* dr2 = divisor as an unsigned value */
1371 fdiv dr2,dr0
1372 rts /* restored: the listing had dropped this return, so the division path fell through into trivial */
1373 ftrc dr0,fpul /* delay slot: truncate the quotient into fpul */
1375 trivial:
1376 rts /* restored: return for the divisor <= 1 path */
1377 lds r4,fpul /* delay slot: result = dividend */
1379 .align 2
1380 #ifdef FMOVD_WORKS
1381 .align 3 ! make double below 8 byte aligned.
1382 #endif
1383 L1: /* restored: label referenced by the mova above */
1384 .double 2147483648
1386 ENDFUNC(GLOBAL(udivsi3_i4))
1387 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1388 #if ! __SH5__ || __SH5__ == 32
1389 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1390 .mode SHmedia
1391 .global GLOBAL(udivsi3_i4)
1392 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1393 GLOBAL(udivsi3_i4): /* Unsigned 32-bit divide via a double-precision divide of the zero-extended operands; return address in r18. */
1394 addz.l r4,r63,r20 /* r20 = dividend zero-extended to 64 bits */
1395 addz.l r5,r63,r21 /* r21 = divisor zero-extended to 64 bits */
1396 fmov.qd r20,dr0
1397 fmov.qd r21,dr32
1398 ptabs r18,tr0 /* tr0 = return address */
1399 float.qd dr0,dr0 /* dr0 = (double) dividend */
1400 float.qd dr32,dr32 /* dr32 = (double) divisor */
1401 fdiv.d dr0,dr32,dr0
1402 ftrc.dq dr0,dr32 /* truncate the quotient to a 64-bit integer held in dr32 */
1403 fmov.s fr33,fr32 /* copy one 32-bit half of dr32 into fr32 - presumably fr32 is where the caller reads fpul and fr33 holds the low word; TODO confirm */
1404 blink tr0,r63 /* return */
1406 ENDFUNC(GLOBAL(udivsi3_i4))
1407 #endif /* ! __SH5__ || __SH5__ == 32 */
1408 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1409 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1411 .global GLOBAL(udivsi3_i4)
1412 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1413 GLOBAL(udivsi3_i4): /* Unsigned 32-bit divide r4 / r5 -> fpul, done in double precision with FPSCR switched temporarily. */
1414 mov #1,r1
1415 cmp/hi r1,r5
1416 bf trivial /* divisor <= 1 (unsigned): just hand back the dividend */
1417 sts.l fpscr,@-r15 /* save the caller's FPSCR on the stack */
1418 mova L1,r0 /* r0 = address of the literal pool below */
1419 lds.l @r0+,fpscr /* load FPSCR from the first literal (0x80000 or 0x180000); r0 now points at the double */
1420 rotr r1 /* r1 = 0x80000000 */
1421 xor r1,r4 /* flip the top bit: bias the unsigned dividend by -2^31 */
1422 lds r4,fpul
1423 #ifdef FMOVD_WORKS
1424 fmov.d @r0+,dr4
1425 #else
1426 fmov.s @r0+,DR40
1427 fmov.s @r0,DR41
1428 #endif
1429 float fpul,dr0 /* dr0 = (double) biased dividend */
1430 xor r1,r5 /* same bias for the divisor */
1431 lds r5,fpul
1432 float fpul,dr2 /* dr2 = (double) biased divisor */
1433 fadd dr4,dr0 /* add 2^31 back */
1434 fadd dr4,dr2
1435 fdiv dr2,dr0
1436 ftrc dr0,fpul /* truncate the quotient into fpul while the temporary FPSCR is still active */
1437 rts /* restored: the listing had dropped this return, so the division path fell through into trivial */
1438 lds.l @r15+,fpscr /* delay slot: restore the caller's FPSCR */
1440 #ifdef FMOVD_WORKS
1441 .align 3 ! make double below 8 byte aligned.
1442 #endif
1443 trivial:
1444 rts /* restored: return for the divisor <= 1 path */
1445 lds r4,fpul /* delay slot: result = dividend */
1447 .align 2
1448 L1: /* restored: label referenced by the mova above; FPSCR word first, then the double */
1449 #ifndef FMOVD_WORKS
1450 .long 0x80000
1451 #else
1452 .long 0x180000
1453 #endif
1454 .double 2147483648
1456 ENDFUNC(GLOBAL(udivsi3_i4))
1457 #endif /* ! __SH4__ */
1458 #endif
1460 #ifdef L_udivsi3
1461 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1462 sh2e/sh3e code. */
1463 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1465 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1466 .global GLOBAL(udivsi3)
1467 HIDDEN_FUNC(GLOBAL(udivsi3))
1469 #if __SHMEDIA__
1470 #if __SH5__ == 32
1471 .section .text..SHmedia32,"ax"
1472 #else
1473 .text
1474 #endif
1475 .align 2
1476 #if 0
1477 /* The assembly code that follows is a hand-optimized version of the C
1478 code that follows. Note that the registers that are modified are
1479 exactly those listed as clobbered in the patterns udivsi3_i1 and
1480 udivsi3_i1_media.
1482 unsigned
1483 __udivsi3 (i, j)
1484 unsigned i, j;
1485 {
1486 register unsigned long long r0 asm ("r0") = 0;
1487 register unsigned long long r18 asm ("r18") = 1;
1488 register unsigned long long r4 asm ("r4") = i;
1489 register unsigned long long r19 asm ("r19") = j;
1491 r19 <<= 31;
1492 r18 <<= 31;
1493 do
1494 if (r4 >= r19)
1495 r0 |= r18, r4 -= r19;
1496 while (r19 >>= 1, r18 >>= 1);
1498 return r0;
1499 }
1500 */
1501 GLOBAL(udivsi3): /* Disabled (#if 0) reference version: restoring-division loop, dividend r4, divisor r5, quotient r0. */
1502 pt/l LOCAL(udivsi3_dontadd), tr2 /* tr2 = target that skips setting the quotient bit */
1503 pt/l LOCAL(udivsi3_loop), tr1 /* tr1 = loop head */
1504 ptabs/l r18, tr0 /* tr0 = return address, taken before r18 is reused */
1505 movi 0, r0 /* r0 = quotient accumulator */
1506 movi 1, r18
1507 addz.l r5, r63, r19 /* r19 = divisor, zero-extended */
1508 addz.l r4, r63, r4 /* r4 = dividend, zero-extended */
1509 shlli r19, 31, r19 /* r19 = divisor << 31 */
1510 shlli r18, 31, r18 /* r18 = current quotient bit, starting at bit 31 */
1511 LOCAL(udivsi3_loop):
1512 bgtu r19, r4, tr2 /* shifted divisor does not fit: leave this quotient bit clear */
1513 or r0, r18, r0 /* set the quotient bit */
1514 sub r4, r19, r4 /* and take the shifted divisor off the remainder */
1515 LOCAL(udivsi3_dontadd):
1516 shlri r18, 1, r18
1517 shlri r19, 1, r19
1518 bnei r18, 0, tr1 /* loop until the bit mask has shifted out (32 iterations) */
1519 blink tr0, r63 /* return */
1520 #else
1521 GLOBAL(udivsi3): /* Unsigned 32-bit divide using multiplies by an approximate reciprocal of the divisor, then correcting the quotient; no loop. */
1522 // inputs: r4,r5
1523 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1524 // result in r0.
1525 addz.l r5,r63,r22 /* r22 = divisor, zero-extended to 64 bits */
1526 nsb r22,r0 /* r0 = number of redundant sign bits in r22, used as the normalising shift */
1527 shlld r22,r0,r25 /* r25 = divisor shifted up so its leading bit sits just below the sign bit */
1528 shlri r25,48,r25 /* keep only the top 16 bits of the normalised divisor */
1529 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1530 sub r20,r25,r21 /* first reciprocal estimate - presumably a linear approximation; the constant is not derived in this file */
1531 mmulfx.w r21,r21,r19
1532 mshflo.w r21,r63,r21
1533 ptabs r18,tr0 /* tr0 = return address, taken before r18 is reused as scratch */
1534 mmulfx.w r25,r19,r19
1535 sub r20,r0,r0 /* r0 = r20 - normalising shift; used below as a right-shift count */
1536 /* bubble */
1537 msub.w r21,r19,r19
1538 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1539 before the msub.w, but we need a different value for
1540 r19 to keep errors under control. */
1541 mulu.l r4,r21,r18 /* r18 = dividend * reciprocal estimate */
1542 mmulfx.w r19,r19,r19
1543 shlli r21,15,r21
1544 shlrd r18,r0,r18 /* r18 = first quotient estimate */
1545 mulu.l r18,r22,r20 /* r20 = estimate * divisor */
1546 mmacnfx.wl r25,r19,r21 /* refine the reciprocal held in r21 */
1547 /* bubble */
1548 sub r4,r20,r25 /* r25 = remainder left after the first estimate */
1550 mulu.l r25,r21,r19 /* second step: remainder * refined reciprocal */
1551 addi r0,14,r0 /* adjust the shift count for the refined reciprocal */
1552 /* bubble */
1553 shlrd r19,r0,r19 /* r19 = quotient estimate for the remainder */
1554 mulu.l r19,r22,r20
1555 add r18,r19,r18 /* accumulate into the quotient */
1556 /* bubble */
1557 sub.l r25,r20,r25 /* r25 = new remainder */
1559 mulu.l r25,r21,r19 /* third step, same pattern */
1560 addz.l r25,r63,r25
1561 sub r25,r22,r25
1562 shlrd r19,r0,r19
1563 mulu.l r19,r22,r20
1564 addi r25,1,r25 /* r25 = remainder - divisor + 1 */
1565 add r18,r19,r18
1567 cmpgt r25,r20,r25 /* r25 = 1 if one more divisor still fits, else 0 */
1568 add.l r18,r25,r0 /* r0 = quotient plus the final 0/1 correction */
1569 blink tr0,r63 /* return */
1570 #endif
1571 #elif defined (__SHMEDIA__)
1572 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1573 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1574 So use a short shmedia loop. */
1575 // clobbered: r20,r21,r25,tr0,tr1,tr2
1576 .mode SHmedia
1577 .section .text..SHmedia32,"ax"
1578 .align 2
1579 GLOBAL(udivsi3): /* Unsigned 32-bit divide by a 32-step shift/subtract loop: dividend r4, divisor r5, quotient in r0, return address in r18. */
1580 pt/l LOCAL(udivsi3_dontsub), tr0 /* tr0 = target that skips the subtract */
1581 pt/l LOCAL(udivsi3_loop), tr1 /* tr1 = loop head */
1582 ptabs/l r18,tr2 /* tr2 = return address */
1583 shlli r5,32,r25 /* r25 = divisor << 32; its zero low half doubles as the loop counter */
1584 addi r25,-1,r21 /* r21 = (divisor << 32) - 1 */
1585 addz.l r4,r63,r20 /* r20 = dividend, zero-extended to 64 bits */
1586 LOCAL(udivsi3_loop):
1587 shlli r20,1,r20
1588 bgeu/u r21,r20,tr0 /* skip the subtract while the upper half of r20 is below the divisor */
1589 sub r20,r21,r20 /* r20 -= (divisor << 32) - 1: removes the divisor from the upper half and adds a quotient bit to the lower half */
1590 LOCAL(udivsi3_dontsub):
1591 addi.l r25,-1,r25 /* 32-bit decrement of the counter */
1592 bnei r25,-32,tr1 /* 32 iterations */
1593 add.l r20,r63,r0 /* r0 = low 32 bits of r20 = quotient */
1594 blink tr2,r63 /* return */
1595 #else /* ! defined (__SHMEDIA__) */
1596 LOCAL(div8): /* helper: 8 div1 steps (one here, then falls into div7) */
1597 div1 r5,r4
1598 LOCAL(div7): /* helper: 7 div1 steps, the last one in the rts delay slot */
1599 div1 r5,r4; div1 r5,r4; div1 r5,r4
1600 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1602 LOCAL(divx4): /* helper: 4 div1 steps with the quotient bits rotated into r0; the last div1 sits in the rts delay slot */
1603 div1 r5,r4; rotcl r0
1604 div1 r5,r4; rotcl r0
1605 div1 r5,r4; rotcl r0
1606 rts; div1 r5,r4
1608 GLOBAL(udivsi3): /* Unsigned 32-bit divide: dividend r4, divisor r5, quotient in r0; r5 is preserved. Uses the div8/div7/divx4 helpers above, so pr is saved on the stack. */
1609 sts.l pr,@-r15 /* save pr: the helpers are called with bsr */
1610 extu.w r5,r0
1611 cmp/eq r5,r0 /* T = 1 when the divisor fits in 16 bits */
1612 #ifdef __sh1__
1613 bf LOCAL(large_divisor)
1614 #else
1615 bf/s LOCAL(large_divisor)
1616 #endif
1617 div0u /* clear the division state; delay slot of bf/s, or fall-through only on sh1 (large_divisor repeats it there) */
1618 swap.w r4,r0
1619 shlr16 r4 /* r4 = upper 16 bits of the dividend */
1620 bsr LOCAL(div8)
1621 shll16 r5 /* delay slot: move the 16-bit divisor into the upper half */
1622 bsr LOCAL(div7)
1623 div1 r5,r4 /* delay slot: with div8 + div7 this makes 16 steps for the upper half */
1624 xtrct r4,r0
1625 xtrct r0,r4
1626 bsr LOCAL(div8)
1627 swap.w r4,r4
1628 bsr LOCAL(div7)
1629 div1 r5,r4 /* delay slot: another 16 steps for the lower half */
1630 lds.l @r15+,pr /* restore pr */
1631 xtrct r4,r0
1632 swap.w r0,r0
1633 rotcl r0 /* shift in the last quotient bit */
1634 rts /* restored: without this return the small-divisor path fell through into large_divisor */
1635 shlr16 r5 /* delay slot: undo the shll16 so r5 is returned unchanged */
1637 LOCAL(large_divisor): /* divisor needs more than 16 bits */
1638 #ifdef __sh1__
1639 div0u
1640 #endif
1641 mov #0,r0
1642 xtrct r4,r0
1643 xtrct r0,r4
1644 bsr LOCAL(divx4)
1645 rotcl r0
1646 bsr LOCAL(divx4)
1647 rotcl r0
1648 bsr LOCAL(divx4)
1649 rotcl r0
1650 bsr LOCAL(divx4) /* four calls x four steps = 16 div1 steps */
1651 rotcl r0
1652 lds.l @r15+,pr /* restore pr */
1653 rts /* restored: without this return execution ran off the end of the function */
1654 rotcl r0 /* delay slot: shift in the last quotient bit */
1656 ENDFUNC(GLOBAL(udivsi3))
1657 #endif /* ! __SHMEDIA__ */
1658 #endif /* __SH4__ */
1659 #endif /* L_udivsi3 */
1661 #ifdef L_udivdi3
1662 #ifdef __SHMEDIA__
1663 .mode SHmedia
1664 .section .text..SHmedia32,"ax"
1665 .align 2
1666 .global GLOBAL(udivdi3)
1667 FUNC(GLOBAL(udivdi3))
1668 GLOBAL(udivdi3): /* Unsigned 64-bit divide: dividend in r2, divisor in r3, quotient returned in r2, return address in r18. Works from an approximate reciprocal of the normalised divisor, with separate paths for small and large divisors. */
1669 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1670 shlri r3,1,r4
1671 nsb r4,r22 /* r22 = normalising shift count for the divisor */
1672 shlld r3,r22,r6 /* r6 = normalised divisor */
1673 shlri r6,49,r5 /* r5 = top bits of the normalised divisor, used to seed the reciprocal */
1674 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1675 sub r21,r5,r1 /* r1 = first reciprocal estimate */
1676 mmulfx.w r1,r1,r4
1677 mshflo.w r1,r63,r1
1678 sub r63,r22,r20 // r63 == 64 % 64
1679 mmulfx.w r5,r4,r4
1680 pta LOCAL(large_divisor),tr0
1681 addi r20,32,r9 /* r9 = 32 - r22 */
1682 msub.w r1,r4,r1
1683 madd.w r1,r1,r1
1684 mmulfx.w r1,r1,r4
1685 shlri r6,32,r7 /* r7 = upper 32 bits of the normalised divisor */
1686 bgt/u r9,r63,tr0 // large_divisor
1687 mmulfx.w r5,r4,r4
1688 shlri r2,32+14,r19
1689 addi r22,-31,r0
1690 msub.w r1,r4,r1
1692 mulu.l r1,r7,r4
1693 addi r1,-3,r5
1694 mulu.l r5,r19,r5 /* first partial quotient from the top bits of the dividend */
1695 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1696 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1697 the case may be, %0000000000000000 000.11111111111, still */
1698 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1699 mulu.l r5,r3,r8 /* r8 = partial quotient * divisor */
1700 mshalds.l r1,r21,r1
1701 shari r4,26,r4
1702 shlld r8,r0,r8
1703 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1704 sub r2,r8,r2 /* r2 = rest after the first step */
1705 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1707 shlri r2,22,r21
1708 mulu.l r21,r1,r21
1709 shlld r5,r0,r8 /* r8 = first partial quotient, shifted into place */
1710 addi r20,30-22,r0
1711 shlrd r21,r0,r21 /* r21 = second partial quotient */
1712 mulu.l r21,r3,r5
1713 add r8,r21,r8 /* accumulate the quotient in r8 */
1714 mcmpgt.l r21,r63,r21 // See Note 1
1715 addi r20,30,r0
1716 mshfhi.l r63,r21,r21
1717 sub r2,r5,r2
1718 andc r2,r21,r2
1720 /* small divisor: need a third divide step */
1721 mulu.l r2,r1,r7
1722 ptabs r18,tr0 /* tr0 = return address */
1723 addi r2,1,r2
1724 shlrd r7,r0,r7 /* r7 = third partial quotient */
1725 mulu.l r7,r3,r5
1726 add r8,r7,r8
1727 sub r2,r3,r2
1728 cmpgt r2,r5,r5 /* r5 = 1 if one more divisor still fits, else 0 */
1729 add r8,r5,r2 /* r2 = quotient plus the final 0/1 correction */
1730 /* could test r3 here to check for divide by zero. */
1731 blink tr0,r63
1733 LOCAL(large_divisor):
1734 mmulfx.w r5,r4,r4
1735 shlrd r2,r9,r25 /* r25 = dividend shifted right by r9 */
1736 shlri r25,32,r8
1737 msub.w r1,r4,r1
1739 mulu.l r1,r7,r4
1740 addi r1,-3,r5
1741 mulu.l r5,r8,r5
1742 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1743 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1744 the case may be, %0000000000000000 000.11111111111, still */
1745 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1746 shlri r5,14-1,r8 /* r8 = first partial quotient */
1747 mulu.l r8,r7,r5
1748 mshalds.l r1,r21,r1
1749 shari r4,26,r4
1750 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1751 sub r25,r5,r25
1752 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1754 shlri r25,22,r21
1755 mulu.l r21,r1,r21
1756 pta LOCAL(no_lo_adj),tr0
1757 addi r22,32,r0
1758 shlri r21,40,r21 /* r21 = second partial quotient */
1759 mulu.l r21,r7,r5
1760 add r8,r21,r8
1761 shlld r2,r0,r2
1762 sub r25,r5,r25
1763 bgtu/u r7,r25,tr0 // no_lo_adj
1764 addi r8,1,r8 /* rest still holds one more r7: bump the quotient and take r7 off */
1765 sub r25,r7,r25
1766 LOCAL(no_lo_adj):
1767 mextr4 r2,r25,r2
1769 /* large_divisor: only needs a few adjustments. */
1770 mulu.l r8,r6,r5
1771 ptabs r18,tr0 /* tr0 = return address */
1772 /* bubble */
1773 cmpgtu r5,r2,r5 /* r5 = 1 if the quotient estimate overshot, else 0 */
1774 sub r8,r5,r2 /* r2 = quotient minus the 0/1 correction */
1775 blink tr0,r63
1776 ENDFUNC(GLOBAL(udivdi3))
1777 /* Note 1: To shift the result of the second divide stage so that the result
1778 always fits into 32 bits, yet we still reduce the rest sufficiently
1779 would require a lot of instructions to do the shifts just right. Using
1780 the full 64 bit shift result to multiply with the divisor would require
1781 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1782 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1783 know that the rest after taking this partial result into account will
1784 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1785 upper 32 bits of the partial result are nonzero. */
1786 #endif /* __SHMEDIA__ */
1787 #endif /* L_udivdi3 */
1789 #ifdef L_divdi3
1790 #ifdef __SHMEDIA__
1791 .mode SHmedia
1792 .section .text..SHmedia32,"ax"
1793 .align 2
1794 .global GLOBAL(divdi3)
1795 FUNC(GLOBAL(divdi3))
1796 GLOBAL(divdi3): /* Signed 64-bit divide: dividend in r2, divisor in r3, quotient in r2. Takes absolute values, divides with udivdi3_internal, and negates the result when the operand signs differ. */
1797 pta GLOBAL(udivdi3_internal),tr0
1798 shari r2,63,r22 /* r22 = sign mask of the dividend (0 or -1) */
1799 shari r3,63,r23 /* r23 = sign mask of the divisor (0 or -1) */
1800 xor r2,r22,r2
1801 xor r3,r23,r3
1802 sub r2,r22,r2 /* r2 = |dividend| */
1803 sub r3,r23,r3 /* r3 = |divisor| */
1804 beq/u r22,r23,tr0 /* same signs: jump straight to the unsigned divide, which returns to our caller through r18 */
1805 ptabs r18,tr1 /* signs differ: keep our own return address in tr1 */
1806 blink tr0,r18 /* call the unsigned divide; r18 receives the address of the next instruction */
1807 sub r63,r2,r2 /* negate the quotient */
1808 blink tr1,r63 /* return */
1809 ENDFUNC(GLOBAL(divdi3))
1810 #endif /* __SHMEDIA__ */
1811 #endif /* L_divdi3 */
1813 #ifdef L_umoddi3
1814 #ifdef __SHMEDIA__
1815 .mode SHmedia
1816 .section .text..SHmedia32,"ax"
1817 .align 2
1818 .global GLOBAL(umoddi3)
1819 FUNC(GLOBAL(umoddi3))
1820 GLOBAL(umoddi3): /* Unsigned 64-bit remainder: dividend in r2, divisor in r3, remainder returned in r2, return address in r18. Same reciprocal-based steps as the 64-bit unsigned divide, but tracks only the rest. */
1821 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1822 shlri r3,1,r4
1823 nsb r4,r22 /* r22 = normalising shift count for the divisor */
1824 shlld r3,r22,r6 /* r6 = normalised divisor */
1825 shlri r6,49,r5 /* r5 = top bits of the normalised divisor, used to seed the reciprocal */
1826 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1827 sub r21,r5,r1 /* r1 = first reciprocal estimate */
1828 mmulfx.w r1,r1,r4
1829 mshflo.w r1,r63,r1
1830 sub r63,r22,r20 // r63 == 64 % 64
1831 mmulfx.w r5,r4,r4
1832 pta LOCAL(large_divisor),tr0
1833 addi r20,32,r9 /* r9 = 32 - r22 */
1834 msub.w r1,r4,r1
1835 madd.w r1,r1,r1
1836 mmulfx.w r1,r1,r4
1837 shlri r6,32,r7 /* r7 = upper 32 bits of the normalised divisor */
1838 bgt/u r9,r63,tr0 // large_divisor
1839 mmulfx.w r5,r4,r4
1840 shlri r2,32+14,r19
1841 addi r22,-31,r0
1842 msub.w r1,r4,r1
1844 mulu.l r1,r7,r4
1845 addi r1,-3,r5
1846 mulu.l r5,r19,r5 /* first partial quotient from the top bits of the dividend */
1847 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1848 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1849 the case may be, %0000000000000000 000.11111111111, still */
1850 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1851 mulu.l r5,r3,r5 /* r5 = partial quotient * divisor */
1852 mshalds.l r1,r21,r1
1853 shari r4,26,r4
1854 shlld r5,r0,r5
1855 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1856 sub r2,r5,r2 /* r2 = rest after the first step */
1857 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1859 shlri r2,22,r21
1860 mulu.l r21,r1,r21
1861 addi r20,30-22,r0
1862 /* bubble */ /* could test r3 here to check for divide by zero. */
1863 shlrd r21,r0,r21 /* r21 = second partial quotient */
1864 mulu.l r21,r3,r5
1865 mcmpgt.l r21,r63,r21 // See Note 1
1866 addi r20,30,r0
1867 mshfhi.l r63,r21,r21
1868 sub r2,r5,r2
1869 andc r2,r21,r2
1871 /* small divisor: need a third divide step */
1872 mulu.l r2,r1,r7
1873 ptabs r18,tr0 /* tr0 = return address */
1874 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1875 shlrd r7,r0,r7 /* r7 = third partial quotient */
1876 mulu.l r7,r3,r5
1877 /* bubble */
1878 addi r8,1,r7
1879 cmpgt r7,r5,r7 /* r7 = 1 if one more divisor still fits in the rest */
1880 cmvne r7,r8,r2 /* if so, use rest - r3 instead of rest */
1881 sub r2,r5,r2 /* r2 = final remainder */
1882 blink tr0,r63
1884 LOCAL(large_divisor):
1885 mmulfx.w r5,r4,r4
1886 shlrd r2,r9,r25 /* r25 = dividend shifted right by r9 */
1887 shlri r25,32,r8
1888 msub.w r1,r4,r1
1890 mulu.l r1,r7,r4
1891 addi r1,-3,r5
1892 mulu.l r5,r8,r5
1893 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1894 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1895 the case may be, %0000000000000000 000.11111111111, still */
1896 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1897 shlri r5,14-1,r8 /* r8 = first partial quotient */
1898 mulu.l r8,r7,r5
1899 mshalds.l r1,r21,r1
1900 shari r4,26,r4
1901 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1902 sub r25,r5,r25
1903 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1905 shlri r25,22,r21
1906 mulu.l r21,r1,r21
1907 pta LOCAL(no_lo_adj),tr0
1908 addi r22,32,r0
1909 shlri r21,40,r21 /* r21 = second partial quotient */
1910 mulu.l r21,r7,r5
1911 add r8,r21,r8
1912 shlld r2,r0,r2
1913 sub r25,r5,r25
1914 bgtu/u r7,r25,tr0 // no_lo_adj
1915 addi r8,1,r8 /* rest still holds one more r7: bump the quotient and take r7 off */
1916 sub r25,r7,r25
1917 LOCAL(no_lo_adj):
1918 mextr4 r2,r25,r2
1920 /* large_divisor: only needs a few adjustments. */
1921 mulu.l r8,r6,r5
1922 ptabs r18,tr0 /* tr0 = return address */
1923 add r2,r6,r7 /* r7 = rest + normalised divisor, used if the estimate overshot */
1924 cmpgtu r5,r2,r8 /* r8 = 1 if quotient * divisor exceeds the rest */
1925 cmvne r8,r7,r2 /* if so, add one divisor back */
1926 sub r2,r5,r2
1927 shlrd r2,r22,r2 /* shift the remainder back down by the normalising count */
1928 blink tr0,r63
1929 ENDFUNC(GLOBAL(umoddi3))
1930 /* Note 1: To shift the result of the second divide stage so that the result
1931 always fits into 32 bits, yet we still reduce the rest sufficiently
1932 would require a lot of instructions to do the shifts just right. Using
1933 the full 64 bit shift result to multiply with the divisor would require
1934 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1935 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1936 know that the rest after taking this partial result into account will
1937 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1938 upper 32 bits of the partial result are nonzero. */
1939 #endif /* __SHMEDIA__ */
1940 #endif /* L_umoddi3 */
1942 #ifdef L_moddi3
1943 #ifdef __SHMEDIA__
1944 .mode SHmedia
1945 .section .text..SHmedia32,"ax"
1946 .align 2
1947 .global GLOBAL(moddi3)
1948 FUNC(GLOBAL(moddi3))
1949 GLOBAL(moddi3): /* Signed 64-bit remainder: dividend in r2, divisor in r3, remainder in r2. Takes absolute values, calls umoddi3_internal, and negates the result when the dividend was negative. */
1950 pta GLOBAL(umoddi3_internal),tr0
1951 shari r2,63,r22 /* r22 = sign mask of the dividend (0 or -1) */
1952 shari r3,63,r23 /* r23 = sign mask of the divisor (0 or -1) */
1953 xor r2,r22,r2
1954 xor r3,r23,r3
1955 sub r2,r22,r2 /* r2 = |dividend| */
1956 sub r3,r23,r3 /* r3 = |divisor| */
1957 beq/u r22,r63,tr0 /* dividend not negative: jump straight to the unsigned routine, which returns to our caller through r18 */
1958 ptabs r18,tr1 /* dividend negative: keep our own return address in tr1 */
1959 blink tr0,r18 /* call the unsigned routine; r18 receives the address of the next instruction */
1960 sub r63,r2,r2 /* negate the remainder so it takes the sign of the dividend */
1961 blink tr1,r63 /* return */
1962 ENDFUNC(GLOBAL(moddi3))
1963 #endif /* __SHMEDIA__ */
1964 #endif /* L_moddi3 */
1966 #ifdef L_set_fpscr
1967 #if !defined (__SH2A_NOFPU__)
1968 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1969 #ifdef __SH5__
1970 .mode SHcompact
1971 #endif
1972 .global GLOBAL(set_fpscr)
1973 HIDDEN_FUNC(GLOBAL(set_fpscr))
1974 GLOBAL(set_fpscr): /* Load FPSCR from r4 and store two variants of that value, differing in FPSCR bits 19/20, into the two words of fpscr_values. Uses r0-r3 as scratch. */
1975 lds r4,fpscr /* FPSCR = argument */
1976 #ifdef __PIC__
1977 mov.l r12,@-r15 /* r12 is used as the GOT pointer below; save it */
1978 #ifdef __vxworks
1979 mov.l LOCAL(set_fpscr_L0_base),r12
1980 mov.l LOCAL(set_fpscr_L0_index),r0
1981 mov.l @r12,r12
1982 mov.l @(r0,r12),r12 /* r12 = GOT address looked up through ___GOTT_BASE__ / ___GOTT_INDEX__ */
1983 #else
1984 mova LOCAL(set_fpscr_L0),r0
1985 mov.l LOCAL(set_fpscr_L0),r12
1986 add r0,r12 /* r12 = GOT address: address of the literal plus the value stored in it */
1987 #endif
1988 mov.l LOCAL(set_fpscr_L1),r0
1989 mov.l @(r0,r12),r1 /* r1 = address of fpscr_values, read from its GOT slot */
1990 mov.l @r15+,r12 /* restore r12 */
1991 #else
1992 mov.l LOCAL(set_fpscr_L1),r1 /* r1 = address of fpscr_values */
1993 #endif
1994 swap.w r4,r0 /* bring FPSCR bits 16..31 into the low half so 8-bit immediates can reach them */
1995 or #24,r0 /* set FPSCR bits 19 and 20 */
1996 #ifndef FMOVD_WORKS
1997 xor #16,r0 /* clear bit 20 again when FMOVD_WORKS is not defined */
1998 #endif
1999 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2000 swap.w r0,r3
2001 mov.l r3,@(4,r1) /* fpscr_values[1] = variant with bit 19 set */
2002 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2003 swap.w r0,r2
2004 mov.l r2,@r1 /* fpscr_values[0] = variant with bit 19 set */
2005 #endif
2006 #ifndef FMOVD_WORKS
2007 xor #8,r0 /* clear bit 19 */
2008 #else
2009 xor #24,r0 /* clear bits 19 and 20 */
2010 #endif
2011 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2012 swap.w r0,r2
2013 rts /* restored: the listing had dropped this return, so execution ran into the literal pool */
2014 mov.l r2,@r1 /* delay slot: fpscr_values[0] = variant with bits 19/20 clear */
2015 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2016 swap.w r0,r3
2017 rts /* restored: the listing had dropped this return, so execution ran into the literal pool */
2018 mov.l r3,@(4,r1) /* delay slot: fpscr_values[1] = variant with bits 19/20 clear */
2019 #endif
2020 .align 2
2021 #ifdef __PIC__
2022 #ifdef __vxworks
2023 LOCAL(set_fpscr_L0_base):
2024 .long ___GOTT_BASE__
2025 LOCAL(set_fpscr_L0_index):
2026 .long ___GOTT_INDEX__
2027 #else
2028 LOCAL(set_fpscr_L0):
2029 .long _GLOBAL_OFFSET_TABLE_
2030 #endif
2031 LOCAL(set_fpscr_L1):
2032 .long GLOBAL(fpscr_values@GOT)
2033 #else
2034 LOCAL(set_fpscr_L1):
2035 .long GLOBAL(fpscr_values)
2036 #endif
2038 ENDFUNC(GLOBAL(set_fpscr))
2039 #ifndef NO_FPSCR_VALUES
2040 #ifdef __ELF__
2041 .comm GLOBAL(fpscr_values),8,4 /* two 32-bit words written by set_fpscr */
2042 #else
2043 .comm GLOBAL(fpscr_values),8
2044 #endif /* ELF */
2045 #endif /* NO_FPSCR_VALUES */
2046 #endif /* SH2E / SH3E / SH4 */
2047 #endif /* __SH2A_NOFPU__ */
2048 #endif /* L_set_fpscr */
2049 #ifdef L_ic_invalidate
2050 #if __SH5__ == 32
2051 .mode SHmedia
2052 .section .text..SHmedia32,"ax"
2053 .align 2
2054 .global GLOBAL(init_trampoline)
2055 HIDDEN_FUNC(GLOBAL(init_trampoline))
2056 GLOBAL(init_trampoline): /* Fill in the 16-byte trampoline at r0: a fixed 8-byte code pattern, then r2 at offset 8 and r3 at offset 12. There is no return here; execution continues into ic_invalidate below. */
2057 st.l r0,8,r2 /* trampoline[8..11] = r2 */
2058 #ifdef __LITTLE_ENDIAN__
2059 movi 9,r20 /* build the 64-bit pattern 16 bits at a time; presumably four SHcompact opcodes - TODO confirm against the SH manual */
2060 shori 0x402b,r20
2061 shori 0xd101,r20
2062 shori 0xd002,r20
2063 #else
2064 movi 0xffffffffffffd002,r20 /* same four halfwords in the opposite order for big-endian stores */
2065 shori 0xd101,r20
2066 shori 0x402b,r20
2067 shori 9,r20
2068 #endif
2069 st.q r0,0,r20 /* trampoline[0..7] = code pattern */
2070 st.l r0,12,r3 /* trampoline[12..15] = r3 */
2071 ENDFUNC(GLOBAL(init_trampoline))
2072 .global GLOBAL(ic_invalidate)
2073 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2074 GLOBAL(ic_invalidate): /* Write back the data-cache block at r0 and invalidate the matching instruction-cache block; return address in r18. */
2075 ocbwb r0,0 /* write back the operand-cache block containing r0 */
2076 synco
2077 icbi r0, 0 /* invalidate the instruction-cache block containing r0 */
2078 ptabs r18, tr0 /* tr0 = return address */
2079 synci
2080 blink tr0, r63 /* return */
2081 ENDFUNC(GLOBAL(ic_invalidate))
2082 #elif defined(__SH4A__)
2083 .global GLOBAL(ic_invalidate)
2084 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2085 GLOBAL(ic_invalidate): /* SH4A: write back the data-cache block at r4 and invalidate the matching instruction-cache block. */
2086 ocbwb @r4 /* write back the operand-cache block containing r4 */
2087 synco
2088 icbi @r4 /* invalidate the instruction-cache block containing r4 */
2089 rts /* restored: the listing had dropped the return and its delay slot, leaving the function with no exit */
2090 nop /* delay slot */
2091 ENDFUNC(GLOBAL(ic_invalidate))
2092 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2093 /* For system code, we use ic_invalidate_line_i, but user code
2094 needs a different mechanism. A kernel call is generally not
2095 available, and it would also be slow. Different SH4 variants use
2096 different sizes and associativities of the Icache. We use a small
2097 bit of dispatch code that can be put hidden in every shared object,
2098 which calls the actual processor-specific invalidation code in a
2099 separate module.
2100 Or if you have operating system support, the OS could mmap the
2101 processor-specific code from a single page, since it is highly
2102 repetitive. */
2103 .global GLOBAL(ic_invalidate)
2104 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2105 GLOBAL(ic_invalidate): /* Dispatch stub: r4 = address to invalidate. Writes back the data-cache block, then jumps into ic_invalidate_array at an offset derived from r4. */
2106 #ifdef __pic__
2107 #ifdef __vxworks
2108 mov.l 1f,r1
2109 mov.l 2f,r0
2110 mov.l @r1,r1
2111 mov.l 0f,r2
2112 mov.l @(r0,r1),r0 /* r0 = GOT address looked up through ___GOTT_BASE__ / ___GOTT_INDEX__ */
2113 #else
2114 mov.l 1f,r1
2115 mova 1f,r0
2116 mov.l 0f,r2
2117 add r1,r0 /* r0 = GOT address: address of literal 1 plus the value stored in it */
2118 #endif
2119 mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array, read from its GOT slot */
2120 #else
2121 mov.l 0f,r1 /* r1 = address of ic_invalidate_array */
2122 #endif
2123 ocbwb @r4 /* write back the operand-cache block containing r4 */
2124 mov.l @(8,r1),r0 /* r0 = word stored at array + 8, used as an offset mask */
2125 sub r1,r4 /* r4 = address relative to the array base */
2126 and r4,r0 /* keep the masked offset bits */
2127 add r1,r0 /* r0 = array base + masked offset */
2128 jmp @r0 /* continue in the array; it returns to our caller */
2129 mov.l @(4,r1),r0 /* delay slot: r0 = word stored at array + 4, handed to the array code */
2130 .align 2
2131 #ifndef __pic__
2132 0: .long GLOBAL(ic_invalidate_array)
2133 #else /* __pic__ */
2134 .global GLOBAL(ic_invalidate_array)
2135 0: .long GLOBAL(ic_invalidate_array)@GOT
2136 #ifdef __vxworks
2137 1: .long ___GOTT_BASE__
2138 2: .long ___GOTT_INDEX__
2139 #else
2140 1: .long _GLOBAL_OFFSET_TABLE_
2141 #endif
2142 ENDFUNC(GLOBAL(ic_invalidate)) /* NOTE(review): this ENDFUNC sits inside the __pic__ branch, so non-PIC builds do not emit it - confirm whether that is intended */
2143 #endif /* __pic__ */
2144 #endif /* SH4 */
2145 #endif /* L_ic_invalidate */
2147 #ifdef L_ic_invalidate_array
2148 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2149 .global GLOBAL(ic_invalidate_array)
2150 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2151 .global GLOBAL(ic_invalidate_array)
2152 FUNC(GLOBAL(ic_invalidate_array))
2153 GLOBAL(ic_invalidate_array): /* SH4A stand-in for the SH4 invalidation array: entered with r1 = array base and r4 = address minus r1. */
2154 add r1,r4 /* rebuild the absolute address by adding the base r1 back to r4 */
2155 synco
2156 icbi @r4 /* invalidate the instruction-cache block containing r4 */
2157 rts /* restored: the listing had dropped the return and its delay slot, so execution fell into the data word below */
2158 nop /* delay slot */
2159 .align 2
2160 .long 0
2161 ENDFUNC(GLOBAL(ic_invalidate_array))
2162 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2163 .global GLOBAL(ic_invalidate_array)
2164 .p2align 5
2165 FUNC(GLOBAL(ic_invalidate_array))
2166 /* This must be aligned to the beginning of a cache line. */
2167 GLOBAL(ic_invalidate_array): /* Array of 32-byte entries, one per cache line; running an entry in each way evicts the target line. Entered from ic_invalidate with r0 loaded from array + 4. */
2168 #ifndef WAYS
2169 #define WAYS 4
2170 #define WAY_SIZE 0x4000
2171 #endif
2172 #if WAYS == 1
2173 .rept WAY_SIZE * WAYS / 32 /* one 32-byte entry per line: rts + nop (4 bytes) + 7 longs (28 bytes) */
2174 rts /* restored: dropped from the listing; with it the entry is 32 bytes again */
2175 nop /* restored: delay slot */
2176 .rept 7
2177 .long WAY_SIZE - 32
2178 .endr
2179 .endr
2180 #elif WAYS <= 6
2181 .rept WAY_SIZE * WAYS / 32 /* 32-byte entry: 4 + 8 + 4*(WAYS-2) + 4*(7-WAYS) bytes */
2182 braf r0 /* r0 = WAY_SIZE + 8 on entry: hop to offset 12 of the entry one way further on */
2183 add #-8,r0 /* delay slot: r0 = WAY_SIZE for the following hops */
2184 .long WAY_SIZE + 8 /* array + 4: initial r0 loaded by ic_invalidate */
2185 .long WAY_SIZE - 32 /* array + 8: offset mask loaded by ic_invalidate */
2186 .rept WAYS-2
2187 braf r0 /* hop one way further, landing 4 bytes later in that entry */
2188 nop /* restored: delay slot, dropped from the listing */
2189 .endr
2190 .rept 7 - WAYS
2191 rts /* restored: return reached after the last hop; the .rept body was empty in the listing */
2192 nop /* restored: delay slot */
2193 .endr
2194 .endr
2195 #else /* WAYS > 6 */
2196 /* This variant needs two different pages for mmap-ing. */
2197 .rept WAYS-1
2198 .rept WAY_SIZE / 32 /* 32-byte entry: braf + nop (4 bytes) + 7 longs (28 bytes) */
2199 braf r0
2200 nop /* restored: delay slot, dropped from the listing */
2201 .long WAY_SIZE
2202 .rept 6
2203 .long WAY_SIZE - 32
2204 .endr
2205 .endr
2206 .endr
2207 .rept WAY_SIZE / 32 /* last way: 32-byte entries made of rts plus 15 nops */
2208 rts /* restored: dropped from the listing */
2209 .rept 15
2210 nop /* restored: the .rept body was empty in the listing */
2211 .endr
2212 .endr
2213 #endif /* WAYS */
2214 ENDFUNC(GLOBAL(ic_invalidate_array))
2215 #endif /* SH4 */
2216 #endif /* L_ic_invalidate_array */
2218 #if defined (__SH5__) && __SH5__ == 32
2219 #ifdef L_shcompact_call_trampoline
2220 .section .rodata
2221 .align 1
2222 LOCAL(ct_main_table): /* Dispatch table for GCC_shcompact_call_trampoline: 33 16-bit entries, each the offset of a handler from ct_main_label. The trampoline indexes it with 2 * nsb(cookie) from a base biased by -31 * 2. */
2223 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) /* from r6 on there are separate high and low FP-register handlers */
2236 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2249 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2250 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2251 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2252 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2253 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2254 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2255 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2256 .mode SHmedia
2257 .section .text..SHmedia32, "ax"
2258 .align 2
2260 /* This function loads 64-bit general-purpose registers from the
2261 stack, from a memory address contained in them or from an FP
2262 register, according to a cookie passed in r1. Its execution
2263 time is linear on the number of registers that actually have
2264 to be copied. See sh.h for details on the actual bit pattern.
2266 The function to be called is passed in r0. If a 32-bit return
2267 value is expected, the actual function will be tail-called,
2268 otherwise the return address will be stored in r10 (that the
2269 caller should expect to be clobbered) and the return value
2270 will be expanded into r2/r3 upon return. */
2272 .global GLOBAL(GCC_shcompact_call_trampoline)
2273 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2274 GLOBAL(GCC_shcompact_call_trampoline):
2275 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2276 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2277 pt/l LOCAL(ct_loop), tr1
2278 addz.l r1, r63, r1
2279 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2280 LOCAL(ct_loop):
2281 nsb r1, r28
2282 shlli r28, 1, r29
2283 ldx.w r0, r29, r30
2284 LOCAL(ct_main_label):
2285 ptrel/l r30, tr2
2286 blink tr2, r63
2287 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2288 /* It must be dr0, so just do it. */
2289 fmov.dq dr0, r2
2290 movi 7, r30
2291 shlli r30, 29, r31
2292 andc r1, r31, r1
2293 blink tr1, r63
2294 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2295 /* It is either dr0 or dr2. */
2296 movi 7, r30
2297 shlri r1, 26, r32
2298 shlli r30, 26, r31
2299 andc r1, r31, r1
2300 fmov.dq dr0, r3
2301 beqi/l r32, 4, tr1
2302 fmov.dq dr2, r3
2303 blink tr1, r63
2304 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2305 shlri r1, 23 - 3, r34
2306 andi r34, 3 << 3, r33
2307 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2308 LOCAL(ct_r4_fp_base):
2309 ptrel/l r32, tr2
2310 movi 7, r30
2311 shlli r30, 23, r31
2312 andc r1, r31, r1
2313 blink tr2, r63
2314 LOCAL(ct_r4_fp_copy):
2315 fmov.dq dr0, r4
2316 blink tr1, r63
2317 fmov.dq dr2, r4
2318 blink tr1, r63
2319 fmov.dq dr4, r4
2320 blink tr1, r63
2321 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2322 shlri r1, 20 - 3, r34
2323 andi r34, 3 << 3, r33
2324 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2325 LOCAL(ct_r5_fp_base):
2326 ptrel/l r32, tr2
2327 movi 7, r30
2328 shlli r30, 20, r31
2329 andc r1, r31, r1
2330 blink tr2, r63
2331 LOCAL(ct_r5_fp_copy):
2332 fmov.dq dr0, r5
2333 blink tr1, r63
2334 fmov.dq dr2, r5
2335 blink tr1, r63
2336 fmov.dq dr4, r5
2337 blink tr1, r63
2338 fmov.dq dr6, r5
2339 blink tr1, r63
2340 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2341 /* It must be dr8. */
2342 fmov.dq dr8, r6
2343 movi 15, r30
2344 shlli r30, 16, r31
2345 andc r1, r31, r1
2346 blink tr1, r63
2347 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2348 shlri r1, 16 - 3, r34
2349 andi r34, 3 << 3, r33
2350 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2351 LOCAL(ct_r6_fp_base):
2352 ptrel/l r32, tr2
2353 movi 7, r30
2354 shlli r30, 16, r31
2355 andc r1, r31, r1
2356 blink tr2, r63
2357 LOCAL(ct_r6_fp_copy):
2358 fmov.dq dr0, r6
2359 blink tr1, r63
2360 fmov.dq dr2, r6
2361 blink tr1, r63
2362 fmov.dq dr4, r6
2363 blink tr1, r63
2364 fmov.dq dr6, r6
2365 blink tr1, r63
2366 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2367 /* It is either dr8 or dr10. */
2368 movi 15 << 12, r31
2369 shlri r1, 12, r32
2370 andc r1, r31, r1
2371 fmov.dq dr8, r7
2372 beqi/l r32, 8, tr1
2373 fmov.dq dr10, r7
2374 blink tr1, r63
2375 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2376 shlri r1, 12 - 3, r34
2377 andi r34, 3 << 3, r33
2378 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2379 LOCAL(ct_r7_fp_base):
2380 ptrel/l r32, tr2
2381 movi 7 << 12, r31
2382 andc r1, r31, r1
2383 blink tr2, r63
2384 LOCAL(ct_r7_fp_copy):
2385 fmov.dq dr0, r7
2386 blink tr1, r63
2387 fmov.dq dr2, r7
2388 blink tr1, r63
2389 fmov.dq dr4, r7
2390 blink tr1, r63
2391 fmov.dq dr6, r7
2392 blink tr1, r63
2393 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2394 /* It is either dr8 or dr10. */
2395 movi 15 << 8, r31
2396 andi r1, 1 << 8, r32
2397 andc r1, r31, r1
2398 fmov.dq dr8, r8
2399 beq/l r32, r63, tr1
2400 fmov.dq dr10, r8
2401 blink tr1, r63
2402 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2403 shlri r1, 8 - 3, r34
2404 andi r34, 3 << 3, r33
2405 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2406 LOCAL(ct_r8_fp_base):
2407 ptrel/l r32, tr2
2408 movi 7 << 8, r31
2409 andc r1, r31, r1
2410 blink tr2, r63
2411 LOCAL(ct_r8_fp_copy):
2412 fmov.dq dr0, r8
2413 blink tr1, r63
2414 fmov.dq dr2, r8
2415 blink tr1, r63
2416 fmov.dq dr4, r8
2417 blink tr1, r63
2418 fmov.dq dr6, r8
2419 blink tr1, r63
2420 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2421 /* It is either dr8 or dr10. */
2422 movi 15 << 4, r31
2423 andi r1, 1 << 4, r32
2424 andc r1, r31, r1
2425 fmov.dq dr8, r9
2426 beq/l r32, r63, tr1
2427 fmov.dq dr10, r9
2428 blink tr1, r63
2429 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2430 shlri r1, 4 - 3, r34
2431 andi r34, 3 << 3, r33
2432 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2433 LOCAL(ct_r9_fp_base):
2434 ptrel/l r32, tr2
2435 movi 7 << 4, r31
2436 andc r1, r31, r1
2437 blink tr2, r63
2438 LOCAL(ct_r9_fp_copy):
2439 fmov.dq dr0, r9
2440 blink tr1, r63
2441 fmov.dq dr2, r9
2442 blink tr1, r63
2443 fmov.dq dr4, r9
2444 blink tr1, r63
2445 fmov.dq dr6, r9
2446 blink tr1, r63
2447 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2448 pt/l LOCAL(ct_r2_load), tr2
2449 movi 3, r30
2450 shlli r30, 29, r31
2451 and r1, r31, r32
2452 andc r1, r31, r1
2453 beq/l r31, r32, tr2
2454 addi.l r2, 8, r3
2455 ldx.q r2, r63, r2
2456 /* Fall through. */
2457 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2458 pt/l LOCAL(ct_r3_load), tr2
2459 movi 3, r30
2460 shlli r30, 26, r31
2461 and r1, r31, r32
2462 andc r1, r31, r1
2463 beq/l r31, r32, tr2
2464 addi.l r3, 8, r4
2465 ldx.q r3, r63, r3
2466 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2467 pt/l LOCAL(ct_r4_load), tr2
2468 movi 3, r30
2469 shlli r30, 23, r31
2470 and r1, r31, r32
2471 andc r1, r31, r1
2472 beq/l r31, r32, tr2
2473 addi.l r4, 8, r5
2474 ldx.q r4, r63, r4
2475 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2476 pt/l LOCAL(ct_r5_load), tr2
2477 movi 3, r30
2478 shlli r30, 20, r31
2479 and r1, r31, r32
2480 andc r1, r31, r1
2481 beq/l r31, r32, tr2
2482 addi.l r5, 8, r6
2483 ldx.q r5, r63, r5
2484 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2485 pt/l LOCAL(ct_r6_load), tr2
2486 movi 3 << 16, r31
2487 and r1, r31, r32
2488 andc r1, r31, r1
2489 beq/l r31, r32, tr2
2490 addi.l r6, 8, r7
2491 ldx.q r6, r63, r6
2492 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2493 pt/l LOCAL(ct_r7_load), tr2
2494 movi 3 << 12, r31
2495 and r1, r31, r32
2496 andc r1, r31, r1
2497 beq/l r31, r32, tr2
2498 addi.l r7, 8, r8
2499 ldx.q r7, r63, r7
2500 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2501 pt/l LOCAL(ct_r8_load), tr2
2502 movi 3 << 8, r31
2503 and r1, r31, r32
2504 andc r1, r31, r1
2505 beq/l r31, r32, tr2
2506 addi.l r8, 8, r9
2507 ldx.q r8, r63, r8
2508 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2509 pt/l LOCAL(ct_check_tramp), tr2
2510 ldx.q r9, r63, r9
2511 blink tr2, r63
2512 LOCAL(ct_r2_load):
2513 ldx.q r2, r63, r2
2514 blink tr1, r63
2515 LOCAL(ct_r3_load):
2516 ldx.q r3, r63, r3
2517 blink tr1, r63
2518 LOCAL(ct_r4_load):
2519 ldx.q r4, r63, r4
2520 blink tr1, r63
2521 LOCAL(ct_r5_load):
2522 ldx.q r5, r63, r5
2523 blink tr1, r63
2524 LOCAL(ct_r6_load):
2525 ldx.q r6, r63, r6
2526 blink tr1, r63
2527 LOCAL(ct_r7_load):
2528 ldx.q r7, r63, r7
2529 blink tr1, r63
2530 LOCAL(ct_r8_load):
2531 ldx.q r8, r63, r8
2532 blink tr1, r63
2533 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2534 movi 1, r30
2535 ldx.q r15, r63, r2
2536 shlli r30, 29, r31
2537 addi.l r15, 8, r15
2538 andc r1, r31, r1
2539 blink tr1, r63
2540 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2541 movi 1, r30
2542 ldx.q r15, r63, r3
2543 shlli r30, 26, r31
2544 addi.l r15, 8, r15
2545 andc r1, r31, r1
2546 blink tr1, r63
2547 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2548 movi 1, r30
2549 ldx.q r15, r63, r4
2550 shlli r30, 23, r31
2551 addi.l r15, 8, r15
2552 andc r1, r31, r1
2553 blink tr1, r63
2554 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2555 movi 1, r30
2556 ldx.q r15, r63, r5
2557 shlli r30, 20, r31
2558 addi.l r15, 8, r15
2559 andc r1, r31, r1
2560 blink tr1, r63
2561 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2562 movi 1, r30
2563 ldx.q r15, r63, r6
2564 shlli r30, 16, r31
2565 addi.l r15, 8, r15
2566 andc r1, r31, r1
2567 blink tr1, r63
2568 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2569 ldx.q r15, r63, r7
2570 movi 1 << 12, r31
2571 addi.l r15, 8, r15
2572 andc r1, r31, r1
2573 blink tr1, r63
2574 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2575 ldx.q r15, r63, r8
2576 movi 1 << 8, r31
2577 addi.l r15, 8, r15
2578 andc r1, r31, r1
2579 blink tr1, r63
2580 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2581 andi r1, 7 << 1, r30
2582 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2583 shlli r30, 2, r31
2584 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2585 sub.l r32, r31, r33
2586 ptabs/l r33, tr2
2587 blink tr2, r63
2588 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2589 ldx.q r15, r63, r3
2590 addi.l r15, 8, r15
2591 ldx.q r15, r63, r4
2592 addi.l r15, 8, r15
2593 ldx.q r15, r63, r5
2594 addi.l r15, 8, r15
2595 ldx.q r15, r63, r6
2596 addi.l r15, 8, r15
2597 ldx.q r15, r63, r7
2598 addi.l r15, 8, r15
2599 ldx.q r15, r63, r8
2600 addi.l r15, 8, r15
2601 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2602 ldx.q r15, r63, r9
2603 addi.l r15, 8, r15
2604 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2605 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2606 pt/u LOCAL(ct_ret_wide), tr2
2607 andi r1, 1, r1
2608 bne/u r1, r63, tr2
2609 LOCAL(ct_call_func): /* Just branch to the function. */
2610 blink tr0, r63
2611 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2612 64-bit return value. */
2613 add.l r18, r63, r10
2614 blink tr0, r18
2615 ptabs r10, tr0
2616 #if __LITTLE_ENDIAN__
2617 shari r2, 32, r3
2618 add.l r2, r63, r2
2619 #else
2620 add.l r2, r63, r3
2621 shari r2, 32, r2
2622 #endif
2623 blink tr0, r63
2625 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2626 #endif /* L_shcompact_call_trampoline */
2628 #ifdef L_shcompact_return_trampoline
2629 /* This function does the converse of the code in `ret_wide'
2630 above. It is tail-called by SHcompact functions returning
2631 64-bit non-floating-point values, to pack the 32-bit values in
2632 r2 and r3 into r2. */
/* In:  r2, r3 = the two 32-bit halves; r18 = address to return to.
   Out: r2 = 64-bit value.  Which of r2 / r3 ends up in the upper half
   depends on __LITTLE_ENDIAN__ (see the two branches below).
   Clobbers r3 and tr0.  */
2634 .mode SHmedia
2635 .section .text..SHmedia32, "ax"
2636 .align 2
2637 .global GLOBAL(GCC_shcompact_return_trampoline)
2638 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2639 GLOBAL(GCC_shcompact_return_trampoline):
2640 ptabs/l r18, tr0 /* tr0 = return target taken from r18. */
2641 #if __LITTLE_ENDIAN__
2642 addz.l r2, r63, r2 /* Zero-extend r2: it becomes the low half. */
2643 shlli r3, 32, r3 /* Move r3 into bits 32..63. */
2644 #else
2645 addz.l r3, r63, r3 /* Zero-extend r3: it becomes the low half. */
2646 shlli r2, 32, r2 /* Move r2 into bits 32..63. */
2647 #endif
2648 or r3, r2, r2 /* Merge both halves into r2. */
2649 blink tr0, r63 /* Return; r63 as link register discards the link. */
2651 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2652 #endif /* L_shcompact_return_trampoline */
2654 #ifdef L_shcompact_incoming_args
2655 .section .rodata
2656 .align 1
/* Dispatch table for GCC_shcompact_incoming_args.  Each entry is a 16-bit
   offset from ia_main_label to a handler.  ia_loop picks the entry from
   the nsb count of what is left of the cookie, so the entries follow the
   cookie fields from the most significant one (r2) down to the least
   significant bits (compare the masks used by the handlers below).  */
2657 LOCAL(ia_main_table):
2658 .word 1 /* Invalid, just loop */
2659 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2660 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2661 .word 1 /* Invalid, just loop */
2662 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2663 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2664 .word 1 /* Invalid, just loop */
2665 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2666 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2667 .word 1 /* Invalid, just loop */
2668 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2669 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2670 .word 1 /* Invalid, just loop */
2671 .word 1 /* Invalid, just loop */
2672 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2673 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2674 .word 1 /* Invalid, just loop */
2675 .word 1 /* Invalid, just loop */
2676 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2677 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2678 .word 1 /* Invalid, just loop */
2679 .word 1 /* Invalid, just loop */
2680 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2681 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2682 .word 1 /* Invalid, just loop */
2683 .word 1 /* Invalid, just loop */
2684 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2685 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2686 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2687 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2688 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2689 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2690 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2691 .mode SHmedia
2692 .section .text..SHmedia32, "ax"
2693 .align 2
2695 /* This function stores 64-bit general-purpose registers back in
2696 the stack, and loads the address in which each register
2697 was stored into itself. The lower 32 bits of r17 hold the address
2698 to begin storing, and the upper 32 bits of r17 hold the cookie.
2699 Its execution time is linear on the
2700 number of registers that actually have to be copied, and it is
2701 optimized for structures larger than 64 bits, as opposed to
2702 individual `long long' arguments. See sh.h for details on the
2703 actual bit pattern. */
/* Register use inside the function: r0 = remaining cookie bits,
   r17 = next store address (advanced by 8 after each store),
   r43 = biased table base, r36-r41 = scratch, tr0 = return target,
   tr1 = ia_loop, tr2 = handler being dispatched to.  */
2705 .global GLOBAL(GCC_shcompact_incoming_args)
2706 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2707 GLOBAL(GCC_shcompact_incoming_args):
2708 ptabs/l r18, tr0 /* Prepare to return. */
2709 shlri r17, 32, r0 /* Load the cookie. */
2710 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2711 pt/l LOCAL(ia_loop), tr1 /* tr1 = re-dispatch point. */
2712 add.l r17, r63, r17 /* Keep only the 32-bit store address in r17. */
2713 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* r43 = ia_main_table - 31 * 2. */
2714 LOCAL(ia_loop):
2715 nsb r0, r36 /* Sign-bit count of the remaining cookie. */
2716 shlli r36, 1, r37 /* Scale to the 2-byte table entries. */
2717 ldx.w r43, r37, r38 /* r38 = handler offset from ia_main_label. */
2718 LOCAL(ia_main_label):
2719 ptrel/l r38, tr2
2720 blink tr2, r63 /* Jump to the selected handler. */
/* The ia_rN_ld handlers all have the same shape: isolate and clear the
   two-bit cookie field for rN, store rN at r17, replace rN by the address
   it was stored at, advance r17, then go back to ia_loop if both bits of
   the field were set; otherwise fall through into the handler for the
   next register.  */
2721 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2722 movi 3, r38
2723 shlli r38, 29, r39 /* r39 = 3 << 29, the r2 field mask. */
2724 and r0, r39, r40 /* r40 = r2 field of the cookie. */
2725 andc r0, r39, r0 /* Clear that field in the cookie. */
2726 stx.q r17, r63, r2 /* Store r2 at r17. */
2727 add.l r17, r63, r2 /* r2 = address it was stored at. */
2728 addi.l r17, 8, r17
2729 beq/u r39, r40, tr1 /* Field == mask: back to ia_loop. */
2730 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2731 movi 3, r38
2732 shlli r38, 26, r39
2733 and r0, r39, r40
2734 andc r0, r39, r0
2735 stx.q r17, r63, r3
2736 add.l r17, r63, r3
2737 addi.l r17, 8, r17
2738 beq/u r39, r40, tr1
2739 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2740 movi 3, r38
2741 shlli r38, 23, r39
2742 and r0, r39, r40
2743 andc r0, r39, r0
2744 stx.q r17, r63, r4
2745 add.l r17, r63, r4
2746 addi.l r17, 8, r17
2747 beq/u r39, r40, tr1
2748 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2749 movi 3, r38
2750 shlli r38, 20, r39
2751 and r0, r39, r40
2752 andc r0, r39, r0
2753 stx.q r17, r63, r5
2754 add.l r17, r63, r5
2755 addi.l r17, 8, r17
2756 beq/u r39, r40, tr1
2757 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2758 movi 3, r38
2759 shlli r38, 16, r39
2760 and r0, r39, r40
2761 andc r0, r39, r0
2762 stx.q r17, r63, r6
2763 add.l r17, r63, r6
2764 addi.l r17, 8, r17
2765 beq/u r39, r40, tr1
2766 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2767 movi 3 << 12, r39
2768 and r0, r39, r40
2769 andc r0, r39, r0
2770 stx.q r17, r63, r7
2771 add.l r17, r63, r7
2772 addi.l r17, 8, r17
2773 beq/u r39, r40, tr1
2774 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2775 movi 3 << 8, r39
2776 and r0, r39, r40
2777 andc r0, r39, r0
2778 stx.q r17, r63, r8
2779 add.l r17, r63, r8
2780 addi.l r17, 8, r17
2781 beq/u r39, r40, tr1
2782 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2783 stx.q r17, r63, r9
2784 add.l r17, r63, r9
2785 blink tr0, r63 /* r9 is the last register: return. */
/* The ia_rN_push handlers clear a single cookie bit, store rN at r17,
   advance r17 and go back to ia_loop.  */
2786 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2787 movi 1, r38
2788 shlli r38, 29, r39
2789 andc r0, r39, r0
2790 stx.q r17, r63, r2
2791 addi.l r17, 8, r17
2792 blink tr1, r63
2793 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2794 movi 1, r38
2795 shlli r38, 26, r39
2796 andc r0, r39, r0
2797 stx.q r17, r63, r3
2798 addi.l r17, 8, r17
2799 blink tr1, r63
2800 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2801 movi 1, r38
2802 shlli r38, 23, r39
2803 andc r0, r39, r0
2804 stx.q r17, r63, r4
2805 addi.l r17, 8, r17
2806 blink tr1, r63
2807 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2808 movi 1, r38
2809 shlli r38, 20, r39
2810 andc r0, r39, r0
2811 stx.q r17, r63, r5
2812 addi.l r17, 8, r17
2813 blink tr1, r63
2814 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2815 movi 1, r38
2816 shlli r38, 16, r39
2817 andc r0, r39, r0
2818 stx.q r17, r63, r6
2819 addi.l r17, 8, r17
2820 blink tr1, r63
2821 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2822 movi 1 << 12, r39
2823 andc r0, r39, r0
2824 stx.q r17, r63, r7
2825 addi.l r17, 8, r17
2826 blink tr1, r63
2827 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2828 movi 1 << 8, r39
2829 andc r0, r39, r0
2830 stx.q r17, r63, r8
2831 addi.l r17, 8, r17
2832 blink tr1, r63
/* ia_push_seq jumps into the straight-line store sequence below.  Every
   register occupies two instructions there (8 bytes), so the entry point
   is ia_end_of_push_seq - 8 * ((cookie >> 1) & 7).  The sequence must
   therefore keep exactly this size and order.  */
2833 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2834 andi r0, 7 << 1, r38 /* r38 = 2 * count field. */
2835 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2836 shlli r38, 2, r39 /* r39 = 8 * count field. */
2837 shori LOCAL(ia_end_of_push_seq) & 65535, r40 /* r40 = ia_end_of_push_seq. */
2838 sub.l r40, r39, r41
2839 ptabs/l r41, tr2
2840 blink tr2, r63
2841 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2842 stx.q r17, r63, r3
2843 addi.l r17, 8, r17
2844 stx.q r17, r63, r4
2845 addi.l r17, 8, r17
2846 stx.q r17, r63, r5
2847 addi.l r17, 8, r17
2848 stx.q r17, r63, r6
2849 addi.l r17, 8, r17
2850 stx.q r17, r63, r7
2851 addi.l r17, 8, r17
2852 stx.q r17, r63, r8
2853 addi.l r17, 8, r17
2854 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2855 stx.q r17, r63, r9
2856 LOCAL(ia_return): /* Return. */
2857 blink tr0, r63
2858 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2859 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2860 #endif /* L_shcompact_incoming_args */
2861 #endif
2862 #if __SH5__
2863 #ifdef L_nested_trampoline
2864 #if __SH5__ == 32
2865 .section .text..SHmedia32,"ax"
2866 #else
2867 .text
2868 #endif
/* Template code for nested-function trampolines.  It obtains an address
   based on its own location, loads a branch target from offset 24 of that
   address and one more word from offset 28 (32-bit ABI) or 32 (64-bit ABI)
   into r1, then branches to the target.
   NOTE(review): the two words are presumably the function address and the
   static chain written next to the copied code by the trampoline
   initialisation in the compiler; confirm against sh.c.  */
2869 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2870 .global GLOBAL(GCC_nested_trampoline)
2871 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2872 GLOBAL(GCC_nested_trampoline):
2873 .mode SHmedia
2874 ptrel/u r63, tr0 /* tr0 = PC-relative target with a zero offset. */
2875 gettr tr0, r0 /* r0 = that address, used as the base below. */
2876 #if __SH5__ == 64
2877 ld.q r0, 24, r1
2878 #else
2879 ld.l r0, 24, r1
2880 #endif
2881 ptabs/l r1, tr1 /* tr1 = branch target loaded from base + 24. */
2882 #if __SH5__ == 64
2883 ld.q r0, 32, r1
2884 #else
2885 ld.l r0, 28, r1
2886 #endif
2887 blink tr1, r63 /* Jump to the target; r1 holds the second word. */
2889 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2890 #endif /* L_nested_trampoline */
2891 #endif /* __SH5__ */
2892 #if __SH5__ == 32
2893 #ifdef L_push_pop_shmedia_regs
2894 .section .text..SHmedia32,"ax"
2895 .mode SHmedia
2896 .align 2
/* GCC_push_shmedia_regs / GCC_push_shmedia_regs_nofpu
   Save registers in a block allocated below the stack pointer r15.
   The FPU variant first allocates 14 slots and stores dr36 .. dr62, then
   runs into the common part.  The common part allocates 27 more slots and
   stores, from the lowest address up: r28-r35, r44-r59, and the contents
   of tr5, tr6, tr7 (read through r60, r61, r62, which are overwritten).
   Returns to the address in r18 with r15 left pointing at the block.  */
2897 #ifndef __SH4_NOFPU__
2898 .global GLOBAL(GCC_push_shmedia_regs)
2899 FUNC(GLOBAL(GCC_push_shmedia_regs))
2900 GLOBAL(GCC_push_shmedia_regs):
2901 addi.l r15, -14*8, r15 /* Room for the 14 double registers. */
2902 fst.d r15, 13*8, dr62
2903 fst.d r15, 12*8, dr60
2904 fst.d r15, 11*8, dr58
2905 fst.d r15, 10*8, dr56
2906 fst.d r15, 9*8, dr54
2907 fst.d r15, 8*8, dr52
2908 fst.d r15, 7*8, dr50
2909 fst.d r15, 6*8, dr48
2910 fst.d r15, 5*8, dr46
2911 fst.d r15, 4*8, dr44
2912 fst.d r15, 3*8, dr42
2913 fst.d r15, 2*8, dr40
2914 fst.d r15, 1*8, dr38
2915 fst.d r15, 0*8, dr36
2916 #else /* ! __SH4_NOFPU__ */
2917 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2918 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2919 GLOBAL(GCC_push_shmedia_regs_nofpu):
2920 #endif /* ! __SH4_NOFPU__ */
2921 ptabs/l r18, tr0 /* tr0 = return target. */
2922 addi.l r15, -27*8, r15 /* Room for 27 integer slots. */
2923 gettr tr7, r62 /* Read tr7 .. tr5 so they can be stored. */
2924 gettr tr6, r61
2925 gettr tr5, r60
2926 st.q r15, 26*8, r62
2927 st.q r15, 25*8, r61
2928 st.q r15, 24*8, r60
2929 st.q r15, 23*8, r59
2930 st.q r15, 22*8, r58
2931 st.q r15, 21*8, r57
2932 st.q r15, 20*8, r56
2933 st.q r15, 19*8, r55
2934 st.q r15, 18*8, r54
2935 st.q r15, 17*8, r53
2936 st.q r15, 16*8, r52
2937 st.q r15, 15*8, r51
2938 st.q r15, 14*8, r50
2939 st.q r15, 13*8, r49
2940 st.q r15, 12*8, r48
2941 st.q r15, 11*8, r47
2942 st.q r15, 10*8, r46
2943 st.q r15, 9*8, r45
2944 st.q r15, 8*8, r44
2945 st.q r15, 7*8, r35
2946 st.q r15, 6*8, r34
2947 st.q r15, 5*8, r33
2948 st.q r15, 4*8, r32
2949 st.q r15, 3*8, r31
2950 st.q r15, 2*8, r30
2951 st.q r15, 1*8, r29
2952 st.q r15, 0*8, r28
2953 blink tr0, r63
2954 #ifndef __SH4_NOFPU__
2955 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2956 #else
2957 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2958 #endif
/* GCC_pop_shmedia_regs / GCC_pop_shmedia_regs_nofpu
   Reload what the matching push routine stored and release the block.
   r0 holds the number of bytes to release: 41*8 for the FPU variant
   (27 integer slots plus 14 double slots at offsets 27*8 .. 40*8),
   27*8 otherwise.  tr5-tr7 are rebuilt from the values reloaded into
   r60-r62.  Returns to the address in r18; r0 is clobbered.  */
2959 #ifndef __SH4_NOFPU__
2960 .global GLOBAL(GCC_pop_shmedia_regs)
2961 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2962 GLOBAL(GCC_pop_shmedia_regs):
2963 pt .L0, tr1
2964 movi 41*8, r0 /* Total block size including the FP slots. */
2965 fld.d r15, 40*8, dr62
2966 fld.d r15, 39*8, dr60
2967 fld.d r15, 38*8, dr58
2968 fld.d r15, 37*8, dr56
2969 fld.d r15, 36*8, dr54
2970 fld.d r15, 35*8, dr52
2971 fld.d r15, 34*8, dr50
2972 fld.d r15, 33*8, dr48
2973 fld.d r15, 32*8, dr46
2974 fld.d r15, 31*8, dr44
2975 fld.d r15, 30*8, dr42
2976 fld.d r15, 29*8, dr40
2977 fld.d r15, 28*8, dr38
2978 fld.d r15, 27*8, dr36
2979 blink tr1, r63 /* Continue with the integer part at .L0. */
2980 #else /* ! __SH4_NOFPU__ */
2981 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2982 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2983 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2984 #endif /* ! __SH4_NOFPU__ */
2985 movi 27*8, r0 /* Block size without FP slots. */
2986 .L0:
2987 ptabs r18, tr0 /* tr0 = return target. */
2988 ld.q r15, 26*8, r62
2989 ld.q r15, 25*8, r61
2990 ld.q r15, 24*8, r60
2991 ptabs r62, tr7 /* Restore tr7 .. tr5 from the reloaded values. */
2992 ptabs r61, tr6
2993 ptabs r60, tr5
2994 ld.q r15, 23*8, r59
2995 ld.q r15, 22*8, r58
2996 ld.q r15, 21*8, r57
2997 ld.q r15, 20*8, r56
2998 ld.q r15, 19*8, r55
2999 ld.q r15, 18*8, r54
3000 ld.q r15, 17*8, r53
3001 ld.q r15, 16*8, r52
3002 ld.q r15, 15*8, r51
3003 ld.q r15, 14*8, r50
3004 ld.q r15, 13*8, r49
3005 ld.q r15, 12*8, r48
3006 ld.q r15, 11*8, r47
3007 ld.q r15, 10*8, r46
3008 ld.q r15, 9*8, r45
3009 ld.q r15, 8*8, r44
3010 ld.q r15, 7*8, r35
3011 ld.q r15, 6*8, r34
3012 ld.q r15, 5*8, r33
3013 ld.q r15, 4*8, r32
3014 ld.q r15, 3*8, r31
3015 ld.q r15, 2*8, r30
3016 ld.q r15, 1*8, r29
3017 ld.q r15, 0*8, r28
3018 add.l r15, r0, r15 /* Release the whole block. */
3019 blink tr0, r63
3021 #ifndef __SH4_NOFPU__
3022 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3023 #else
3024 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3025 #endif
3026 #endif /* L_push_pop_shmedia_regs */
3027 #endif /* __SH5__ == 32 */
3029 #ifdef L_div_table
3030 #if __SH5__
3031 #if defined(__pic__) && defined(__SHMEDIA__)
/* PIC SHmedia copy of sdivsi3, kept next to the division table so that
   the table can be addressed with ptb / gettr.  It takes the dividend in
   r4 and the divisor in r5 and leaves the result in r0.  The reciprocal
   of the divisor is started from a table lookup and refined in two
   multiply steps; the fixed-point format of every intermediate value is
   given in the per-line comments.  */
3032 .global GLOBAL(sdivsi3)
3033 FUNC(GLOBAL(sdivsi3))
3034 #if __SH5__ == 32
3035 .section .text..SHmedia32,"ax"
3036 #else
3037 .text
3038 #endif
3039 #if 0
3040 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3041 in a text section does not work (at least for shared libraries):
3042 the linker sets the LSB of the address as if this was SHmedia code. */
3043 #define TEXT_DATA_BUG
3044 #endif
3045 .align 2
3046 // inputs: r4,r5
3047 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3048 // result in r0
3049 .global GLOBAL(sdivsi3)
3050 GLOBAL(sdivsi3):
/* tr0 is first used to obtain the table address (read back with gettr)
   and is only later loaded with the return target from r18.  */
3051 #ifdef TEXT_DATA_BUG
3052 ptb datalabel Local_div_table,tr0
3053 #else
3054 ptb GLOBAL(div_table_internal),tr0
3055 #endif
3056 nsb r5, r1
3057 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3058 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3059 /* bubble */
3060 gettr tr0,r20
3061 ldx.ub r20, r21, r19 // u0.8
3062 shari r25, 32, r25 // normalize to s2.30
3063 shlli r21, 1, r21
3064 muls.l r25, r19, r19 // s2.38
3065 ldx.w r20, r21, r21 // s2.14
3066 ptabs r18, tr0
3067 shari r19, 24, r19 // truncate to s2.14
3068 sub r21, r19, r19 // some 11 bit inverse in s1.14
3069 muls.l r19, r19, r21 // u0.28
3070 sub r63, r1, r1
3071 addi r1, 92, r1
3072 muls.l r25, r21, r18 // s2.58
3073 shlli r19, 45, r19 // multiply by two and convert to s2.58
3074 /* bubble */
3075 sub r19, r18, r18
3076 shari r18, 28, r18 // some 22 bit inverse in s1.30
3077 muls.l r18, r25, r0 // s2.60
3078 muls.l r18, r4, r25 // s32.30
3079 /* bubble */
3080 shari r0, 16, r19 // s-16.44
3081 muls.l r19, r18, r19 // s-16.74
3082 shari r25, 63, r0
3083 shari r4, 14, r18 // s19.-14
3084 shari r19, 30, r19 // s-16.44
3085 muls.l r19, r18, r19 // s15.30
3086 xor r21, r0, r21 // You could also use the constant 1 << 27.
3087 add r21, r25, r21
3088 sub r21, r19, r21
3089 shard r21, r1, r21
3090 sub r21, r0, r0
3091 blink tr0, r63
3092 ENDFUNC(GLOBAL(sdivsi3))
3093 /* This table has been generated by divtab.c .
3094 Defects for bias -330:
3095 Max defect: 6.081536e-07 at -1.000000e+00
3096 Min defect: 2.849516e-08 at 1.030651e+00
3097 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3098 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3099 Defect at 1: 1.238659e-07
3100 Defect at -2: 1.061708e-07 */
3101 #else /* ! __pic__ || ! __SHMEDIA__ */
3102 .section .rodata
3103 #endif /* __pic__ && __SHMEDIA__ */
3104 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
/* Private copy of the division table, used by sdivsi3 when TEXT_DATA_BUG
   is defined.  The object is 128 bytes and its label sits in the middle:
   64 bytes precede it (16 halfword constants, 16 byte factors, 16 bytes
   of padding) and 64 bytes follow it (16 bytes of padding, 16 byte
   factors, 16 halfword constants).  sdivsi3 reads a factor as the byte
   at label + i and a constant as the halfword at label + 2 * i, where i
   is the signed index it derives from the normalised divisor.  */
3105 .balign 2
3106 .type Local_div_table,@object
3107 .size Local_div_table,128
3108 /* negative division constants */
3109 .word -16638
3110 .word -17135
3111 .word -17737
3112 .word -18433
3113 .word -19103
3114 .word -19751
3115 .word -20583
3116 .word -21383
3117 .word -22343
3118 .word -23353
3119 .word -24407
3120 .word -25582
3121 .word -26863
3122 .word -28382
3123 .word -29965
3124 .word -31800
3125 /* negative division factors */
3126 .byte 66
3127 .byte 70
3128 .byte 75
3129 .byte 81
3130 .byte 87
3131 .byte 93
3132 .byte 101
3133 .byte 109
3134 .byte 119
3135 .byte 130
3136 .byte 142
3137 .byte 156
3138 .byte 172
3139 .byte 192
3140 .byte 214
3141 .byte 241
3142 .skip 16
3143 Local_div_table:
3144 .skip 16
3145 /* positive division factors */
3146 .byte 241
3147 .byte 214
3148 .byte 192
3149 .byte 172
3150 .byte 156
3151 .byte 142
3152 .byte 130
3153 .byte 119
3154 .byte 109
3155 .byte 101
3156 .byte 93
3157 .byte 87
3158 .byte 81
3159 .byte 75
3160 .byte 70
3161 .byte 66
3162 /* positive division constants */
3163 .word 31801
3164 .word 29966
3165 .word 28383
3166 .word 26864
3167 .word 25583
3168 .word 24408
3169 .word 23354
3170 .word 22344
3171 .word 21384
3172 .word 20584
3173 .word 19752
3174 .word 19104
3175 .word 18434
3176 .word 17738
3177 .word 17136
3178 .word 16639
3179 .section .rodata
3180 #endif /* TEXT_DATA_BUG */
/* Exported division table (also reachable through the hidden alias
   div_table_internal).  The object is 128 bytes and the label sits in the
   middle: 64 bytes precede it (16 halfword constants, 16 byte factors,
   16 bytes of padding) and 64 bytes follow it (16 bytes of padding,
   16 byte factors, 16 halfword constants).  The PIC SHmedia sdivsi3 in
   this file reads a factor as the byte at label + i and a constant as
   the halfword at label + 2 * i for a signed index i.  */
3181 .balign 2
3182 .type GLOBAL(div_table),@object
3183 .size GLOBAL(div_table),128
3184 /* negative division constants */
3185 .word -16638
3186 .word -17135
3187 .word -17737
3188 .word -18433
3189 .word -19103
3190 .word -19751
3191 .word -20583
3192 .word -21383
3193 .word -22343
3194 .word -23353
3195 .word -24407
3196 .word -25582
3197 .word -26863
3198 .word -28382
3199 .word -29965
3200 .word -31800
3201 /* negative division factors */
3202 .byte 66
3203 .byte 70
3204 .byte 75
3205 .byte 81
3206 .byte 87
3207 .byte 93
3208 .byte 101
3209 .byte 109
3210 .byte 119
3211 .byte 130
3212 .byte 142
3213 .byte 156
3214 .byte 172
3215 .byte 192
3216 .byte 214
3217 .byte 241
3218 .skip 16
3219 .global GLOBAL(div_table)
3220 GLOBAL(div_table):
3221 HIDDEN_ALIAS(div_table_internal,div_table)
3222 .skip 16
3223 /* positive division factors */
3224 .byte 241
3225 .byte 214
3226 .byte 192
3227 .byte 172
3228 .byte 156
3229 .byte 142
3230 .byte 130
3231 .byte 119
3232 .byte 109
3233 .byte 101
3234 .byte 93
3235 .byte 87
3236 .byte 81
3237 .byte 75
3238 .byte 70
3239 .byte 66
3240 /* positive division constants */
3241 .word 31801
3242 .word 29966
3243 .word 28383
3244 .word 26864
3245 .word 25583
3246 .word 24408
3247 .word 23354
3248 .word 22344
3249 .word 21384
3250 .word 20584
3251 .word 19752
3252 .word 19104
3253 .word 18434
3254 .word 17738
3255 .word 17136
3256 .word 16639
3258 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3259 /* This code uses shld, and is thus not suitable for SH1 / SH2. */
3261 /* Signed / unsigned division without use of FPU, optimized for SH4.
3262 Uses a lookup table for divisors in the range -128 .. +128, and
3263 div1 with case distinction for larger divisors in three more ranges.
3264 The code is lumped together with the table to allow the use of mova. */
/* Byte offsets inside a 32-bit stack slot, chosen by endianness:
   L_LSB    - least significant byte,
   L_LSWMSB - upper byte of the low 16-bit half,
   L_MSWLSB - lower byte of the high 16-bit half.  */
3265 #ifdef __LITTLE_ENDIAN__
3266 #define L_LSB 0
3267 #define L_LSWMSB 1
3268 #define L_MSWLSB 2
3269 #else
3270 #define L_LSB 3
3271 #define L_LSWMSB 2
3272 #define L_MSWLSB 1
3273 #endif
/* udivsi3_i4i: unsigned 32-bit division.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   r4 and r5 are saved on the stack and reloaded before every return;
   r1 is scratch.  The divisor selects one of four paths:
     divisor <= 128            -> udiv_le128 (multiply by a table inverse),
     divisor fits in 16 bits   -> div1 steps, tail shared at udiv_25,
     divisor > dividend >> 8   -> udiv_r8,
     otherwise                 -> udiv_ge64k / div_ge64k_2.
   The labels div_le128, div_ge64k and div_r8 are entry points for
   sdivsi3_i4i, which arrives with r4 and r5 already saved.
   Every rts below is followed by its delay-slot instruction, which still
   executes before control returns to the caller.  */
3275 .balign 4
3276 .global GLOBAL(udivsi3_i4i)
3277 FUNC(GLOBAL(udivsi3_i4i))
3278 GLOBAL(udivsi3_i4i):
3279 mov.w LOCAL(c128_w), r1 /* r1 = 128 */
3280 div0u
3281 mov r4,r0
3282 shlr8 r0 /* r0 = dividend >> 8 */
3283 cmp/hi r1,r5 /* T = divisor > 128 (unsigned) */
3284 extu.w r5,r1
3285 bf LOCAL(udiv_le128)
3286 cmp/eq r5,r1 /* T = divisor fits in 16 bits */
3287 bf LOCAL(udiv_ge64k)
3288 shlr r0 /* r0 = dividend >> 9 */
3289 mov r5,r1 /* Keep the unshifted divisor for the save slot. */
3290 shll16 r5
3291 mov.l r4,@-r15 /* Save r4. */
3292 div1 r5,r0
3293 mov.l r1,@-r15 /* Save the original r5. */
3294 div1 r5,r0
3295 div1 r5,r0
3296 bra LOCAL(udiv_25) /* Shared tail inside sdivsi3_i4i. */
3297 div1 r5,r0 /* (delay slot) */
3299 LOCAL(div_le128): /* Entry from sdivsi3_i4i; r4 / r5 already saved. */
3300 mova LOCAL(div_table_ix),r0
3301 bra LOCAL(div_le128_2)
3302 mov.b @(r0,r5),r1 /* (delay slot) r1 = offset into div_table_inv */
3303 LOCAL(udiv_le128):
3304 mov.l r4,@-r15 /* Save r4. */
3305 mova LOCAL(div_table_ix),r0
3306 mov.b @(r0,r5),r1 /* r1 = offset into div_table_inv */
3307 mov.l r5,@-r15 /* Save r5. */
3308 LOCAL(div_le128_2):
3309 mova LOCAL(div_table_inv),r0
3310 mov.l @(r0,r1),r1 /* r1 = table inverse for this divisor */
3311 mov r5,r0
3312 tst #0xfe,r0 /* T = (divisor & 0xfe) == 0 */
3313 mova LOCAL(div_table_clz),r0
3314 dmulu.l r1,r4 /* MACH:MACL = inverse * dividend */
3315 mov.b @(r0,r5),r1 /* r1 = shift count for shld */
3316 bt/s LOCAL(div_by_1)
3317 mov r4,r0 /* (delay slot) r0 = dividend */
3318 mov.l @r15+,r5 /* Restore r5. */
3319 sts mach,r0 /* r0 = high word of the product */
3320 /* clrt */
3321 addc r4,r0 /* Add the dividend (T is clear: the bt/s above fell through). */
3322 mov.l @r15+,r4 /* Restore r4. */
3323 rotcr r0 /* Bring the carry of the addc back in at bit 31. */
3324 rts
3325 shld r1,r0 /* (delay slot) apply the table shift count */
3327 LOCAL(div_by_1_neg):
3328 neg r4,r0 /* r0 = -r4 */
3329 LOCAL(div_by_1):
3330 mov.l @r15+,r5 /* Restore r5. */
3331 rts
3332 mov.l @r15+,r4 /* (delay slot) restore r4 */
3334 LOCAL(div_ge64k): /* Entry from sdivsi3_i4i; T = divisor > dividend >> 8. */
3335 bt/s LOCAL(div_r8)
3336 div0u /* (delay slot) */
3337 shll8 r5
3338 bra LOCAL(div_ge64k_2)
3339 div1 r5,r0 /* (delay slot) */
3340 LOCAL(udiv_ge64k):
3341 cmp/hi r0,r5 /* T = divisor > dividend >> 8 */
3342 mov r5,r1 /* Keep the unshifted divisor for the save slot. */
3343 bt LOCAL(udiv_r8)
3344 shll8 r5
3345 mov.l r4,@-r15 /* Save r4. */
3346 div1 r5,r0
3347 mov.l r1,@-r15 /* Save the original r5. */
3348 LOCAL(div_ge64k_2):
3349 div1 r5,r0
3350 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
3351 .rept 4
3352 div1 r5,r0
3353 .endr
3354 mov.l r1,@-r15 /* Push a zeroed scratch word. */
3355 div1 r5,r0
3356 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended, i.e. ~0xff */
3357 div1 r5,r0
3358 mov.b r0,@(L_LSWMSB,r15) /* Park the low byte of r0 in the scratch word. */
3359 xor r4,r0
3360 and r1,r0
3361 bra LOCAL(div_ge64k_end) /* Shared tail inside sdivsi3_i4i. */
3362 xor r4,r0 /* (delay slot) r0 = (r0 & ~0xff) | (r4 & 0xff) */
3364 LOCAL(div_r8): /* Entry via div_ge64k; r4 / r5 already saved. */
3365 shll16 r4
3366 bra LOCAL(div_r8_2)
3367 shll8 r4 /* (delay slot) */
3368 LOCAL(udiv_r8):
3369 mov.l r4,@-r15 /* Save r4. */
3370 shll16 r4
3371 clrt
3372 shll8 r4
3373 mov.l r5,@-r15 /* Save r5. */
3374 LOCAL(div_r8_2):
3375 rotcl r4
3376 mov r0,r1
3377 div1 r5,r1
3378 mov r4,r0
3379 rotcl r0
3380 mov r5,r4 /* The divisor moves to r4 so that r5 can be restored early. */
3381 div1 r5,r1
3382 .rept 5
3383 rotcl r0; div1 r5,r1
3384 .endr
3385 rotcl r0
3386 mov.l @r15+,r5 /* Restore r5. */
3387 div1 r4,r1
3388 mov.l @r15+,r4 /* Restore r4. */
3389 rts
3390 rotcl r0 /* (delay slot) shift in the last quotient bit */
3392 ENDFUNC(GLOBAL(udivsi3_i4i))
/* sdivsi3_i4i: signed 32-bit division.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   Both operands are saved on the stack, replaced by their negation where
   they are negative, and then divided along the same four ranges as in
   udivsi3_i4i.  pos_result handles equal operand signs and reuses the
   unsigned paths (div_le128, div_ge64k); neg_result handles differing
   signs and negates the quotient before returning.
   Every rts below is followed by its delay-slot instruction, which still
   executes before control returns to the caller.  */
3394 .global GLOBAL(sdivsi3_i4i)
3395 FUNC(GLOBAL(sdivsi3_i4i))
3396 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3397 but we effectively clobber only r1. */
3398 GLOBAL(sdivsi3_i4i):
3399 mov.l r4,@-r15 /* Save r4. */
3400 cmp/pz r5 /* T = divisor >= 0 */
3401 mov.w LOCAL(c128_w), r1 /* r1 = 128 */
3402 bt/s LOCAL(pos_divisor)
3403 cmp/pz r4 /* (delay slot) T = dividend >= 0 */
3404 mov.l r5,@-r15 /* Save r5. */
3405 neg r5,r5 /* Negative divisor: negate it. */
3406 bt/s LOCAL(neg_result) /* Dividend >= 0, divisor < 0. */
3407 cmp/hi r1,r5 /* (delay slot) T = divisor > 128 */
3408 neg r4,r4 /* Both were negative: negate the dividend too. */
3409 LOCAL(pos_result):
3410 extu.w r5,r0
3411 bf LOCAL(div_le128)
3412 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
3413 mov r4,r0
3414 shlr8 r0 /* r0 = dividend >> 8 */
3415 bf/s LOCAL(div_ge64k)
3416 cmp/hi r0,r5 /* (delay slot) T = divisor > dividend >> 8 */
3417 div0u
3418 shll16 r5
3419 div1 r5,r0
3420 div1 r5,r0
3421 div1 r5,r0
3422 LOCAL(udiv_25): /* udivsi3_i4i joins here for 16-bit divisors. */
3423 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
3424 div1 r5,r0
3425 div1 r5,r0
3426 mov.l r1,@-r15 /* Push a zeroed scratch word. */
3427 .rept 3
3428 div1 r5,r0
3429 .endr
3430 mov.b r0,@(L_MSWLSB,r15) /* Park a byte of quotient bits in the scratch word. */
3431 xtrct r4,r0
3432 swap.w r0,r0
3433 .rept 8
3434 div1 r5,r0
3435 .endr
3436 mov.b r0,@(L_LSWMSB,r15) /* Park the next byte of quotient bits. */
3437 LOCAL(div_ge64k_end): /* The >= 64K paths join here. */
3438 .rept 8
3439 div1 r5,r0
3440 .endr
3441 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3442 extu.b r0,r0
3443 mov.l @r15+,r5 /* Restore r5. */
3444 or r4,r0 /* Merge the parked bytes with the low byte. */
3445 mov.l @r15+,r4 /* Restore r4. */
3446 rts
3447 rotcl r0 /* (delay slot) shift in the last quotient bit */
3449 LOCAL(div_le128_neg):
3450 tst #0xfe,r0 /* T = (divisor & 0xfe) == 0; r0 holds extu.w of the divisor. */
3451 mova LOCAL(div_table_ix),r0
3452 mov.b @(r0,r5),r1 /* r1 = offset into div_table_inv */
3453 mova LOCAL(div_table_inv),r0
3454 bt/s LOCAL(div_by_1_neg)
3455 mov.l @(r0,r1),r1 /* (delay slot) r1 = table inverse */
3456 mova LOCAL(div_table_clz),r0
3457 dmulu.l r1,r4 /* MACH:MACL = inverse * dividend */
3458 mov.b @(r0,r5),r1 /* r1 = shift count for shld */
3459 mov.l @r15+,r5 /* Restore r5. */
3460 sts mach,r0 /* r0 = high word of the product */
3461 /* clrt */
3462 addc r4,r0 /* Add the dividend (T is clear: the bt/s above fell through). */
3463 mov.l @r15+,r4 /* Restore r4. */
3464 rotcr r0 /* Bring the carry of the addc back in at bit 31. */
3465 shld r1,r0 /* Apply the table shift count. */
3466 rts
3467 neg r0,r0 /* (delay slot) operand signs differed: negate the quotient */
3469 LOCAL(pos_divisor):
3470 mov.l r5,@-r15 /* Save r5. */
3471 bt/s LOCAL(pos_result) /* Dividend >= 0 as well. */
3472 cmp/hi r1,r5 /* (delay slot) T = divisor > 128 */
3473 neg r4,r4 /* Negative dividend: negate it. */
3474 LOCAL(neg_result):
3475 extu.w r5,r0
3476 bf LOCAL(div_le128_neg)
3477 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
3478 mov r4,r0
3479 shlr8 r0 /* r0 = dividend >> 8 */
3480 bf/s LOCAL(div_ge64k_neg)
3481 cmp/hi r0,r5 /* (delay slot) T = divisor > dividend >> 8 */
3482 div0u
3483 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
3484 shll16 r5
3485 div1 r5,r0
3486 mov.l r1,@-r15 /* Push a zeroed scratch word. */
3487 .rept 7
3488 div1 r5,r0
3489 .endr
3490 mov.b r0,@(L_MSWLSB,r15)
3491 xtrct r4,r0
3492 swap.w r0,r0
3493 .rept 8
3494 div1 r5,r0
3495 .endr
3496 mov.b r0,@(L_LSWMSB,r15)
3497 LOCAL(div_ge64k_neg_end):
3498 .rept 8
3499 div1 r5,r0
3500 .endr
3501 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3502 extu.b r0,r1
3503 mov.l @r15+,r5 /* Restore r5. */
3504 or r4,r1 /* Merge the parked bytes with the low byte. */
3505 LOCAL(div_r8_neg_end):
3506 mov.l @r15+,r4 /* Restore r4. */
3507 rotcl r1 /* Shift in the last quotient bit. */
3508 rts
3509 neg r1,r0 /* (delay slot) r0 = negated quotient */
3511 LOCAL(div_ge64k_neg):
3512 bt/s LOCAL(div_r8_neg)
3513 div0u /* (delay slot) */
3514 shll8 r5
3515 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
3516 .rept 6
3517 div1 r5,r0
3518 .endr
3519 mov.l r1,@-r15 /* Push a zeroed scratch word. */
3520 div1 r5,r0
3521 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended, i.e. ~0xff */
3522 div1 r5,r0
3523 mov.b r0,@(L_LSWMSB,r15)
3524 xor r4,r0
3525 and r1,r0
3526 bra LOCAL(div_ge64k_neg_end)
3527 xor r4,r0 /* (delay slot) r0 = (r0 & ~0xff) | (r4 & 0xff) */
/* 16-bit constant, placed between code paths that never fall into it;
   loaded with a PC-relative mov.w.  */
3529 LOCAL(c128_w):
3530 .word 128
3532 LOCAL(div_r8_neg):
3533 clrt
3534 shll16 r4
3535 mov r4,r1
3536 shll8 r1
3537 mov r5,r4 /* The divisor moves to r4 so that r5 can be restored early. */
3538 .rept 7
3539 rotcl r1; div1 r5,r0
3540 .endr
3541 mov.l @r15+,r5 /* Restore r5. */
3542 rotcl r1
3543 bra LOCAL(div_r8_neg_end)
3544 div1 r4,r0 /* (delay slot) */
/* 16-bit constant; mov.w sign-extends it to 0xffffff00.  */
3546 LOCAL(m256_w):
3547 .word 0xff00
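3548 /* This table has been generated by divtab-sh4.c. It is indexed by the
 positive divisor and supplies the shift count that the table-driven
 division paths apply with shld. */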
3549 .balign 4
3550 LOCAL(div_table_clz):
3551 .byte 0
3552 .byte 1
3553 .byte 0
3554 .byte -1
3555 .byte -1
3556 .byte -2
3557 .byte -2
3558 .byte -2
3559 .byte -2
3560 .byte -3
3561 .byte -3
3562 .byte -3
3563 .byte -3
3564 .byte -3
3565 .byte -3
3566 .byte -3
3567 .byte -3
3568 .byte -4
3569 .byte -4
3570 .byte -4
3571 .byte -4
3572 .byte -4
3573 .byte -4
3574 .byte -4
3575 .byte -4
3576 .byte -4
3577 .byte -4
3578 .byte -4
3579 .byte -4
3580 .byte -4
3581 .byte -4
3582 .byte -4
3583 .byte -4
3584 .byte -5
3585 .byte -5
3586 .byte -5
3587 .byte -5
3588 .byte -5
3589 .byte -5
3590 .byte -5
3591 .byte -5
3592 .byte -5
3593 .byte -5
3594 .byte -5
3595 .byte -5
3596 .byte -5
3597 .byte -5
3598 .byte -5
3599 .byte -5
3600 .byte -5
3601 .byte -5
3602 .byte -5
3603 .byte -5
3604 .byte -5
3605 .byte -5
3606 .byte -5
3607 .byte -5
3608 .byte -5
3609 .byte -5
3610 .byte -5
3611 .byte -5
3612 .byte -5
3613 .byte -5
3614 .byte -5
3615 .byte -5
3616 .byte -6
3617 .byte -6
3618 .byte -6
3619 .byte -6
3620 .byte -6
3621 .byte -6
3622 .byte -6
3623 .byte -6
3624 .byte -6
3625 .byte -6
3626 .byte -6
3627 .byte -6
3628 .byte -6
3629 .byte -6
3630 .byte -6
3631 .byte -6
3632 .byte -6
3633 .byte -6
3634 .byte -6
3635 .byte -6
3636 .byte -6
3637 .byte -6
3638 .byte -6
3639 .byte -6
3640 .byte -6
3641 .byte -6
3642 .byte -6
3643 .byte -6
3644 .byte -6
3645 .byte -6
3646 .byte -6
3647 .byte -6
3648 .byte -6
3649 .byte -6
3650 .byte -6
3651 .byte -6
3652 .byte -6
3653 .byte -6
3654 .byte -6
3655 .byte -6
3656 .byte -6
3657 .byte -6
3658 .byte -6
3659 .byte -6
3660 .byte -6
3661 .byte -6
3662 .byte -6
3663 .byte -6
3664 .byte -6
3665 .byte -6
3666 .byte -6
3667 .byte -6
3668 .byte -6
3669 .byte -6
3670 .byte -6
3671 .byte -6
3672 .byte -6
3673 .byte -6
3674 .byte -6
3675 .byte -6
3676 .byte -6
3677 .byte -6
3678 .byte -6
3679 /* Lookup table translating positive divisor to index into table of
3680 normalized inverse. N.B. the '0' entry is also the last entry of the
3681 previous table, and causes an unaligned access for division by zero. */
3682 LOCAL(div_table_ix):
3683 .byte -6
3684 .byte -128
3685 .byte -128
3686 .byte 0
3687 .byte -128
3688 .byte -64
3689 .byte 0
3690 .byte 64
3691 .byte -128
3692 .byte -96
3693 .byte -64
3694 .byte -32
3695 .byte 0
3696 .byte 32
3697 .byte 64
3698 .byte 96
3699 .byte -128
3700 .byte -112
3701 .byte -96
3702 .byte -80
3703 .byte -64
3704 .byte -48
3705 .byte -32
3706 .byte -16
3707 .byte 0
3708 .byte 16
3709 .byte 32
3710 .byte 48
3711 .byte 64
3712 .byte 80
3713 .byte 96
3714 .byte 112
3715 .byte -128
3716 .byte -120
3717 .byte -112
3718 .byte -104
3719 .byte -96
3720 .byte -88
3721 .byte -80
3722 .byte -72
3723 .byte -64
3724 .byte -56
3725 .byte -48
3726 .byte -40
3727 .byte -32
3728 .byte -24
3729 .byte -16
3730 .byte -8
3731 .byte 0
3732 .byte 8
3733 .byte 16
3734 .byte 24
3735 .byte 32
3736 .byte 40
3737 .byte 48
3738 .byte 56
3739 .byte 64
3740 .byte 72
3741 .byte 80
3742 .byte 88
3743 .byte 96
3744 .byte 104
3745 .byte 112
3746 .byte 120
3747 .byte -128
3748 .byte -124
3749 .byte -120
3750 .byte -116
3751 .byte -112
3752 .byte -108
3753 .byte -104
3754 .byte -100
3755 .byte -96
3756 .byte -92
3757 .byte -88
3758 .byte -84
3759 .byte -80
3760 .byte -76
3761 .byte -72
3762 .byte -68
3763 .byte -64
3764 .byte -60
3765 .byte -56
3766 .byte -52
3767 .byte -48
3768 .byte -44
3769 .byte -40
3770 .byte -36
3771 .byte -32
3772 .byte -28
3773 .byte -24
3774 .byte -20
3775 .byte -16
3776 .byte -12
3777 .byte -8
3778 .byte -4
3779 .byte 0
3780 .byte 4
3781 .byte 8
3782 .byte 12
3783 .byte 16
3784 .byte 20
3785 .byte 24
3786 .byte 28
3787 .byte 32
3788 .byte 36
3789 .byte 40
3790 .byte 44
3791 .byte 48
3792 .byte 52
3793 .byte 56
3794 .byte 60
3795 .byte 64
3796 .byte 68
3797 .byte 72
3798 .byte 76
3799 .byte 80
3800 .byte 84
3801 .byte 88
3802 .byte 92
3803 .byte 96
3804 .byte 100
3805 .byte 104
3806 .byte 108
3807 .byte 112
3808 .byte 116
3809 .byte 120
3810 .byte 124
3811 .byte -128
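3812 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32.
 The first entry (1/64, whose stored fraction is zero) is also labelled
 zero_l and is loaded by the division code as a 32-bit zero constant. */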
3813 .balign 4
3814 LOCAL(zero_l):
3815 .long 0x0
3816 .long 0xF81F81F9
3817 .long 0xF07C1F08
3818 .long 0xE9131AC0
3819 .long 0xE1E1E1E2
3820 .long 0xDAE6076C
3821 .long 0xD41D41D5
3822 .long 0xCD856891
3823 .long 0xC71C71C8
3824 .long 0xC0E07039
3825 .long 0xBACF914D
3826 .long 0xB4E81B4F
3827 .long 0xAF286BCB
3828 .long 0xA98EF607
3829 .long 0xA41A41A5
3830 .long 0x9EC8E952
3831 .long 0x9999999A
3832 .long 0x948B0FCE
3833 .long 0x8F9C18FA
3834 .long 0x8ACB90F7
3835 .long 0x86186187
3836 .long 0x81818182
3837 .long 0x7D05F418
3838 .long 0x78A4C818
3839 .long 0x745D1746
3840 .long 0x702E05C1
3841 .long 0x6C16C16D
3842 .long 0x68168169
3843 .long 0x642C8591
3844 .long 0x60581606
3845 .long 0x5C9882BA
3846 .long 0x58ED2309
3847 LOCAL(div_table_inv):
3848 .long 0x55555556
3849 .long 0x51D07EAF
3850 .long 0x4E5E0A73
3851 .long 0x4AFD6A06
3852 .long 0x47AE147B
3853 .long 0x446F8657
3854 .long 0x41414142
3855 .long 0x3E22CBCF
3856 .long 0x3B13B13C
3857 .long 0x38138139
3858 .long 0x3521CFB3
3859 .long 0x323E34A3
3860 .long 0x2F684BDB
3861 .long 0x2C9FB4D9
3862 .long 0x29E4129F
3863 .long 0x27350B89
3864 .long 0x24924925
3865 .long 0x21FB7813
3866 .long 0x1F7047DD
3867 .long 0x1CF06ADB
3868 .long 0x1A7B9612
3869 .long 0x18118119
3870 .long 0x15B1E5F8
3871 .long 0x135C8114
3872 .long 0x11111112
3873 .long 0xECF56BF
3874 .long 0xC9714FC
3875 .long 0xA6810A7
3876 .long 0x8421085
3877 .long 0x624DD30
3878 .long 0x4104105
3879 .long 0x2040811
3880 /* maximum error: 0.987342 scaled: 0.921875*/
3882 ENDFUNC(GLOBAL(sdivsi3_i4i))
3883 #endif /* SH3 / SH4 */
3885 #endif /* L_div_table */
3887 #ifdef L_udiv_qrnnd_16
3888 #if !__SHMEDIA__
/* udiv_qrnnd_16: one 16-bit-wide step of a two-word by one-word unsigned
   division.
   In:  r0 = n1, r4 = n0, r5 = d, r6 = d1 (see the register notes below).
   Out: r0 = rn, r1 = qn.  r2 is scratch (__m).
   The common path runs sixteen div1 steps against r6, multiplies the
   16-bit quotient estimate by r5 and corrects the estimate by at most two.
   .Lots handles r0 > r6, where the sixteen div1 steps are not used.
   Both exits are rts with a delay slot: the instruction following each
   rts still executes before control returns to the caller.  */
3889 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3890 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3891 /* n1 < d, but n1 might be larger than d1. */
3892 .global GLOBAL(udiv_qrnnd_16)
3893 .balign 8
3894 GLOBAL(udiv_qrnnd_16):
3895 div0u
3896 cmp/hi r6,r0 /* T = n1 > d1 */
3897 bt .Lots
3898 .rept 16
3899 div1 r6,r0
3900 .endr
3901 extu.w r0,r1 /* r1 = 16 quotient bits collected so far */
3902 bt 0f
3903 add r6,r0 /* Last div1 step left T clear: add d1 back. */
3904 0: rotcl r1 /* Shift the final quotient bit (T) into r1. */
3905 mulu.w r1,r5 /* MACL = qn * low 16 bits of d */
3906 xtrct r4,r0
3907 swap.w r0,r0
3908 sts macl,r2 /* r2 = __m */
3909 cmp/hs r2,r0 /* T = r0 >= __m (unsigned) */
3910 sub r2,r0
3911 bt 0f /* No correction needed. */
3912 addc r5,r0 /* First correction: add d back ... */
3913 add #-1,r1 /* ... and lower the quotient by one. */
3914 bt 0f /* Carry out of the addc: done. */
3915 1: add #-1,r1 /* Second correction of the quotient. */
3916 rts
3917 add r5,r0 /* (delay slot) add d back once more */
3918 .balign 8
3919 .Lots:
3920 sub r5,r0
3921 swap.w r4,r1
3922 xtrct r0,r1
3923 clrt
3924 mov r1,r0
3925 addc r5,r0
3926 mov #-1,r1
3927 SL1(bf, 1b,
3928 shlr16 r1)
3929 0: rts
3930 nop
3931 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3932 #endif /* !__SHMEDIA__ */
3933 #endif /* L_udiv_qrnnd_16 */