Makefile.in: Rebuilt.
[official-gcc.git] / gcc / config / sh / lib1funcs.asm
blob466b89046f38655f82639ee216a5944923d00081
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2 2004, 2005, 2006
3 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
8 later version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27 Boston, MA 02110-1301, USA. */
29 !! libgcc routines for the Renesas / SuperH SH CPUs.
30 !! Contributed by Steve Chamberlain.
31 !! sac@cygnus.com
33 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34 !! recoded in assembly by Toshiyasu Morita
35 !! tm@netcom.com
37 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38 ELF local label prefixes by J"orn Rennecke
39 amylaar@cygnus.com */
41 #include "lib1funcs.h"
43 #if ! __SH5__
#ifdef L_ashiftrt
/* GLOBAL(ashiftrt_r4_<n>), n = 0 .. 32

   Arithmetic (sign-propagating) right shift of r4 by the constant <n>,
   performed in place.

   Entry:    r4 = value to shift
   Exit:     r4 = value shifted right arithmetically by <n> bits
   Destroys: the T bit may be modified (rotcl / subc / shar)

   All entry points share one fall-through chain: entering at
   ashiftrt_r4_<n> executes the remaining shift steps down to the next
   rts.  Each rts carries the last shift step of its group in the branch
   delay slot.  A shift by 32 is served by the same code as a shift by 31
   (the result is all copies of the sign bit).  */

	.global	GLOBAL(ashiftrt_r4_0)
	.global	GLOBAL(ashiftrt_r4_1)
	.global	GLOBAL(ashiftrt_r4_2)
	.global	GLOBAL(ashiftrt_r4_3)
	.global	GLOBAL(ashiftrt_r4_4)
	.global	GLOBAL(ashiftrt_r4_5)
	.global	GLOBAL(ashiftrt_r4_6)
	.global	GLOBAL(ashiftrt_r4_7)
	.global	GLOBAL(ashiftrt_r4_8)
	.global	GLOBAL(ashiftrt_r4_9)
	.global	GLOBAL(ashiftrt_r4_10)
	.global	GLOBAL(ashiftrt_r4_11)
	.global	GLOBAL(ashiftrt_r4_12)
	.global	GLOBAL(ashiftrt_r4_13)
	.global	GLOBAL(ashiftrt_r4_14)
	.global	GLOBAL(ashiftrt_r4_15)
	.global	GLOBAL(ashiftrt_r4_16)
	.global	GLOBAL(ashiftrt_r4_17)
	.global	GLOBAL(ashiftrt_r4_18)
	.global	GLOBAL(ashiftrt_r4_19)
	.global	GLOBAL(ashiftrt_r4_20)
	.global	GLOBAL(ashiftrt_r4_21)
	.global	GLOBAL(ashiftrt_r4_22)
	.global	GLOBAL(ashiftrt_r4_23)
	.global	GLOBAL(ashiftrt_r4_24)
	.global	GLOBAL(ashiftrt_r4_25)
	.global	GLOBAL(ashiftrt_r4_26)
	.global	GLOBAL(ashiftrt_r4_27)
	.global	GLOBAL(ashiftrt_r4_28)
	.global	GLOBAL(ashiftrt_r4_29)
	.global	GLOBAL(ashiftrt_r4_30)
	.global	GLOBAL(ashiftrt_r4_31)
	.global	GLOBAL(ashiftrt_r4_32)

	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))

	.align	1
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
	rotcl	r4		! T = sign bit of r4
	rts
	subc	r4,r4		! (delay slot) r4 = -T, i.e. 0 or -1

GLOBAL(ashiftrt_r4_30):
	shar	r4
GLOBAL(ashiftrt_r4_29):
	shar	r4
GLOBAL(ashiftrt_r4_28):
	shar	r4
GLOBAL(ashiftrt_r4_27):
	shar	r4
GLOBAL(ashiftrt_r4_26):
	shar	r4
GLOBAL(ashiftrt_r4_25):
	shar	r4
GLOBAL(ashiftrt_r4_24):
	shlr16	r4		! logical shift by 24 ...
	shlr8	r4
	rts
	exts.b	r4,r4		! (delay slot) ... then restore the sign

GLOBAL(ashiftrt_r4_23):
	shar	r4
GLOBAL(ashiftrt_r4_22):
	shar	r4
GLOBAL(ashiftrt_r4_21):
	shar	r4
GLOBAL(ashiftrt_r4_20):
	shar	r4
GLOBAL(ashiftrt_r4_19):
	shar	r4
GLOBAL(ashiftrt_r4_18):
	shar	r4
GLOBAL(ashiftrt_r4_17):
	shar	r4
GLOBAL(ashiftrt_r4_16):
	shlr16	r4		! logical shift by 16 ...
	rts
	exts.w	r4,r4		! (delay slot) ... then restore the sign

GLOBAL(ashiftrt_r4_15):
	shar	r4
GLOBAL(ashiftrt_r4_14):
	shar	r4
GLOBAL(ashiftrt_r4_13):
	shar	r4
GLOBAL(ashiftrt_r4_12):
	shar	r4
GLOBAL(ashiftrt_r4_11):
	shar	r4
GLOBAL(ashiftrt_r4_10):
	shar	r4
GLOBAL(ashiftrt_r4_9):
	shar	r4
GLOBAL(ashiftrt_r4_8):
	shar	r4
GLOBAL(ashiftrt_r4_7):
	shar	r4
GLOBAL(ashiftrt_r4_6):
	shar	r4
GLOBAL(ashiftrt_r4_5):
	shar	r4
GLOBAL(ashiftrt_r4_4):
	shar	r4
GLOBAL(ashiftrt_r4_3):
	shar	r4
GLOBAL(ashiftrt_r4_2):
	shar	r4
GLOBAL(ashiftrt_r4_1):
	rts
	shar	r4		! (delay slot) final single-bit shift

GLOBAL(ashiftrt_r4_0):
	rts
	nop			! (delay slot) shift by 0: nothing to do

	ENDFUNC(GLOBAL(ashiftrt_r4_0))
	ENDFUNC(GLOBAL(ashiftrt_r4_1))
	ENDFUNC(GLOBAL(ashiftrt_r4_2))
	ENDFUNC(GLOBAL(ashiftrt_r4_3))
	ENDFUNC(GLOBAL(ashiftrt_r4_4))
	ENDFUNC(GLOBAL(ashiftrt_r4_5))
	ENDFUNC(GLOBAL(ashiftrt_r4_6))
	ENDFUNC(GLOBAL(ashiftrt_r4_7))
	ENDFUNC(GLOBAL(ashiftrt_r4_8))
	ENDFUNC(GLOBAL(ashiftrt_r4_9))
	ENDFUNC(GLOBAL(ashiftrt_r4_10))
	ENDFUNC(GLOBAL(ashiftrt_r4_11))
	ENDFUNC(GLOBAL(ashiftrt_r4_12))
	ENDFUNC(GLOBAL(ashiftrt_r4_13))
	ENDFUNC(GLOBAL(ashiftrt_r4_14))
	ENDFUNC(GLOBAL(ashiftrt_r4_15))
	ENDFUNC(GLOBAL(ashiftrt_r4_16))
	ENDFUNC(GLOBAL(ashiftrt_r4_17))
	ENDFUNC(GLOBAL(ashiftrt_r4_18))
	ENDFUNC(GLOBAL(ashiftrt_r4_19))
	ENDFUNC(GLOBAL(ashiftrt_r4_20))
	ENDFUNC(GLOBAL(ashiftrt_r4_21))
	ENDFUNC(GLOBAL(ashiftrt_r4_22))
	ENDFUNC(GLOBAL(ashiftrt_r4_23))
	ENDFUNC(GLOBAL(ashiftrt_r4_24))
	ENDFUNC(GLOBAL(ashiftrt_r4_25))
	ENDFUNC(GLOBAL(ashiftrt_r4_26))
	ENDFUNC(GLOBAL(ashiftrt_r4_27))
	ENDFUNC(GLOBAL(ashiftrt_r4_28))
	ENDFUNC(GLOBAL(ashiftrt_r4_29))
	ENDFUNC(GLOBAL(ashiftrt_r4_30))
	ENDFUNC(GLOBAL(ashiftrt_r4_31))
	ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif
#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Arithmetic right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used: count is masked with 31)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the masked count, then with the table offset);
! the T bit may be modified by the shift steps
!
! The masked count indexes a table of byte offsets (relative to the
! table itself); the code then branches into a fall-through chain of
! shift steps.  Every rts has the last shift step of its group in the
! branch delay slot.
!

	.global	GLOBAL(ashrsi3)
	HIDDEN_FUNC(GLOBAL(ashrsi3))
	.align	2
GLOBAL(ashrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(ashrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the entry for this count
#ifdef __sh1__
	add	r5,r0			! __sh1__ path: form the absolute target
	jmp	@r0
#else
	braf	r5			! PC-relative; lands at table + offset
#endif
	mov	r4,r0			! (delay slot) r0 = value to shift

	.align	2
LOCAL(ashrsi3_table):
	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

LOCAL(ashrsi3_31):
	rotcl	r0		! T = sign bit
	rts
	subc	r0,r0		! (delay slot) r0 = -T, i.e. 0 or -1

LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0		! logical shift by 24 ...
	shlr8	r0
	rts
	exts.b	r0,r0		! (delay slot) ... then restore the sign

LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0		! logical shift by 16 ...
	rts
	exts.w	r0,r0		! (delay slot) ... then restore the sign

LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0		! (delay slot) final single-bit shift

LOCAL(ashrsi3_0):
	rts
	nop			! (delay slot) shift by 0: r0 already holds r4

	ENDFUNC(GLOBAL(ashrsi3))
#endif
#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
!
! Left shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used: count is masked with 31)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the masked count, then with the table offset);
! the T bit may be modified (shll)
!
! The masked count indexes a table of byte offsets (relative to the
! table itself).  Each group below is a fall-through chain of 2-bit
! steps ending in an 8/16-bit and/or 1-bit step; every rts has the
! last step of its group in the branch delay slot.
!
	.global	GLOBAL(ashlsi3)
	HIDDEN_FUNC(GLOBAL(ashlsi3))
	.align	2
GLOBAL(ashlsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(ashlsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the entry for this count
#ifdef __sh1__
	add	r5,r0			! __sh1__ path: form the absolute target
	jmp	@r0
#else
	braf	r5			! PC-relative; lands at table + offset
#endif
	mov	r4,r0			! (delay slot) r0 = value to shift

	.align	2
LOCAL(ashlsi3_table):
	.byte		LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

	! counts 6, 4, 2
LOCAL(ashlsi3_6):
	shll2	r0
LOCAL(ashlsi3_4):
	shll2	r0
LOCAL(ashlsi3_2):
	rts
	shll2	r0		! (delay slot)

	! counts 7, 5, 3, 1
LOCAL(ashlsi3_7):
	shll2	r0
LOCAL(ashlsi3_5):
	shll2	r0
LOCAL(ashlsi3_3):
	shll2	r0
LOCAL(ashlsi3_1):
	rts
	shll	r0		! (delay slot)

	! counts 14, 12, 10, 8
LOCAL(ashlsi3_14):
	shll2	r0
LOCAL(ashlsi3_12):
	shll2	r0
LOCAL(ashlsi3_10):
	shll2	r0
LOCAL(ashlsi3_8):
	rts
	shll8	r0		! (delay slot)

	! counts 15, 13, 11, 9
LOCAL(ashlsi3_15):
	shll2	r0
LOCAL(ashlsi3_13):
	shll2	r0
LOCAL(ashlsi3_11):
	shll2	r0
LOCAL(ashlsi3_9):
	shll8	r0
	rts
	shll	r0		! (delay slot)

	! counts 22, 20, 18, 16
LOCAL(ashlsi3_22):
	shll2	r0
LOCAL(ashlsi3_20):
	shll2	r0
LOCAL(ashlsi3_18):
	shll2	r0
LOCAL(ashlsi3_16):
	rts
	shll16	r0		! (delay slot)

	! counts 23, 21, 19, 17
LOCAL(ashlsi3_23):
	shll2	r0
LOCAL(ashlsi3_21):
	shll2	r0
LOCAL(ashlsi3_19):
	shll2	r0
LOCAL(ashlsi3_17):
	shll16	r0
	rts
	shll	r0		! (delay slot)

	! counts 30, 28, 26, 24
LOCAL(ashlsi3_30):
	shll2	r0
LOCAL(ashlsi3_28):
	shll2	r0
LOCAL(ashlsi3_26):
	shll2	r0
LOCAL(ashlsi3_24):
	shll16	r0
	rts
	shll8	r0		! (delay slot)

	! counts 31, 29, 27, 25
LOCAL(ashlsi3_31):
	shll2	r0
LOCAL(ashlsi3_29):
	shll2	r0
LOCAL(ashlsi3_27):
	shll2	r0
LOCAL(ashlsi3_25):
	shll16	r0
	shll8	r0
	rts
	shll	r0		! (delay slot)

LOCAL(ashlsi3_0):
	rts
	nop			! (delay slot) shift by 0: r0 already holds r4

	ENDFUNC(GLOBAL(ashlsi3))
#endif
#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
!
! Logical (zero-filling) right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used: count is masked with 31)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the masked count, then with the table offset);
! the T bit may be modified (shlr)
!
! The masked count indexes a table of byte offsets (relative to the
! table itself).  Each group below is a fall-through chain of 2-bit
! steps ending in an 8/16-bit and/or 1-bit step; every rts has the
! last step of its group in the branch delay slot.
!
	.global	GLOBAL(lshrsi3)
	HIDDEN_FUNC(GLOBAL(lshrsi3))
	.align	2
GLOBAL(lshrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(lshrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the entry for this count
#ifdef __sh1__
	add	r5,r0			! __sh1__ path: form the absolute target
	jmp	@r0
#else
	braf	r5			! PC-relative; lands at table + offset
#endif
	mov	r4,r0			! (delay slot) r0 = value to shift

	.align	2
LOCAL(lshrsi3_table):
	.byte		LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

	! counts 6, 4, 2
LOCAL(lshrsi3_6):
	shlr2	r0
LOCAL(lshrsi3_4):
	shlr2	r0
LOCAL(lshrsi3_2):
	rts
	shlr2	r0		! (delay slot)

	! counts 7, 5, 3, 1
LOCAL(lshrsi3_7):
	shlr2	r0
LOCAL(lshrsi3_5):
	shlr2	r0
LOCAL(lshrsi3_3):
	shlr2	r0
LOCAL(lshrsi3_1):
	rts
	shlr	r0		! (delay slot)

	! counts 14, 12, 10, 8
LOCAL(lshrsi3_14):
	shlr2	r0
LOCAL(lshrsi3_12):
	shlr2	r0
LOCAL(lshrsi3_10):
	shlr2	r0
LOCAL(lshrsi3_8):
	rts
	shlr8	r0		! (delay slot)

	! counts 15, 13, 11, 9
LOCAL(lshrsi3_15):
	shlr2	r0
LOCAL(lshrsi3_13):
	shlr2	r0
LOCAL(lshrsi3_11):
	shlr2	r0
LOCAL(lshrsi3_9):
	shlr8	r0
	rts
	shlr	r0		! (delay slot)

	! counts 22, 20, 18, 16
LOCAL(lshrsi3_22):
	shlr2	r0
LOCAL(lshrsi3_20):
	shlr2	r0
LOCAL(lshrsi3_18):
	shlr2	r0
LOCAL(lshrsi3_16):
	rts
	shlr16	r0		! (delay slot)

	! counts 23, 21, 19, 17
LOCAL(lshrsi3_23):
	shlr2	r0
LOCAL(lshrsi3_21):
	shlr2	r0
LOCAL(lshrsi3_19):
	shlr2	r0
LOCAL(lshrsi3_17):
	shlr16	r0
	rts
	shlr	r0		! (delay slot)

	! counts 30, 28, 26, 24
LOCAL(lshrsi3_30):
	shlr2	r0
LOCAL(lshrsi3_28):
	shlr2	r0
LOCAL(lshrsi3_26):
	shlr2	r0
LOCAL(lshrsi3_24):
	shlr16	r0
	rts
	shlr8	r0		! (delay slot)

	! counts 31, 29, 27, 25
LOCAL(lshrsi3_31):
	shlr2	r0
LOCAL(lshrsi3_29):
	shlr2	r0
LOCAL(lshrsi3_27):
	shlr2	r0
LOCAL(lshrsi3_25):
	shlr16	r0
	shlr8	r0
	rts
	shlr	r0		! (delay slot)

LOCAL(lshrsi3_0):
	rts
	nop			! (delay slot) shift by 0: r0 already holds r4

	ENDFUNC(GLOBAL(lshrsi3))
#endif
#ifdef L_movmem
/* Block-copy helpers.

   GLOBAL(movmemSI<n>), n = 4, 8, ... 64:
     Copy <n> bytes, one longword at a time from the highest offset
     down to offset 0, from the address in r5 to the address in r4.
     The entry points form one fall-through chain ending in a single
     rts whose delay slot performs the final store.
     Destroys: r0.

   GLOBAL(movmem):
     Copies in groups of 64 bytes by running the movmemSI chain in a
     loop, then jumps into the chain for the remainder.
     Entry: r4 = destination, r5 = source, r6 = count supplied by the
     caller.  r6 is scaled by 4 on entry and stepped by -64 per loop
     pass; its exact encoding is fixed by the caller and is not visible
     in this file -- NOTE(review): confirm against the compiler's
     block-move expander.
     Destroys: r0, r4, r5, r6, T bit.  pr is saved on the stack and
     restored before returning.

   The movstr* names are aliases of the movmem* entry points.  */
	.text
	.balign	4
	.global	GLOBAL(movmem)
	HIDDEN_FUNC(GLOBAL(movmem))
	HIDDEN_ALIAS(movstr,movmem)
	/* This would be a lot simpler if r6 contained the byte count
	   minus 64, and we wouldn't be called here for a byte count of 64.  */
GLOBAL(movmem):
	sts.l	pr,@-r15		! save return address: bsr overwrites pr
	shll2	r6
	bsr	GLOBAL(movmemSI52+2)	! enter the chain past its first load ...
	mov.l	@(48,r5),r0		! ... which is done here in the delay slot
	.balign	4
LOCAL(movmem_loop):	/* Reached with rts */
	mov.l	@(60,r5),r0
	add	#-64,r6
	mov.l	r0,@(60,r4)
	tst	r6,r6
	mov.l	@(56,r5),r0
	bt	LOCAL(movmem_done)
	mov.l	r0,@(56,r4)
	cmp/pl	r6
	mov.l	@(52,r5),r0
	add	#64,r5
	mov.l	r0,@(52,r4)
	add	#64,r4
	bt	GLOBAL(movmemSI52)
! done all the large groups, do the remainder
! jump to movmem+
	mova	GLOBAL(movmemSI4)+4,r0
	add	r6,r0
	jmp	@r0
LOCAL(movmem_done): ! share slot insn, works out aligned.
	lds.l	@r15+,pr		! restore pr (also the delay slot of the jmp)
	mov.l	r0,@(56,r4)
	mov.l	@(52,r5),r0
	rts
	mov.l	r0,@(52,r4)		! (delay slot)
	.balign	4
! ??? We need aliases movstr* for movmem* for the older libraries.  These
! aliases will be removed at some point in the future.
	.global	GLOBAL(movmemSI64)
	HIDDEN_FUNC(GLOBAL(movmemSI64))
	HIDDEN_ALIAS(movstrSI64,movmemSI64)
GLOBAL(movmemSI64):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	GLOBAL(movmemSI60)
	HIDDEN_FUNC(GLOBAL(movmemSI60))
	HIDDEN_ALIAS(movstrSI60,movmemSI60)
GLOBAL(movmemSI60):
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	GLOBAL(movmemSI56)
	HIDDEN_FUNC(GLOBAL(movmemSI56))
	HIDDEN_ALIAS(movstrSI56,movmemSI56)
GLOBAL(movmemSI56):
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	GLOBAL(movmemSI52)
	HIDDEN_FUNC(GLOBAL(movmemSI52))
	HIDDEN_ALIAS(movstrSI52,movmemSI52)
GLOBAL(movmemSI52):
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	GLOBAL(movmemSI48)
	HIDDEN_FUNC(GLOBAL(movmemSI48))
	HIDDEN_ALIAS(movstrSI48,movmemSI48)
GLOBAL(movmemSI48):
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	GLOBAL(movmemSI44)
	HIDDEN_FUNC(GLOBAL(movmemSI44))
	HIDDEN_ALIAS(movstrSI44,movmemSI44)
GLOBAL(movmemSI44):
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	GLOBAL(movmemSI40)
	HIDDEN_FUNC(GLOBAL(movmemSI40))
	HIDDEN_ALIAS(movstrSI40,movmemSI40)
GLOBAL(movmemSI40):
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	GLOBAL(movmemSI36)
	HIDDEN_FUNC(GLOBAL(movmemSI36))
	HIDDEN_ALIAS(movstrSI36,movmemSI36)
GLOBAL(movmemSI36):
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	GLOBAL(movmemSI32)
	HIDDEN_FUNC(GLOBAL(movmemSI32))
	HIDDEN_ALIAS(movstrSI32,movmemSI32)
GLOBAL(movmemSI32):
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	GLOBAL(movmemSI28)
	HIDDEN_FUNC(GLOBAL(movmemSI28))
	HIDDEN_ALIAS(movstrSI28,movmemSI28)
GLOBAL(movmemSI28):
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	GLOBAL(movmemSI24)
	HIDDEN_FUNC(GLOBAL(movmemSI24))
	HIDDEN_ALIAS(movstrSI24,movmemSI24)
GLOBAL(movmemSI24):
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	GLOBAL(movmemSI20)
	HIDDEN_FUNC(GLOBAL(movmemSI20))
	HIDDEN_ALIAS(movstrSI20,movmemSI20)
GLOBAL(movmemSI20):
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	GLOBAL(movmemSI16)
	HIDDEN_FUNC(GLOBAL(movmemSI16))
	HIDDEN_ALIAS(movstrSI16,movmemSI16)
GLOBAL(movmemSI16):
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	GLOBAL(movmemSI12)
	HIDDEN_FUNC(GLOBAL(movmemSI12))
	HIDDEN_ALIAS(movstrSI12,movmemSI12)
GLOBAL(movmemSI12):
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	GLOBAL(movmemSI8)
	HIDDEN_FUNC(GLOBAL(movmemSI8))
	HIDDEN_ALIAS(movstrSI8,movmemSI8)
GLOBAL(movmemSI8):
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	GLOBAL(movmemSI4)
	HIDDEN_FUNC(GLOBAL(movmemSI4))
	HIDDEN_ALIAS(movstrSI4,movmemSI4)
GLOBAL(movmemSI4):
	mov.l	@(0,r5),r0
	rts
	mov.l	r0,@(0,r4)		! (delay slot) last longword

	ENDFUNC(GLOBAL(movmemSI64))
	ENDFUNC(GLOBAL(movmemSI60))
	ENDFUNC(GLOBAL(movmemSI56))
	ENDFUNC(GLOBAL(movmemSI52))
	ENDFUNC(GLOBAL(movmemSI48))
	ENDFUNC(GLOBAL(movmemSI44))
	ENDFUNC(GLOBAL(movmemSI40))
	ENDFUNC(GLOBAL(movmemSI36))
	ENDFUNC(GLOBAL(movmemSI32))
	ENDFUNC(GLOBAL(movmemSI28))
	ENDFUNC(GLOBAL(movmemSI24))
	ENDFUNC(GLOBAL(movmemSI20))
	ENDFUNC(GLOBAL(movmemSI16))
	ENDFUNC(GLOBAL(movmemSI12))
	ENDFUNC(GLOBAL(movmemSI8))
	ENDFUNC(GLOBAL(movmemSI4))
	ENDFUNC(GLOBAL(movmem))
#endif
#ifdef L_movmem_i4
/* Longword block copies using post-increment loads.

   GLOBAL(movmem_i4_even), GLOBAL(movmem_i4_odd):
     Entry: r4 = destination, r5 = source, r6 = loop counter.
     The loop moves four longwords per pass and decrements r6 with dt
     twice per pass; the two exits store the trailing longwords.  How
     r6 encodes the length is fixed by the caller and is not visible in
     this file -- NOTE(review): confirm against the compiler's
     block-move expander.
     Destroys: r0, r1, r2, r3, r4, r5, r6, T bit.

   GLOBAL(movmemSI12_i4):
     Copy 12 bytes from the address in r5 to the address in r4.
     Destroys: r0, r1, r2.

   The movstr* names are aliases of the movmem* entry points.  */
	.text
	.global	GLOBAL(movmem_i4_even)
	.global	GLOBAL(movmem_i4_odd)
	.global	GLOBAL(movmemSI12_i4)

	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))

	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)

	.p2align	5
	! Exit taken from the middle of the loop: store the last two longwords.
L_movmem_2mod4_end:
	mov.l	r0,@(16,r4)
	rts
	mov.l	r1,@(20,r4)		! (delay slot)

	.p2align	2

GLOBAL(movmem_i4_even):
	mov.l	@r5+,r0
	bra	L_movmem_start_even
	mov.l	@r5+,r1			! (delay slot)

GLOBAL(movmem_i4_odd):
	mov.l	@r5+,r1
	add	#-4,r4			! bias r4 so the loop offsets line up
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r1,@(4,r4)
	mov.l	r2,@(8,r4)

L_movmem_loop:
	mov.l	r3,@(12,r4)
	dt	r6
	mov.l	@r5+,r0
	bt/s	L_movmem_2mod4_end
	mov.l	@r5+,r1			! (delay slot)
	add	#16,r4
L_movmem_start_even:
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r0,@r4
	dt	r6
	mov.l	r1,@(4,r4)
	bf/s	L_movmem_loop
	mov.l	r2,@(8,r4)		! (delay slot)
	rts
	mov.l	r3,@(12,r4)		! (delay slot) last longword

	ENDFUNC(GLOBAL(movmem_i4_even))
	ENDFUNC(GLOBAL(movmem_i4_odd))

	.p2align	4
GLOBAL(movmemSI12_i4):
	mov.l	@r5,r0
	mov.l	@(4,r5),r1
	mov.l	@(8,r5),r2
	mov.l	r0,@r4
	mov.l	r1,@(4,r4)
	rts
	mov.l	r2,@(8,r4)		! (delay slot)

	ENDFUNC(GLOBAL(movmemSI12_i4))
#endif
#ifdef L_mulsi3

	.global	GLOBAL(mulsi3)
	HIDDEN_FUNC(GLOBAL(mulsi3))

! GLOBAL(mulsi3): 32 x 32 -> 32 bit multiply built from 16 x 16 multiplies.
!
! Entry:    r4, r5 = operands
! Exit:     r0 = low 32 bits of r4 * r5
! Destroys: r1, r2, r3, macl, T bit
!
! r4 =       aabb
! r5 =       ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!

GLOBAL(mulsi3):
	mulu.w	r4,r5		! multiply the lsws  macl=bb*dd
	mov	r5,r3		! r3 = ccdd
	swap.w	r4,r2		! r2 = bbaa
	xtrct	r2,r3		! r3 = aacc
	tst	r3,r3		! msws zero ?
	bf	hiset
	rts			! yes - then we have the answer
	sts	macl,r0

hiset:	sts	macl,r0		! r0 = bb*dd
	mulu.w	r2,r5		! brewing macl = aa*dd
	sts	macl,r1
	mulu.w	r3,r4		! brewing macl = cc*bb
	sts	macl,r2
	add	r1,r2		! r2 = aa*dd + cc*bb
	shll16	r2		! scale the cross terms by 65536
	rts
	add	r2,r0		! (delay slot) add them to bb*dd

	ENDFUNC(GLOBAL(mulsi3))
#endif
964 #endif /* ! __SH5__ */
#ifdef L_sdivsi3_i4
	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Renesas SH
!!
!! Signed division done in the FPU: both operands are converted to
!! double, divided, and the quotient is truncated back to an integer
!! which is left in fpul.
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2

	.global	GLOBAL(sdivsi3_i4)
	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
	lds	r4,fpul
	float	fpul,dr0		! dr0 = (double) dividend
	lds	r5,fpul
	float	fpul,dr2		! dr2 = (double) divisor
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul		! (delay slot) fpul = truncated quotient

	ENDFUNC(GLOBAL(sdivsi3_i4))
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
	.mode	SHcompact
#endif
	.global	GLOBAL(sdivsi3_i4)
	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
	sts.l	fpscr,@-r15		! save the caller's fpscr
	mov	#8,r2
	swap.w	r2,r2			! r2 = 0x00080000
	lds	r2,fpscr		! presumably selects double precision (PR bit) -- confirm in the SH-4 manual
	lds	r4,fpul
	float	fpul,dr0		! dr0 = (double) dividend
	lds	r5,fpul
	float	fpul,dr2		! dr2 = (double) divisor
	fdiv	dr2,dr0
	ftrc	dr0,fpul		! fpul = truncated quotient
	rts
	lds.l	@r15+,fpscr		! (delay slot) restore the caller's fpscr

	ENDFUNC(GLOBAL(sdivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ */
#endif
1011 #ifdef L_sdivsi3
1012 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1013 sh2e/sh3e code. */
1014 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1016 !! Steve Chamberlain
1017 !! sac@cygnus.com
1021 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1023 .global GLOBAL(sdivsi3)
1024 #if __SHMEDIA__
1025 #if __SH5__ == 32
1026 .section .text..SHmedia32,"ax"
1027 #else
1028 .text
1029 #endif
1030 .align 2
1031 #if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns divsi3_i1 and
   divsi3_i1_media.

int __sdivsi3 (i, j)
     int i, j;
{
  register unsigned long long r18 asm ("r18");
  register unsigned long long r19 asm ("r19");
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r1 asm ("r1") = 1;
  register int r2 asm ("r2") = i >> 31;
  register int r3 asm ("r3") = j >> 31;

  r2 = r2 ? r2 : r1;
  r3 = r3 ? r3 : r1;
  r18 = i * r2;
  r19 = j * r3;
  r2 *= r3;

  r19 <<= 31;
  r1 <<= 31;
  do
    if (r18 >= r19)
      r0 |= r1, r18 -= r19;
  while (r19 >>= 1, r1 >>= 1);

  return r2 * (int)r0;
}
*/
1063 GLOBAL(sdivsi3):
1064 pt/l LOCAL(sdivsi3_dontadd), tr2
1065 pt/l LOCAL(sdivsi3_loop), tr1
1066 ptabs/l r18, tr0
1067 movi 0, r0
1068 movi 1, r1
1069 shari.l r4, 31, r2
1070 shari.l r5, 31, r3
1071 cmveq r2, r1, r2
1072 cmveq r3, r1, r3
1073 muls.l r4, r2, r18
1074 muls.l r5, r3, r19
1075 muls.l r2, r3, r2
1076 shlli r19, 31, r19
1077 shlli r1, 31, r1
1078 LOCAL(sdivsi3_loop):
1079 bgtu r19, r18, tr2
1080 or r0, r1, r0
1081 sub r18, r19, r18
1082 LOCAL(sdivsi3_dontadd):
1083 shlri r1, 1, r1
1084 shlri r19, 1, r19
1085 bnei r1, 0, tr1
1086 muls.l r0, r2, r0
1087 add.l r0, r63, r0
1088 blink tr0, r63
1089 #elif 0 /* ! 0 */
1090 // inputs: r4,r5
1091 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1092 // result in r0
1093 GLOBAL(sdivsi3):
1094 // can create absolute value without extra latency,
1095 // but dependent on proper sign extension of inputs:
1096 // shari.l r5,31,r2
1097 // xor r5,r2,r20
1098 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1099 shari.l r5,31,r2
1100 ori r2,1,r2
1101 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1102 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1103 shari.l r4,31,r3
1104 nsb r20,r0
1105 shlld r20,r0,r25
1106 shlri r25,48,r25
1107 sub r19,r25,r1
1108 mmulfx.w r1,r1,r2
1109 mshflo.w r1,r63,r1
1110 // If r4 was to be used in-place instead of r21, could use this sequence
1111 // to compute absolute:
1112 // sub r63,r4,r19 // compute absolute value of r4
1113 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1114 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1115 ori r3,1,r3
1116 mmulfx.w r25,r2,r2
1117 sub r19,r0,r0
1118 muls.l r4,r3,r21
1119 msub.w r1,r2,r2
1120 addi r2,-2,r1
1121 mulu.l r21,r1,r19
1122 mmulfx.w r2,r2,r2
1123 shlli r1,15,r1
1124 shlrd r19,r0,r19
1125 mulu.l r19,r20,r3
1126 mmacnfx.wl r25,r2,r1
1127 ptabs r18,tr0
1128 sub r21,r3,r25
1130 mulu.l r25,r1,r2
1131 addi r0,14,r0
1132 xor r4,r5,r18
1133 shlrd r2,r0,r2
1134 mulu.l r2,r20,r3
1135 add r19,r2,r19
1136 shari.l r18,31,r18
1137 sub r25,r3,r25
1139 mulu.l r25,r1,r2
1140 sub r25,r20,r25
1141 add r19,r18,r19
1142 shlrd r2,r0,r2
1143 mulu.l r2,r20,r3
1144 addi r25,1,r25
1145 add r19,r2,r19
1147 cmpgt r25,r3,r25
1148 add.l r19,r25,r0
1149 xor r0,r18,r0
1150 blink tr0,r63
1151 #else /* ! 0 && ! 0 */
1153 // inputs: r4,r5
1154 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1155 // result in r0
1156 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1157 #ifndef __pic__
1158 FUNC(GLOBAL(sdivsi3))
1159 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1160 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1161 // with the SHcompact implementation, which clobbers tr1 / tr2.
1162 .global GLOBAL(sdivsi3_1)
1163 GLOBAL(sdivsi3_1):
1164 .global GLOBAL(div_table_internal)
1165 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1166 shori GLOBAL(div_table_internal) & 65535, r20
1167 #endif
1168 .global GLOBAL(sdivsi3_2)
1169 // div_table in r20
1170 // clobbered: r1,r18,r19,r21,r25,tr0
1171 GLOBAL(sdivsi3_2):
1172 nsb r5, r1
1173 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1174 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1175 ldx.ub r20, r21, r19 // u0.8
1176 shari r25, 32, r25 // normalize to s2.30
1177 shlli r21, 1, r21
1178 muls.l r25, r19, r19 // s2.38
1179 ldx.w r20, r21, r21 // s2.14
1180 ptabs r18, tr0
1181 shari r19, 24, r19 // truncate to s2.14
1182 sub r21, r19, r19 // some 11 bit inverse in s1.14
1183 muls.l r19, r19, r21 // u0.28
1184 sub r63, r1, r1
1185 addi r1, 92, r1
1186 muls.l r25, r21, r18 // s2.58
1187 shlli r19, 45, r19 // multiply by two and convert to s2.58
1188 /* bubble */
1189 sub r19, r18, r18
1190 shari r18, 28, r18 // some 22 bit inverse in s1.30
1191 muls.l r18, r25, r0 // s2.60
1192 muls.l r18, r4, r25 // s32.30
1193 /* bubble */
1194 shari r0, 16, r19 // s-16.44
1195 muls.l r19, r18, r19 // s-16.74
1196 shari r25, 63, r0
1197 shari r4, 14, r18 // s19.-14
1198 shari r19, 30, r19 // s-16.44
1199 muls.l r19, r18, r19 // s15.30
1200 xor r21, r0, r21 // You could also use the constant 1 << 27.
1201 add r21, r25, r21
1202 sub r21, r19, r21
1203 shard r21, r1, r21
1204 sub r21, r0, r0
1205 blink tr0, r63
1206 #ifndef __pic__
1207 ENDFUNC(GLOBAL(sdivsi3))
1208 #endif
1209 ENDFUNC(GLOBAL(sdivsi3_2))
1210 #endif
1211 #elif defined __SHMEDIA__
/* m5compact-nofpu */
	// Signed 32-bit division by a 32-pass shift-and-subtract loop,
	// written in SHmedia code and placed in .text..SHmedia32.
	// inputs: r4 (dividend), r5 (divisor); return address in r18
	// result in r0
	// clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
	pt/l	LOCAL(sdivsi3_dontsub), tr0
	pt/l	LOCAL(sdivsi3_loop), tr1
	ptabs/l	r18,tr2
	shari.l	r4,31,r18		// r18 = sign mask of the dividend (0 or -1)
	shari.l	r5,31,r19		// r19 = sign mask of the divisor (0 or -1)
	xor	r4,r18,r20
	xor	r5,r19,r21
	sub.l	r20,r18,r20		// r20 = |dividend|
	sub.l	r21,r19,r21		// r21 = |divisor|
	xor	r18,r19,r19		// r19 = sign mask of the quotient
	shlli	r21,32,r25
	addi	r25,-1,r21
	addz.l	r20,r63,r20		// zero-extend the dividend magnitude
LOCAL(sdivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0
	sub	r20,r21,r20
LOCAL(sdivsi3_dontsub):
	addi.l	r25,-1,r25
	bnei	r25,-32,tr1		// loop until the .l counter reaches -32
	xor	r20,r19,r20		// conditionally negate: (x ^ mask) - mask
	sub.l	r20,r19,r0
	blink	tr2,r63
	ENDFUNC(GLOBAL(sdivsi3))
1243 #else /* ! __SHMEDIA__ */
1244 FUNC(GLOBAL(sdivsi3))
1245 GLOBAL(sdivsi3):
1246 mov r4,r1
1247 mov r5,r0
1249 tst r0,r0
1250 bt div0
1251 mov #0,r2
1252 div0s r2,r1
1253 subc r3,r3
1254 subc r2,r1
1255 div0s r0,r3
1256 rotcl r1
1257 div1 r0,r3
1258 rotcl r1
1259 div1 r0,r3
1260 rotcl r1
1261 div1 r0,r3
1262 rotcl r1
1263 div1 r0,r3
1264 rotcl r1
1265 div1 r0,r3
1266 rotcl r1
1267 div1 r0,r3
1268 rotcl r1
1269 div1 r0,r3
1270 rotcl r1
1271 div1 r0,r3
1272 rotcl r1
1273 div1 r0,r3
1274 rotcl r1
1275 div1 r0,r3
1276 rotcl r1
1277 div1 r0,r3
1278 rotcl r1
1279 div1 r0,r3
1280 rotcl r1
1281 div1 r0,r3
1282 rotcl r1
1283 div1 r0,r3
1284 rotcl r1
1285 div1 r0,r3
1286 rotcl r1
1287 div1 r0,r3
1288 rotcl r1
1289 div1 r0,r3
1290 rotcl r1
1291 div1 r0,r3
1292 rotcl r1
1293 div1 r0,r3
1294 rotcl r1
1295 div1 r0,r3
1296 rotcl r1
1297 div1 r0,r3
1298 rotcl r1
1299 div1 r0,r3
1300 rotcl r1
1301 div1 r0,r3
1302 rotcl r1
1303 div1 r0,r3
1304 rotcl r1
1305 div1 r0,r3
1306 rotcl r1
1307 div1 r0,r3
1308 rotcl r1
1309 div1 r0,r3
1310 rotcl r1
1311 div1 r0,r3
1312 rotcl r1
1313 div1 r0,r3
1314 rotcl r1
1315 div1 r0,r3
1316 rotcl r1
1317 div1 r0,r3
1318 rotcl r1
1319 div1 r0,r3
1320 rotcl r1
1321 addc r2,r1
1323 mov r1,r0
1326 div0: rts
1327 mov #0,r0
1329 ENDFUNC(GLOBAL(sdivsi3))
1330 #endif /* ! __SHMEDIA__ */
1331 #endif /* ! __SH4__ */
1332 #endif
1333 #ifdef L_udivsi3_i4
1335 .title "SH DIVIDE"
1336 !! 4 byte integer Divide code for the Renesas SH
1337 #ifdef __SH4__
1338 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1339 !! and t bit
/* udivsi3_i4 (SH4 variant): unsigned 32-bit division done in double
   precision floating point.
   Reads the dividend from r4 and the divisor from r5; the quotient is
   left in fpul.  A divisor of 0 or 1 takes the "trivial" exit, which
   returns the dividend unchanged.
   Both operands get their top bit flipped before the signed int->double
   conversion, and 2^31 (the constant at L1) is added back afterwards,
   so the full unsigned range converts correctly.
   The listing had lost both "rts" instructions and the "L1:" label
   (listing lines 1364, 1368, 1375); without them the code ran into the
   constant and "mova L1" referred to an undefined symbol.  */
1341 .global GLOBAL(udivsi3_i4)
1342 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1343 GLOBAL(udivsi3_i4):
1344 mov #1,r1
/* T = (r5 > 1) unsigned; divisors 0 and 1 need no division.  */
1345 cmp/hi r1,r5
1346 bf trivial
/* r1 = 0x80000000.  */
1347 rotr r1
1348 xor r1,r4
1349 lds r4,fpul
/* dr4 = 2147483648.0, loaded from the literal at L1.  */
1350 mova L1,r0
1351 #ifdef FMOVD_WORKS
1352 fmov.d @r0+,dr4
1353 #else
1354 fmov.s @r0+,DR40
1355 fmov.s @r0,DR41
1356 #endif
1357 float fpul,dr0
1358 xor r1,r5
1359 lds r5,fpul
1360 float fpul,dr2
/* Undo the bias on both operands.  */
1361 fadd dr4,dr0
1362 fadd dr4,dr2
1363 fdiv dr2,dr0
/* Return; the truncating conversion into fpul runs in the delay slot.  */
1364 rts
1365 ftrc dr0,fpul
1367 trivial:
/* Return the dividend itself; the copy to fpul runs in the delay slot.  */
1368 rts
1369 lds r4,fpul
1371 .align 2
1372 #ifdef FMOVD_WORKS
1373 .align 3 ! make double below 8 byte aligned.
1374 #endif
1375 L1:
1376 .double 2147483648
1378 ENDFUNC(GLOBAL(udivsi3_i4))
1379 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1380 #if ! __SH5__ || __SH5__ == 32
1381 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* udivsi3_i4 (SH5 variant with FPU): unsigned 32-bit division done in
   double precision.  r4 and r5 are zero-extended into r20 / r21, moved
   to dr0 / dr32, converted, divided and truncated back to an integer.
   NOTE(review): the header comment says the result is in fpul; the code
   leaves it in fr32 - presumably that is how fpul is mapped, confirm.  */
1382 .mode SHmedia
1383 .global GLOBAL(udivsi3_i4)
1384 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1385 GLOBAL(udivsi3_i4):
/* Zero-extend both 32-bit operands to 64 bits.  */
1386 addz.l r4,r63,r20
1387 addz.l r5,r63,r21
1388 fmov.qd r20,dr0
1389 fmov.qd r21,dr32
/* Set up the return target from r18.  */
1390 ptabs r18,tr0
1391 float.qd dr0,dr0
1392 float.qd dr32,dr32
1393 fdiv.d dr0,dr32,dr0
1394 ftrc.dq dr0,dr32
/* Move the low half of the 64-bit integer result into fr32.  */
1395 fmov.s fr33,fr32
1396 blink tr0,r63
1398 ENDFUNC(GLOBAL(udivsi3_i4))
1399 #endif /* ! __SH5__ || __SH5__ == 32 */
1400 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1401 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* udivsi3_i4 (SH4 single / single-only variant): unsigned 32-bit
   division done in double precision floating point.
   Reads the dividend from r4 and the divisor from r5; the quotient is
   left in fpul.  A divisor of 0 or 1 takes the "trivial" exit, which
   returns the dividend unchanged.
   The caller's fpscr is saved on the stack, replaced by the word at L1
   for the duration of the computation, and restored on return.
   The listing had lost both "rts" instructions and the "L1:" label
   (listing lines 1429, 1436, 1440); without them the main path fell
   into "trivial" and then into the literals.  */
1403 .global GLOBAL(udivsi3_i4)
1404 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1405 GLOBAL(udivsi3_i4):
1406 mov #1,r1
/* T = (r5 > 1) unsigned; divisors 0 and 1 need no division.  */
1407 cmp/hi r1,r5
1408 bf trivial
/* Save the caller's fpscr and load the mode word stored at L1.  */
1409 sts.l fpscr,@-r15
1410 mova L1,r0
1411 lds.l @r0+,fpscr
/* r1 = 0x80000000: flip the top bit so the signed conversion can be
   used, then add 2^31 back as a double.  */
1412 rotr r1
1413 xor r1,r4
1414 lds r4,fpul
/* dr4 = 2147483648.0 (r0 now points just past the fpscr word).  */
1415 #ifdef FMOVD_WORKS
1416 fmov.d @r0+,dr4
1417 #else
1418 fmov.s @r0+,DR40
1419 fmov.s @r0,DR41
1420 #endif
1421 float fpul,dr0
1422 xor r1,r5
1423 lds r5,fpul
1424 float fpul,dr2
1425 fadd dr4,dr0
1426 fadd dr4,dr2
1427 fdiv dr2,dr0
1428 ftrc dr0,fpul
/* Return; the caller's fpscr is restored in the delay slot.  */
1429 rts
1430 lds.l @r15+,fpscr
/* Aligning here leaves the 4-byte "trivial" stub and the 4-byte fpscr
   word in front of the double, which then lands on an 8-byte boundary.  */
1432 #ifdef FMOVD_WORKS
1433 .align 3 ! make double below 8 byte aligned.
1434 #endif
1435 trivial:
/* Return the dividend itself; the copy to fpul runs in the delay slot.  */
1436 rts
1437 lds r4,fpul
1439 .align 2
1440 L1:
/* fpscr value used during the division: bit 19, plus bit 20 when
   64-bit fmov is usable.  */
1441 #ifndef FMOVD_WORKS
1442 .long 0x80000
1443 #else
1444 .long 0x180000
1445 #endif
1446 .double 2147483648
1448 ENDFUNC(GLOBAL(udivsi3_i4))
1449 #endif /* ! __SH4__ */
1450 #endif
1452 #ifdef L_udivsi3
1453 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1454 sh2e/sh3e code. */
1455 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1457 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1458 .global GLOBAL(udivsi3)
1459 HIDDEN_FUNC(GLOBAL(udivsi3))
1461 #if __SHMEDIA__
1462 #if __SH5__ == 32
1463 .section .text..SHmedia32,"ax"
1464 #else
1465 .text
1466 #endif
1467 .align 2
1468 #if 0
/* Disabled (#if 0) reference version of the SHmedia udivsi3: the C
   model followed by its hand-written bit-serial translation.
   The listing had lost the short lines of the C model, including the
   closing comment delimiter (listing lines 1477, 1485, 1491, 1492), so
   the comment below never ended and swallowed the code after it.  */
1469 /* The assembly code that follows is a hand-optimized version of the C
1470 code that follows. Note that the registers that are modified are
1471 exactly those listed as clobbered in the patterns udivsi3_i1 and
1472 udivsi3_i1_media.
1474 unsigned
1475 __udivsi3 (i, j)
1476 unsigned i, j;
1477 {
1478 register unsigned long long r0 asm ("r0") = 0;
1479 register unsigned long long r18 asm ("r18") = 1;
1480 register unsigned long long r4 asm ("r4") = i;
1481 register unsigned long long r19 asm ("r19") = j;
1483 r19 <<= 31;
1484 r18 <<= 31;
1485 do
1486 if (r4 >= r19)
1487 r0 |= r18, r4 -= r19;
1488 while (r19 >>= 1, r18 >>= 1);
1490 return r0;
1491 }
1492 */
1493 GLOBAL(udivsi3):
1494 pt/l LOCAL(udivsi3_dontadd), tr2
1495 pt/l LOCAL(udivsi3_loop), tr1
/* Keep the return address in tr0 because r18 is reused as the bit mask.  */
1496 ptabs/l r18, tr0
/* r0 = quotient accumulator, r18 = current quotient bit.  */
1497 movi 0, r0
1498 movi 1, r18
/* Zero-extend divisor (into r19) and dividend (r4) to 64 bits.  */
1499 addz.l r5, r63, r19
1500 addz.l r4, r63, r4
1501 shlli r19, 31, r19
1502 shlli r18, 31, r18
1503 LOCAL(udivsi3_loop):
/* Skip this bit when the shifted divisor exceeds the remainder.  */
1504 bgtu r19, r4, tr2
1505 or r0, r18, r0
1506 sub r4, r19, r4
1507 LOCAL(udivsi3_dontadd):
1508 shlri r18, 1, r18
1509 shlri r19, 1, r19
/* Loop until the bit mask has been shifted out.  */
1510 bnei r18, 0, tr1
1511 blink tr0, r63
1512 #else
/* udivsi3 (SHmedia, active version): unsigned 32-bit division by
   multiplying with an approximate reciprocal of the divisor.
   The divisor is normalised (nsb / shlld), a 16-bit fixed-point
   reciprocal is derived and refined, and the quotient is then built
   from three multiply / shift / subtract rounds plus a final
   compare-based correction.
   NOTE(review): the step-by-step summary above is read off the
   instruction sequence; the error bounds are not derivable from here.  */
1513 GLOBAL(udivsi3):
1514 // inputs: r4,r5
1515 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1516 // result in r0.
/* r22 = zero-extended divisor; r0 = its normalisation shift count.  */
1517 addz.l r5,r63,r22
1518 nsb r22,r0
1519 shlld r22,r0,r25
/* r25 = top 16 bits of the normalised divisor.  */
1520 shlri r25,48,r25
1521 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1522 sub r20,r25,r21
1523 mmulfx.w r21,r21,r19
1524 mshflo.w r21,r63,r21
/* Set up the return target early; r18 is reused as scratch below.  */
1525 ptabs r18,tr0
1526 mmulfx.w r25,r19,r19
1527 sub r20,r0,r0
1528 /* bubble */
1529 msub.w r21,r19,r19
1530 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1531 before the msub.w, but we need a different value for
1532 r19 to keep errors under control. */
/* First quotient estimate in r18.  */
1533 mulu.l r4,r21,r18
1534 mmulfx.w r19,r19,r19
1535 shlli r21,15,r21
1536 shlrd r18,r0,r18
1537 mulu.l r18,r22,r20
1538 mmacnfx.wl r25,r19,r21
1539 /* bubble */
/* r25 = remainder after the first estimate.  */
1540 sub r4,r20,r25
/* Second round: estimate from the remainder, accumulate into r18.  */
1542 mulu.l r25,r21,r19
1543 addi r0,14,r0
1544 /* bubble */
1545 shlrd r19,r0,r19
1546 mulu.l r19,r22,r20
1547 add r18,r19,r18
1548 /* bubble */
1549 sub.l r25,r20,r25
/* Third round, followed by a 0/1 correction from the compare.  */
1551 mulu.l r25,r21,r19
1552 addz.l r25,r63,r25
1553 sub r25,r22,r25
1554 shlrd r19,r0,r19
1555 mulu.l r19,r22,r20
1556 addi r25,1,r25
1557 add r18,r19,r18
1559 cmpgt r25,r20,r25
1560 add.l r18,r25,r0
1561 blink tr0,r63
1562 #endif
1563 #elif defined (__SHMEDIA__)
1564 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1565 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1566 So use a short shmedia loop. */
1567 // clobbered: r20,r21,r25,tr0,tr1,tr2
1568 .mode SHmedia
1569 .section .text..SHmedia32,"ax"
1570 .align 2
/* udivsi3 (m5compact-nofpu variant): unsigned 32-bit division with a
   32-iteration shift-and-subtract loop.
   Reads the dividend from r4 and the divisor from r5; the quotient is
   written to r0.
   NOTE(review): no zero-divisor test is made on this path.  */
1571 GLOBAL(udivsi3):
1572 pt/l LOCAL(udivsi3_dontsub), tr0
1573 pt/l LOCAL(udivsi3_loop), tr1
1574 ptabs/l r18,tr2
/* r25 = divisor << 32; r21 = r25 - 1.  Subtracting r21 removes the
   divisor from the upper half and adds 1 (the quotient bit) to the
   lower half in a single operation.  */
1575 shlli r5,32,r25
1576 addi r25,-1,r21
/* r20 = zero-extended dividend.  */
1577 addz.l r4,r63,r20
1578 LOCAL(udivsi3_loop):
1579 shlli r20,1,r20
/* Skip the subtraction while r21 >= r20 (unsigned).  */
1580 bgeu/u r21,r20,tr0
1581 sub r20,r21,r20
1582 LOCAL(udivsi3_dontsub):
/* r25 doubles as the loop counter: its 32-bit value runs -1, -2, ...
   and the loop ends at -32, i.e. after 32 iterations.  */
1583 addi.l r25,-1,r25
1584 bnei r25,-32,tr1
/* The low 32 bits of r20 hold the quotient.  */
1585 add.l r20,r63,r0
1586 blink tr2,r63
1587 #else /* ! defined (__SHMEDIA__) */
/* udivsi3 (SHcompact variant): unsigned 32-bit division with div1
   steps.  Reads the dividend from r4 and the divisor from r5 and
   returns the quotient in r0 (see the "args in r4 and r5" note above).
   pr is saved on the stack because the helpers are called with bsr.

   div8 / div7 run eight / seven div1 steps and return; divx4 runs four
   div1 steps, rotating each quotient bit into r0 as it goes.

   A divisor that fits in 16 bits is handled as two 16-step divisions
   of the dividend halves; anything larger goes to large_divisor.

   The listing had lost the "rts" that ends each of the two paths
   (listing lines 1626 and 1645).  Without the first one the
   small-divisor path fell through into large_divisor after pr had
   already been popped.  */
1588 LOCAL(div8):
1589 div1 r5,r4
1590 LOCAL(div7):
1591 div1 r5,r4; div1 r5,r4; div1 r5,r4
/* The last div1 executes in the delay slot of rts.  */
1592 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1594 LOCAL(divx4):
1595 div1 r5,r4; rotcl r0
1596 div1 r5,r4; rotcl r0
1597 div1 r5,r4; rotcl r0
/* Fourth step in the delay slot; its quotient bit is rotated in by the
   delay slot of the next bsr.  */
1598 rts; div1 r5,r4
1600 GLOBAL(udivsi3):
1601 sts.l pr,@-r15
/* Does the divisor fit in 16 bits?  */
1602 extu.w r5,r0
1603 cmp/eq r5,r0
/* On everything but SH1 the div0u below sits in the delay slot of bf/s,
   so it is executed on both paths.  */
1604 #ifdef __sh1__
1605 bf LOCAL(large_divisor)
1606 #else
1607 bf/s LOCAL(large_divisor)
1608 #endif
1609 div0u
/* Small divisor: divide the upper dividend half first.  */
1610 swap.w r4,r0
1611 shlr16 r4
/* 8 + 7 + 1 = 16 division steps; the divisor is moved to the upper
   half in the first delay slot.  */
1612 bsr LOCAL(div8)
1613 shll16 r5
1614 bsr LOCAL(div7)
1615 div1 r5,r4
1616 xtrct r4,r0
1617 xtrct r0,r4
/* Second group of 16 steps for the lower dividend half.  */
1618 bsr LOCAL(div8)
1619 swap.w r4,r4
1620 bsr LOCAL(div7)
1621 div1 r5,r4
1622 lds.l @r15+,pr
/* Combine the two 16-bit partial quotients into r0.  */
1623 xtrct r4,r0
1624 swap.w r0,r0
1625 rotcl r0
/* Return; the divisor is shifted back down in the delay slot.  */
1626 rts
1627 shlr16 r5
1629 LOCAL(large_divisor):
1630 #ifdef __sh1__
1631 div0u
1632 #endif
1633 mov #0,r0
1634 xtrct r4,r0
1635 xtrct r0,r4
/* Four calls of four steps each; every delay slot rotates the quotient
   bit left pending by divx4 into r0.  */
1636 bsr LOCAL(divx4)
1637 rotcl r0
1638 bsr LOCAL(divx4)
1639 rotcl r0
1640 bsr LOCAL(divx4)
1641 rotcl r0
1642 bsr LOCAL(divx4)
1643 rotcl r0
1644 lds.l @r15+,pr
/* Return; the final quotient bit is rotated in within the delay slot.  */
1645 rts
1646 rotcl r0
1648 ENDFUNC(GLOBAL(udivsi3))
1649 #endif /* ! __SHMEDIA__ */
1650 #endif /* __SH4__ */
1651 #endif /* L_udivsi3 */
1653 #ifdef L_udivdi3
1654 #ifdef __SHMEDIA__
1655 .mode SHmedia
1656 .section .text..SHmedia32,"ax"
1657 .align 2
/* udivdi3 (SHmedia): unsigned 64-bit division.
   The dividend is taken from r2 and the divisor from r3; the quotient
   is left in r2.  The code derives a fixed-point reciprocal of the
   normalised divisor and then performs successive 64:32 divide steps
   with it.  There are two paths: the fall-through "small divisor" path
   (three divide steps) and large_divisor (two steps plus adjustments).
   udivdi3_internal is a hidden alias of this entry point.
   NOTE(review): register roles are read off the code and its existing
   comments; the precision argument is not derivable from here.  */
1658 .global GLOBAL(udivdi3)
1659 FUNC(GLOBAL(udivdi3))
1660 GLOBAL(udivdi3):
1661 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
/* r22 = normalisation shift for the divisor; r6 = normalised divisor;
   r5 = its leading bits, used to seed the reciprocal.  */
1662 shlri r3,1,r4
1663 nsb r4,r22
1664 shlld r3,r22,r6
1665 shlri r6,49,r5
1666 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1667 sub r21,r5,r1
1668 mmulfx.w r1,r1,r4
1669 mshflo.w r1,r63,r1
1670 sub r63,r22,r20 // r63 == 64 % 64
1671 mmulfx.w r5,r4,r4
1672 pta LOCAL(large_divisor),tr0
/* r9 = 32 - r22; a positive value selects the large_divisor path.  */
1673 addi r20,32,r9
1674 msub.w r1,r4,r1
1675 madd.w r1,r1,r1
1676 mmulfx.w r1,r1,r4
/* r7 = upper 32 bits of the normalised divisor.  */
1677 shlri r6,32,r7
1678 bgt/u r9,r63,tr0 // large_divisor
1679 mmulfx.w r5,r4,r4
1680 shlri r2,32+14,r19
1681 addi r22,-31,r0
1682 msub.w r1,r4,r1
1684 mulu.l r1,r7,r4
1685 addi r1,-3,r5
1686 mulu.l r5,r19,r5
1687 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1688 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1689 the case may be, %0000000000000000 000.11111111111, still */
1690 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1691 mulu.l r5,r3,r8
1692 mshalds.l r1,r21,r1
1693 shari r4,26,r4
1694 shlld r8,r0,r8
1695 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1696 sub r2,r8,r2
1697 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1699 shlri r2,22,r21
1700 mulu.l r21,r1,r21
/* r8 accumulates the quotient from here on.  */
1701 shlld r5,r0,r8
1702 addi r20,30-22,r0
1703 shlrd r21,r0,r21
1704 mulu.l r21,r3,r5
1705 add r8,r21,r8
1706 mcmpgt.l r21,r63,r21 // See Note 1
1707 addi r20,30,r0
1708 mshfhi.l r63,r21,r21
1709 sub r2,r5,r2
1710 andc r2,r21,r2
1712 /* small divisor: need a third divide step */
1713 mulu.l r2,r1,r7
1714 ptabs r18,tr0
1715 addi r2,1,r2
1716 shlrd r7,r0,r7
1717 mulu.l r7,r3,r5
1718 add r8,r7,r8
1719 sub r2,r3,r2
/* Final 0/1 correction added to the accumulated quotient.  */
1720 cmpgt r2,r5,r5
1721 add r8,r5,r2
1722 /* could test r3 here to check for divide by zero. */
1723 blink tr0,r63
1725 LOCAL(large_divisor):
1726 mmulfx.w r5,r4,r4
/* r25 = dividend shifted right by r9; this path works on r25.  */
1727 shlrd r2,r9,r25
1728 shlri r25,32,r8
1729 msub.w r1,r4,r1
1731 mulu.l r1,r7,r4
1732 addi r1,-3,r5
1733 mulu.l r5,r8,r5
1734 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1735 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1736 the case may be, %0000000000000000 000.11111111111, still */
1737 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1738 shlri r5,14-1,r8
1739 mulu.l r8,r7,r5
1740 mshalds.l r1,r21,r1
1741 shari r4,26,r4
1742 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1743 sub r25,r5,r25
1744 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1746 shlri r25,22,r21
1747 mulu.l r21,r1,r21
1748 pta LOCAL(no_lo_adj),tr0
1749 addi r22,32,r0
1750 shlri r21,40,r21
1751 mulu.l r21,r7,r5
1752 add r8,r21,r8
1753 shlld r2,r0,r2
1754 sub r25,r5,r25
/* Bump the quotient once more unless r7 > r25 (unsigned).  */
1755 bgtu/u r7,r25,tr0 // no_lo_adj
1756 addi r8,1,r8
1757 sub r25,r7,r25
1758 LOCAL(no_lo_adj):
1759 mextr4 r2,r25,r2
1761 /* large_divisor: only needs a few adjustments. */
1762 mulu.l r8,r6,r5
1763 ptabs r18,tr0
1764 /* bubble */
/* Subtract 1 from the quotient when the product exceeds r2.  */
1765 cmpgtu r5,r2,r5
1766 sub r8,r5,r2
1767 blink tr0,r63
1768 ENDFUNC(GLOBAL(udivdi3))
1769 /* Note 1: To shift the result of the second divide stage so that the result
1770 always fits into 32 bits, yet we still reduce the rest sufficiently
1771 would require a lot of instructions to do the shifts just right. Using
1772 the full 64 bit shift result to multiply with the divisor would require
1773 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1774 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1775 know that the rest after taking this partial result into account will
1776 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1777 upper 32 bits of the partial result are nonzero. */
1778 #endif /* __SHMEDIA__ */
1779 #endif /* L_udivdi3 */
1781 #ifdef L_divdi3
1782 #ifdef __SHMEDIA__
1783 .mode SHmedia
1784 .section .text..SHmedia32,"ax"
1785 .align 2
/* divdi3 (SHmedia): signed 64-bit division built on udivdi3_internal.
   Operands are taken from r2 and r3 and replaced by their absolute
   values.  When both have the same sign the unsigned routine is
   tail-called; otherwise it is called and its result in r2 is negated.
   Uses r22, r23, tr0 and tr1 in addition to whatever the unsigned
   routine overwrites.  */
1786 .global GLOBAL(divdi3)
1787 FUNC(GLOBAL(divdi3))
1788 GLOBAL(divdi3):
1789 pta GLOBAL(udivdi3_internal),tr0
/* r22 / r23 = sign masks (0 or -1) of r2 / r3.  */
1790 shari r2,63,r22
1791 shari r3,63,r23
/* |x| = (x ^ mask) - mask.  */
1792 xor r2,r22,r2
1793 xor r3,r23,r3
1794 sub r2,r22,r2
1795 sub r3,r23,r3
/* Same signs: jump straight into the unsigned divide, which returns to
   our caller through the unchanged r18.  */
1796 beq/u r22,r23,tr0
/* Different signs: keep our return address in tr1, call the unsigned
   divide with the link in r18, then negate the quotient.  */
1797 ptabs r18,tr1
1798 blink tr0,r18
1799 sub r63,r2,r2
1800 blink tr1,r63
1801 ENDFUNC(GLOBAL(divdi3))
1802 #endif /* __SHMEDIA__ */
1803 #endif /* L_divdi3 */
1805 #ifdef L_umoddi3
1806 #ifdef __SHMEDIA__
1807 .mode SHmedia
1808 .section .text..SHmedia32,"ax"
1809 .align 2
/* umoddi3 (SHmedia): unsigned 64-bit remainder.
   The dividend is taken from r2 and the divisor from r3; the remainder
   is left in r2.  The instruction sequence mirrors udivdi3 (reciprocal
   of the normalised divisor followed by 64:32 divide steps) but keeps
   the running rest instead of accumulating a quotient.
   umoddi3_internal is a hidden alias of this entry point.
   NOTE(review): register roles are read off the code and its existing
   comments; the precision argument is not derivable from here.  */
1810 .global GLOBAL(umoddi3)
1811 FUNC(GLOBAL(umoddi3))
1812 GLOBAL(umoddi3):
1813 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
/* r22 = normalisation shift for the divisor; r6 = normalised divisor;
   r5 = its leading bits, used to seed the reciprocal.  */
1814 shlri r3,1,r4
1815 nsb r4,r22
1816 shlld r3,r22,r6
1817 shlri r6,49,r5
1818 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1819 sub r21,r5,r1
1820 mmulfx.w r1,r1,r4
1821 mshflo.w r1,r63,r1
1822 sub r63,r22,r20 // r63 == 64 % 64
1823 mmulfx.w r5,r4,r4
1824 pta LOCAL(large_divisor),tr0
/* r9 = 32 - r22; a positive value selects the large_divisor path.  */
1825 addi r20,32,r9
1826 msub.w r1,r4,r1
1827 madd.w r1,r1,r1
1828 mmulfx.w r1,r1,r4
/* r7 = upper 32 bits of the normalised divisor.  */
1829 shlri r6,32,r7
1830 bgt/u r9,r63,tr0 // large_divisor
1831 mmulfx.w r5,r4,r4
1832 shlri r2,32+14,r19
1833 addi r22,-31,r0
1834 msub.w r1,r4,r1
1836 mulu.l r1,r7,r4
1837 addi r1,-3,r5
1838 mulu.l r5,r19,r5
1839 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1840 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1841 the case may be, %0000000000000000 000.11111111111, still */
1842 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1843 mulu.l r5,r3,r5
1844 mshalds.l r1,r21,r1
1845 shari r4,26,r4
1846 shlld r5,r0,r5
1847 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1848 sub r2,r5,r2
1849 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1851 shlri r2,22,r21
1852 mulu.l r21,r1,r21
1853 addi r20,30-22,r0
1854 /* bubble */ /* could test r3 here to check for divide by zero. */
1855 shlrd r21,r0,r21
1856 mulu.l r21,r3,r5
1857 mcmpgt.l r21,r63,r21 // See Note 1
1858 addi r20,30,r0
1859 mshfhi.l r63,r21,r21
1860 sub r2,r5,r2
1861 andc r2,r21,r2
1863 /* small divisor: need a third divide step */
1864 mulu.l r2,r1,r7
1865 ptabs r18,tr0
1866 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1867 shlrd r7,r0,r7
1868 mulu.l r7,r3,r5
1869 /* bubble */
/* Pick "rest - r3" (r8) instead of the rest when the compare says one
   more divisor still fits, then subtract the last partial product.  */
1870 addi r8,1,r7
1871 cmpgt r7,r5,r7
1872 cmvne r7,r8,r2
1873 sub r2,r5,r2
1874 blink tr0,r63
1876 LOCAL(large_divisor):
1877 mmulfx.w r5,r4,r4
/* r25 = dividend shifted right by r9; this path works on r25.  */
1878 shlrd r2,r9,r25
1879 shlri r25,32,r8
1880 msub.w r1,r4,r1
1882 mulu.l r1,r7,r4
1883 addi r1,-3,r5
1884 mulu.l r5,r8,r5
1885 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1886 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1887 the case may be, %0000000000000000 000.11111111111, still */
1888 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1889 shlri r5,14-1,r8
1890 mulu.l r8,r7,r5
1891 mshalds.l r1,r21,r1
1892 shari r4,26,r4
1893 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1894 sub r25,r5,r25
1895 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1897 shlri r25,22,r21
1898 mulu.l r21,r1,r21
1899 pta LOCAL(no_lo_adj),tr0
1900 addi r22,32,r0
1901 shlri r21,40,r21
1902 mulu.l r21,r7,r5
1903 add r8,r21,r8
1904 shlld r2,r0,r2
1905 sub r25,r5,r25
/* One more adjustment unless r7 > r25 (unsigned).  */
1906 bgtu/u r7,r25,tr0 // no_lo_adj
1907 addi r8,1,r8
1908 sub r25,r7,r25
1909 LOCAL(no_lo_adj):
1910 mextr4 r2,r25,r2
1912 /* large_divisor: only needs a few adjustments. */
1913 mulu.l r8,r6,r5
1914 ptabs r18,tr0
/* If the product exceeds r2, add the normalised divisor back first;
   then subtract the product and undo the normalisation shift.  */
1915 add r2,r6,r7
1916 cmpgtu r5,r2,r8
1917 cmvne r8,r7,r2
1918 sub r2,r5,r2
1919 shlrd r2,r22,r2
1920 blink tr0,r63
1921 ENDFUNC(GLOBAL(umoddi3))
1922 /* Note 1: To shift the result of the second divide stage so that the result
1923 always fits into 32 bits, yet we still reduce the rest sufficiently
1924 would require a lot of instructions to do the shifts just right. Using
1925 the full 64 bit shift result to multiply with the divisor would require
1926 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1927 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1928 know that the rest after taking this partial result into account will
1929 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1930 upper 32 bits of the partial result are nonzero. */
1931 #endif /* __SHMEDIA__ */
1932 #endif /* L_umoddi3 */
1934 #ifdef L_moddi3
1935 #ifdef __SHMEDIA__
1936 .mode SHmedia
1937 .section .text..SHmedia32,"ax"
1938 .align 2
/* moddi3 (SHmedia): signed 64-bit remainder built on umoddi3_internal.
   Operands are taken from r2 and r3 and replaced by their absolute
   values.  Only the sign of r2 decides the sign of the result: when r2
   was non-negative the unsigned routine is tail-called; otherwise it is
   called and its result in r2 is negated.
   Uses r22, r23, tr0 and tr1 in addition to whatever the unsigned
   routine overwrites.  */
1939 .global GLOBAL(moddi3)
1940 FUNC(GLOBAL(moddi3))
1941 GLOBAL(moddi3):
1942 pta GLOBAL(umoddi3_internal),tr0
/* r22 / r23 = sign masks (0 or -1) of r2 / r3.  */
1943 shari r2,63,r22
1944 shari r3,63,r23
/* |x| = (x ^ mask) - mask.  */
1945 xor r2,r22,r2
1946 xor r3,r23,r3
1947 sub r2,r22,r2
1948 sub r3,r23,r3
/* r22 == 0 (r2 was non-negative): jump straight into the unsigned
   routine, which returns to our caller through the unchanged r18.  */
1949 beq/u r22,r63,tr0
/* Otherwise keep our return address in tr1, call the unsigned routine
   with the link in r18, then negate the remainder.  */
1950 ptabs r18,tr1
1951 blink tr0,r18
1952 sub r63,r2,r2
1953 blink tr1,r63
1954 ENDFUNC(GLOBAL(moddi3))
1955 #endif /* __SHMEDIA__ */
1956 #endif /* L_moddi3 */
1958 #ifdef L_set_fpscr
1959 #if !defined (__SH2A_NOFPU__)
1960 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1961 #ifdef __SH5__
1962 .mode SHcompact
1963 #endif
/* set_fpscr: load fpscr from r4 and refresh the two-word table
   fpscr_values with two variants of that value.
   The upper half of r4 is brought into the low half of r0 so that the
   8-bit immediate forms of or/xor can reach bits 19 and 20 of the
   fpscr value (immediates 8 and 16 after the swap).  The first variant
   has bit 19 set, the second has it cleared; bit 20 follows bit 19
   when FMOVD_WORKS is defined and is cleared in both otherwise.  Which
   table slot receives which variant depends on the configuration
   selected by the #if below.
   Overwrites r0, r1 and r2 / r3; with __PIC__, r12 is saved and
   restored around the GOT lookup.
   The listing had lost the "rts" in both arms of the final #if
   (listing lines 1998 and 2002); without it execution ran on into the
   literal pool.  */
1964 .global GLOBAL(set_fpscr)
1965 HIDDEN_FUNC(GLOBAL(set_fpscr))
1966 GLOBAL(set_fpscr):
1967 lds r4,fpscr
/* r1 = address of fpscr_values (through the GOT when PIC).  */
1968 #ifdef __PIC__
1969 mov.l r12,@-r15
1970 mova LOCAL(set_fpscr_L0),r0
1971 mov.l LOCAL(set_fpscr_L0),r12
1972 add r0,r12
1973 mov.l LOCAL(set_fpscr_L1),r0
1974 mov.l @(r0,r12),r1
1975 mov.l @r15+,r12
1976 #else
1977 mov.l LOCAL(set_fpscr_L1),r1
1978 #endif
/* Swap halves, then set bits 19 and 20 of the original value.  */
1979 swap.w r4,r0
1980 or #24,r0
/* Without a working 64-bit fmov, clear bit 20 again.  */
1981 #ifndef FMOVD_WORKS
1982 xor #16,r0
1983 #endif
/* Store the first variant (bit 19 set).  */
1984 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1985 swap.w r0,r3
1986 mov.l r3,@(4,r1)
1987 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1988 swap.w r0,r2
1989 mov.l r2,@r1
1990 #endif
/* Flip bit 19 (and bit 20 where it was set) for the second variant.  */
1991 #ifndef FMOVD_WORKS
1992 xor #8,r0
1993 #else
1994 xor #24,r0
1995 #endif
/* Store the second variant from the delay slot of the return.  */
1996 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1997 swap.w r0,r2
1998 rts
1999 mov.l r2,@r1
2000 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2001 swap.w r0,r3
2002 rts
2003 mov.l r3,@(4,r1)
2004 #endif
/* Literal pool.  */
2005 .align 2
2006 #ifdef __PIC__
2007 LOCAL(set_fpscr_L0):
2008 .long _GLOBAL_OFFSET_TABLE_
2009 LOCAL(set_fpscr_L1):
2010 .long GLOBAL(fpscr_values@GOT)
2011 #else
2012 LOCAL(set_fpscr_L1):
2013 .long GLOBAL(fpscr_values)
2014 #endif
2016 ENDFUNC(GLOBAL(set_fpscr))
2017 #ifndef NO_FPSCR_VALUES
2018 #ifdef __ELF__
2019 .comm GLOBAL(fpscr_values),8,4
2020 #else
2021 .comm GLOBAL(fpscr_values),8
2022 #endif /* ELF */
2023 #endif /* NO_FPSCR_VALUES */
2024 #endif /* SH2E / SH3E / SH4 */
2025 #endif /* __SH2A_NOFPU__ */
2026 #endif /* L_set_fpscr */
2027 #ifdef L_ic_invalidate
2028 #if __SH5__ == 32
2029 .mode SHmedia
2030 .section .text..SHmedia32,"ax"
2031 .align 2
/* init_trampoline (SH5, SHmedia): fill in the 16-byte trampoline at the
   address in r0, then fall through into ic_invalidate so the new code
   is written back and the matching instruction cache line is dropped.
   Layout written: an 8-byte code template at offset 0, r2 at offset 8
   and r3 at offset 12.
   NOTE(review): the template halfwords 0xd002, 0xd101, 0x402b, 0x0009
   look like SHcompact "mov.l @(disp,pc),r0 / mov.l @(disp,pc),r1 /
   jmp @r0 / nop" - confirm against the opcode tables.  */
2032 .global GLOBAL(init_trampoline)
2033 HIDDEN_FUNC(GLOBAL(init_trampoline))
2034 GLOBAL(init_trampoline):
2035 st.l r0,8,r2
/* Build the 64-bit code template in r20, 16 bits at a time; the order
   depends on endianness so the halfwords land in the same memory order.  */
2036 #ifdef __LITTLE_ENDIAN__
2037 movi 9,r20
2038 shori 0x402b,r20
2039 shori 0xd101,r20
2040 shori 0xd002,r20
2041 #else
2042 movi 0xffffffffffffd002,r20
2043 shori 0xd101,r20
2044 shori 0x402b,r20
2045 shori 9,r20
2046 #endif
2047 st.q r0,0,r20
2048 st.l r0,12,r3
/* No return here: execution continues into ic_invalidate below.  */
2049 ENDFUNC(GLOBAL(init_trampoline))
/* ic_invalidate (SH5, SHmedia): write back the data cache block at the
   address in r0, invalidate the instruction cache block for the same
   address, and return through r18.  */
2050 .global GLOBAL(ic_invalidate)
2051 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2052 GLOBAL(ic_invalidate):
2053 ocbwb r0,0
2054 synco
2055 icbi r0, 0
2056 ptabs r18, tr0
2057 synci
2058 blink tr0, r63
2059 ENDFUNC(GLOBAL(ic_invalidate))
2060 #elif defined(__SH4A__)
/* ic_invalidate (SH4A): write back the data cache block at the address
   in r4, then invalidate the instruction cache block for that address.
   The listing had lost the "rts" in front of "icbi" (listing line
   2066); without it the function never returned.  */
2061 .global GLOBAL(ic_invalidate)
2062 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2063 GLOBAL(ic_invalidate):
2064 ocbwb @r4
2065 synco
/* Return; the icbi executes in the delay slot.  */
2066 rts
2067 icbi @r4
2068 ENDFUNC(GLOBAL(ic_invalidate))
2069 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2070 /* For system code, we use ic_invalidate_line_i, but user code
2071 needs a different mechanism. A kernel call is generally not
2072 available, and it would also be slow. Different SH4 variants use
2073 different sizes and associativities of the Icache. We use a small
2074 bit of dispatch code that can be put hidden in every shared object,
2075 which calls the actual processor-specific invalidation code in a
2076 separate module.
2077 Or if you have operating system support, the OS could mmap the
2078 processor-specific code from a single page, since it is highly
2079 repetitive. */
/* ic_invalidate (SH4 dispatcher): r4 holds the address to invalidate.
   The code writes back the data cache block at r4, then jumps into
   ic_invalidate_array at the offset ((r4 - array) & mask), where mask
   is the word stored at array + 8.  On arrival r1 = array base,
   r4 = r4 - array, and r0 = the word stored at array + 4 (loaded in the
   jmp delay slot).  Overwrites r0, r1, r4 (and r2 when PIC).
   NOTE(review): ENDFUNC below sits inside the PIC arm only, so the
   non-PIC build never closes the HIDDEN_FUNC - looks unintended.  */
2080 .global GLOBAL(ic_invalidate)
2081 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2082 GLOBAL(ic_invalidate):
/* Non-PIC: r1 = address of the array.  PIC: r1 = GOT literal, resolved
   to the array address by the sequence below.  */
2083 mov.l 0f,r1
2084 #ifdef __pic__
2085 mova 0f,r0
2086 mov.l 1f,r2
2087 add r1,r0
2088 mov.l @(r0,r2),r1
2089 #endif
2090 ocbwb @r4
/* r0 = mask word from the array header.  */
2091 mov.l @(8,r1),r0
2092 sub r1,r4
2093 and r4,r0
2094 add r1,r0
2095 jmp @r0
2096 mov.l @(4,r1),r0
2097 #ifndef __pic__
2098 0: .long GLOBAL(ic_invalidate_array)
2099 #else /* __pic__ */
2100 .global GLOBAL(ic_invalidate_array)
2101 /* ??? Why won't the assembler allow to add these two constants? */
2102 0: .long _GLOBAL_OFFSET_TABLE_
2103 1: .long GLOBAL(ic_invalidate_array)@GOT
2104 ENDFUNC(GLOBAL(ic_invalidate))
2105 #endif /* __pic__ */
2106 #endif /* SH4 */
2107 #endif /* L_ic_invalidate */
2109 #ifdef L_ic_invalidate_array
2110 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* ic_invalidate_array (SH4A flavour): target of the SH4 ic_invalidate
   dispatcher when running on SH4A.  The dispatcher arrives with
   r1 = array base and r4 = address - base, so the original address is
   rebuilt before the icbi.  The ".long 0" must sit at offset 8, where
   the dispatcher reads its offset mask; a zero mask makes every call
   land on the first instruction here.
   The listing had lost the "rts" in front of "icbi" (listing line
   2118); without it the function did not return and the zero word
   slipped to offset 6.  */
2111 .global GLOBAL(ic_invalidate_array)
2112 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2113 .global GLOBAL(ic_invalidate_array)
2114 FUNC(GLOBAL(ic_invalidate_array))
2115 GLOBAL(ic_invalidate_array):
/* Undo the dispatcher's "sub r1,r4".  */
2116 add r1,r4
2117 synco
/* Return; the icbi executes in the delay slot.  */
2118 rts
2119 icbi @r4
/* Offset 8: mask word read by the dispatcher.  */
2120 .long 0
2121 ENDFUNC(GLOBAL(ic_invalidate_array))
2122 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* ic_invalidate_array (SH4): a block of code as large as the
   instruction cache, built from one 32-byte entry per cache line.
   The ic_invalidate dispatcher jumps to the entry whose offset matches
   the address being invalidated.  Each entry either returns at once or
   hops with braf to the entry at the same offset in the next way; r0
   holds the hop distance, preloaded by the dispatcher from offset 4.
   Offset 8 of the first entry holds the mask the dispatcher applies
   (WAY_SIZE - 32).
   WAYS and WAY_SIZE may be predefined; the defaults are 4 and 0x4000.
   The listing had lost every "rts" and "nop" line (listing lines 2134,
   2135, 2148, 2151, 2152, 2160, 2168, 2170).  Without them the entries
   were no longer 32 bytes long and the code never returned.  */
2123 .global GLOBAL(ic_invalidate_array)
2124 .p2align 5
2125 FUNC(GLOBAL(ic_invalidate_array))
2126 /* This must be aligned to the beginning of a cache line. */
2127 GLOBAL(ic_invalidate_array):
2128 #ifndef WAYS
2129 #define WAYS 4
2130 #define WAY_SIZE 0x4000
2131 #endif
2132 #if WAYS == 1
/* Direct mapped: every entry just returns; 4 + 7 * 4 = 32 bytes.  */
2133 .rept WAY_SIZE * WAYS / 32
2134 rts
2135 nop
2136 .rept 7
2137 .long WAY_SIZE - 32
2138 .endr
2139 .endr
2140 #elif WAYS <= 6
/* Entry layout (32 bytes): first hop (4 bytes), two data words,
   WAYS-2 further hops, then 7-WAYS returns as padding.  */
2141 .rept WAY_SIZE * WAYS / 32
2142 braf r0
/* Delay slot: shrink the distance for the hops that follow.  */
2143 add #-8,r0
2144 .long WAY_SIZE + 8
2145 .long WAY_SIZE - 32
2146 .rept WAYS-2
2147 braf r0
2148 nop
2149 .endr
2150 .rept 7 - WAYS
2151 rts
2152 nop
2153 .endr
2154 .endr
2155 #else /* WAYS > 6 */
2156 /* This variant needs two different pages for mmap-ing. */
/* First WAYS-1 ways: each entry hops on to the next way.  */
2157 .rept WAYS-1
2158 .rept WAY_SIZE / 32
2159 braf r0
2160 nop
2161 .long WAY_SIZE
2162 .rept 6
2163 .long WAY_SIZE - 32
2164 .endr
2165 .endr
2166 .endr
/* Last way: each entry returns; rts plus 15 nops fills 32 bytes.  */
2167 .rept WAY_SIZE / 32
2168 rts
2169 .rept 15
2170 nop
2171 .endr
2172 .endr
2173 #endif /* WAYS */
2174 ENDFUNC(GLOBAL(ic_invalidate_array))
2175 #endif /* SH4 */
2176 #endif /* L_ic_invalidate_array */
2178 #if defined (__SH5__) && __SH5__ == 32
2179 #ifdef L_shcompact_call_trampoline
/* ct_main_table: dispatch table for GCC_shcompact_call_trampoline.
   Each entry is a 16-bit offset of a handler label relative to
   ct_main_label.  The trampoline loads an entry with
   "ldx.w r0, r29, r30" where r0 = table - 31 * 2 and r29 = 2 * nsb(r1),
   then branches relative to ct_main_label.
   NOTE(review): presumably entry 0 corresponds to the highest cookie
   bit (bit 31) and the last entry to an all-zero cookie - confirm
   against the cookie layout in sh.h.  */
2180 .section .rodata
2181 .align 1
2182 LOCAL(ct_main_table):
/* r2 .. r5: three handlers each (from FP register, load, pop).  */
2183 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2184 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2185 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2186 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2187 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2188 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2189 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2190 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2191 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2192 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2193 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2194 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* r6 .. r9: four handlers each (high FP, low FP, load, pop).  */
2195 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2196 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2197 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2198 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2199 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2200 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2201 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2202 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2203 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2204 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2205 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2206 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2207 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2208 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2209 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2210 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* Remaining entries: pop sequences, wide return, and the final call.  */
2211 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2212 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2213 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2214 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2215 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2216 .mode SHmedia
2217 .section .text..SHmedia32, "ax"
2218 .align 2
2220 /* This function loads 64-bit general-purpose registers from the
2221 stack, from a memory address contained in them or from an FP
2222 register, according to a cookie passed in r1. Its execution
2223 time is linear on the number of registers that actually have
2224 to be copied. See sh.h for details on the actual bit pattern.
2226 The function to be called is passed in r0. If a 32-bit return
2227 value is expected, the actual function will be tail-called,
2228 otherwise the return address will be stored in r10 (that the
2229 caller should expect to be clobbered) and the return value
2230 will be expanded into r2/r3 upon return. */
2232 .global GLOBAL(GCC_shcompact_call_trampoline)
2233 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2234 GLOBAL(GCC_shcompact_call_trampoline):
2235 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2236 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2237 pt/l LOCAL(ct_loop), tr1
2238 addz.l r1, r63, r1
2239 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2240 LOCAL(ct_loop):
2241 nsb r1, r28
2242 shlli r28, 1, r29
2243 ldx.w r0, r29, r30
2244 LOCAL(ct_main_label):
2245 ptrel/l r30, tr2
2246 blink tr2, r63
2247 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2248 /* It must be dr0, so just do it. */
2249 fmov.dq dr0, r2
2250 movi 7, r30
2251 shlli r30, 29, r31
2252 andc r1, r31, r1
2253 blink tr1, r63
2254 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2255 /* It is either dr0 or dr2. */
2256 movi 7, r30
2257 shlri r1, 26, r32
2258 shlli r30, 26, r31
2259 andc r1, r31, r1
2260 fmov.dq dr0, r3
2261 beqi/l r32, 4, tr1
2262 fmov.dq dr2, r3
2263 blink tr1, r63
2264 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2265 shlri r1, 23 - 3, r34
2266 andi r34, 3 << 3, r33
2267 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2268 LOCAL(ct_r4_fp_base):
2269 ptrel/l r32, tr2
2270 movi 7, r30
2271 shlli r30, 23, r31
2272 andc r1, r31, r1
2273 blink tr2, r63
2274 LOCAL(ct_r4_fp_copy):
2275 fmov.dq dr0, r4
2276 blink tr1, r63
2277 fmov.dq dr2, r4
2278 blink tr1, r63
2279 fmov.dq dr4, r4
2280 blink tr1, r63
2281 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2282 shlri r1, 20 - 3, r34
2283 andi r34, 3 << 3, r33
2284 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2285 LOCAL(ct_r5_fp_base):
2286 ptrel/l r32, tr2
2287 movi 7, r30
2288 shlli r30, 20, r31
2289 andc r1, r31, r1
2290 blink tr2, r63
2291 LOCAL(ct_r5_fp_copy):
2292 fmov.dq dr0, r5
2293 blink tr1, r63
2294 fmov.dq dr2, r5
2295 blink tr1, r63
2296 fmov.dq dr4, r5
2297 blink tr1, r63
2298 fmov.dq dr6, r5
2299 blink tr1, r63
2300 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2301 /* It must be dr8. */
2302 fmov.dq dr8, r6
2303 movi 15, r30
2304 shlli r30, 16, r31
2305 andc r1, r31, r1
2306 blink tr1, r63
2307 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2308 shlri r1, 16 - 3, r34
2309 andi r34, 3 << 3, r33
2310 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2311 LOCAL(ct_r6_fp_base):
2312 ptrel/l r32, tr2
2313 movi 7, r30
2314 shlli r30, 16, r31
2315 andc r1, r31, r1
2316 blink tr2, r63
2317 LOCAL(ct_r6_fp_copy):
2318 fmov.dq dr0, r6
2319 blink tr1, r63
2320 fmov.dq dr2, r6
2321 blink tr1, r63
2322 fmov.dq dr4, r6
2323 blink tr1, r63
2324 fmov.dq dr6, r6
2325 blink tr1, r63
2326 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2327 /* It is either dr8 or dr10. */
2328 movi 15 << 12, r31
2329 shlri r1, 12, r32
2330 andc r1, r31, r1
2331 fmov.dq dr8, r7
2332 beqi/l r32, 8, tr1
2333 fmov.dq dr10, r7
2334 blink tr1, r63
2335 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2336 shlri r1, 12 - 3, r34
2337 andi r34, 3 << 3, r33
2338 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2339 LOCAL(ct_r7_fp_base):
2340 ptrel/l r32, tr2
2341 movi 7 << 12, r31
2342 andc r1, r31, r1
2343 blink tr2, r63
2344 LOCAL(ct_r7_fp_copy):
2345 fmov.dq dr0, r7
2346 blink tr1, r63
2347 fmov.dq dr2, r7
2348 blink tr1, r63
2349 fmov.dq dr4, r7
2350 blink tr1, r63
2351 fmov.dq dr6, r7
2352 blink tr1, r63
2353 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2354 /* It is either dr8 or dr10. */
2355 movi 15 << 8, r31
2356 andi r1, 1 << 8, r32
2357 andc r1, r31, r1
2358 fmov.dq dr8, r8
2359 beq/l r32, r63, tr1
2360 fmov.dq dr10, r8
2361 blink tr1, r63
2362 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2363 shlri r1, 8 - 3, r34
2364 andi r34, 3 << 3, r33
2365 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2366 LOCAL(ct_r8_fp_base):
2367 ptrel/l r32, tr2
2368 movi 7 << 8, r31
2369 andc r1, r31, r1
2370 blink tr2, r63
2371 LOCAL(ct_r8_fp_copy):
2372 fmov.dq dr0, r8
2373 blink tr1, r63
2374 fmov.dq dr2, r8
2375 blink tr1, r63
2376 fmov.dq dr4, r8
2377 blink tr1, r63
2378 fmov.dq dr6, r8
2379 blink tr1, r63
2380 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2381 /* It is either dr8 or dr10. */
2382 movi 15 << 4, r31
2383 andi r1, 1 << 4, r32
2384 andc r1, r31, r1
2385 fmov.dq dr8, r9
2386 beq/l r32, r63, tr1
2387 fmov.dq dr10, r9
2388 blink tr1, r63
2389 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2390 shlri r1, 4 - 3, r34
2391 andi r34, 3 << 3, r33
2392 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2393 LOCAL(ct_r9_fp_base):
2394 ptrel/l r32, tr2
2395 movi 7 << 4, r31
2396 andc r1, r31, r1
2397 blink tr2, r63
2398 LOCAL(ct_r9_fp_copy):
2399 fmov.dq dr0, r9
2400 blink tr1, r63
2401 fmov.dq dr2, r9
2402 blink tr1, r63
2403 fmov.dq dr4, r9
2404 blink tr1, r63
2405 fmov.dq dr6, r9
2406 blink tr1, r63
2407 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2408 pt/l LOCAL(ct_r2_load), tr2
2409 movi 3, r30
2410 shlli r30, 29, r31
2411 and r1, r31, r32
2412 andc r1, r31, r1
2413 beq/l r31, r32, tr2
2414 addi.l r2, 8, r3
2415 ldx.q r2, r63, r2
2416 /* Fall through. */
2417 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2418 pt/l LOCAL(ct_r3_load), tr2
2419 movi 3, r30
2420 shlli r30, 26, r31
2421 and r1, r31, r32
2422 andc r1, r31, r1
2423 beq/l r31, r32, tr2
2424 addi.l r3, 8, r4
2425 ldx.q r3, r63, r3
2426 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2427 pt/l LOCAL(ct_r4_load), tr2
2428 movi 3, r30
2429 shlli r30, 23, r31
2430 and r1, r31, r32
2431 andc r1, r31, r1
2432 beq/l r31, r32, tr2
2433 addi.l r4, 8, r5
2434 ldx.q r4, r63, r4
2435 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2436 pt/l LOCAL(ct_r5_load), tr2
2437 movi 3, r30
2438 shlli r30, 20, r31
2439 and r1, r31, r32
2440 andc r1, r31, r1
2441 beq/l r31, r32, tr2
2442 addi.l r5, 8, r6
2443 ldx.q r5, r63, r5
2444 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2445 pt/l LOCAL(ct_r6_load), tr2
2446 movi 3 << 16, r31
2447 and r1, r31, r32
2448 andc r1, r31, r1
2449 beq/l r31, r32, tr2
2450 addi.l r6, 8, r7
2451 ldx.q r6, r63, r6
2452 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2453 pt/l LOCAL(ct_r7_load), tr2
2454 movi 3 << 12, r31
2455 and r1, r31, r32
2456 andc r1, r31, r1
2457 beq/l r31, r32, tr2
2458 addi.l r7, 8, r8
2459 ldx.q r7, r63, r7
2460 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2461 pt/l LOCAL(ct_r8_load), tr2
2462 movi 3 << 8, r31
2463 and r1, r31, r32
2464 andc r1, r31, r1
2465 beq/l r31, r32, tr2
2466 addi.l r8, 8, r9
2467 ldx.q r8, r63, r8
2468 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2469 pt/l LOCAL(ct_check_tramp), tr2
2470 ldx.q r9, r63, r9
2471 blink tr2, r63
2472 LOCAL(ct_r2_load):
2473 ldx.q r2, r63, r2
2474 blink tr1, r63
2475 LOCAL(ct_r3_load):
2476 ldx.q r3, r63, r3
2477 blink tr1, r63
2478 LOCAL(ct_r4_load):
2479 ldx.q r4, r63, r4
2480 blink tr1, r63
2481 LOCAL(ct_r5_load):
2482 ldx.q r5, r63, r5
2483 blink tr1, r63
2484 LOCAL(ct_r6_load):
2485 ldx.q r6, r63, r6
2486 blink tr1, r63
2487 LOCAL(ct_r7_load):
2488 ldx.q r7, r63, r7
2489 blink tr1, r63
2490 LOCAL(ct_r8_load):
2491 ldx.q r8, r63, r8
2492 blink tr1, r63
2493 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2494 movi 1, r30
2495 ldx.q r15, r63, r2
2496 shlli r30, 29, r31
2497 addi.l r15, 8, r15
2498 andc r1, r31, r1
2499 blink tr1, r63
2500 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2501 movi 1, r30
2502 ldx.q r15, r63, r3
2503 shlli r30, 26, r31
2504 addi.l r15, 8, r15
2505 andc r1, r31, r1
2506 blink tr1, r63
2507 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2508 movi 1, r30
2509 ldx.q r15, r63, r4
2510 shlli r30, 23, r31
2511 addi.l r15, 8, r15
2512 andc r1, r31, r1
2513 blink tr1, r63
2514 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2515 movi 1, r30
2516 ldx.q r15, r63, r5
2517 shlli r30, 20, r31
2518 addi.l r15, 8, r15
2519 andc r1, r31, r1
2520 blink tr1, r63
2521 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2522 movi 1, r30
2523 ldx.q r15, r63, r6
2524 shlli r30, 16, r31
2525 addi.l r15, 8, r15
2526 andc r1, r31, r1
2527 blink tr1, r63
2528 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2529 ldx.q r15, r63, r7
2530 movi 1 << 12, r31
2531 addi.l r15, 8, r15
2532 andc r1, r31, r1
2533 blink tr1, r63
2534 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2535 ldx.q r15, r63, r8
2536 movi 1 << 8, r31
2537 addi.l r15, 8, r15
2538 andc r1, r31, r1
2539 blink tr1, r63
2540 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2541 andi r1, 7 << 1, r30
2542 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2543 shlli r30, 2, r31
2544 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2545 sub.l r32, r31, r33
2546 ptabs/l r33, tr2
2547 blink tr2, r63
2548 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2549 ldx.q r15, r63, r3
2550 addi.l r15, 8, r15
2551 ldx.q r15, r63, r4
2552 addi.l r15, 8, r15
2553 ldx.q r15, r63, r5
2554 addi.l r15, 8, r15
2555 ldx.q r15, r63, r6
2556 addi.l r15, 8, r15
2557 ldx.q r15, r63, r7
2558 addi.l r15, 8, r15
2559 ldx.q r15, r63, r8
2560 addi.l r15, 8, r15
2561 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2562 ldx.q r15, r63, r9
2563 addi.l r15, 8, r15
2564 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2565 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2566 pt/u LOCAL(ct_ret_wide), tr2
2567 andi r1, 1, r1
2568 bne/u r1, r63, tr2
2569 LOCAL(ct_call_func): /* Just branch to the function. */
2570 blink tr0, r63
2571 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2572 64-bit return value. */
2573 add.l r18, r63, r10
2574 blink tr0, r18
2575 ptabs r10, tr0
2576 #if __LITTLE_ENDIAN__
2577 shari r2, 32, r3
2578 add.l r2, r63, r2
2579 #else
2580 add.l r2, r63, r3
2581 shari r2, 32, r2
2582 #endif
2583 blink tr0, r63
2585 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2586 #endif /* L_shcompact_call_trampoline */
2588 #ifdef L_shcompact_return_trampoline
2589 /* Tail-called by SHcompact functions that return a 64-bit
2590 non-floating-point value as two 32-bit halves in r2 and r3.
2591 Merges the halves into the single 64-bit register r2 (the converse
2592 of the unpacking done at `ret_wide') and returns through r18. */
2594 .mode SHmedia
2595 .section .text..SHmedia32, "ax"
2596 .align 2
2597 .global GLOBAL(GCC_shcompact_return_trampoline)
2598 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2599 GLOBAL(GCC_shcompact_return_trampoline):
2600 ptabs/l r18, tr0 /* tr0 = return address taken from r18 */
2601 #if __LITTLE_ENDIAN__
2602 shlli r3, 32, r3 /* r3 supplies the upper 32 bits */
2603 addz.l r2, r63, r2 /* r2 supplies the lower 32 bits, zero-extended */
2604 #else
2605 shlli r2, 32, r2 /* r2 supplies the upper 32 bits */
2606 addz.l r3, r63, r3 /* r3 supplies the lower 32 bits, zero-extended */
2607 #endif
2608 or r2, r3, r2 /* r2 = upper half | lower half */
2609 blink tr0, r63 /* return; the link value is discarded into r63 */
2611 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2612 #endif /* L_shcompact_return_trampoline */
2614 #ifdef L_shcompact_incoming_args
2615 .section .rodata
2616 .align 1
/* Dispatch table for GCC_shcompact_incoming_args.
   Each 16-bit entry is either the distance of a handler from
   ia_main_label (consumed there by ptrel/l) or the value 1 for slots
   marked invalid.  The function indexes the table with 2 * nsb(cookie)
   from a base biased by -31 * 2, so entry k (counting from 0) is used
   when nsb returns 31 + k.  With nsb yielding the number of leading
   sign bits minus one and the cookie zero-extended to 64 bits, that is
   the case where bit 31 - k is the highest bit still set; the final
   entry is reached when no cookie bits remain.  */
2617 LOCAL(ia_main_table):
2618 .word 1 /* Invalid, just loop */
2619 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2620 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2621 .word 1 /* Invalid, just loop */
2622 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2623 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2624 .word 1 /* Invalid, just loop */
2625 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2626 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2627 .word 1 /* Invalid, just loop */
2628 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2629 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2630 .word 1 /* Invalid, just loop */
2631 .word 1 /* Invalid, just loop */
2632 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2633 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2634 .word 1 /* Invalid, just loop */
2635 .word 1 /* Invalid, just loop */
2636 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2637 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2638 .word 1 /* Invalid, just loop */
2639 .word 1 /* Invalid, just loop */
2640 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2641 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2642 .word 1 /* Invalid, just loop */
2643 .word 1 /* Invalid, just loop */
2644 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2645 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2646 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2647 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2648 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2649 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2650 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2651 .mode SHmedia
2652 .section .text..SHmedia32, "ax"
2653 .align 2
2655 /* This function stores 64-bit general-purpose registers back in
2656 the stack, and loads the address in which each register
2657 was stored into itself. The lower 32 bits of r17 hold the address
2658 to begin storing, and the upper 32 bits of r17 hold the cookie.
2659 Its execution time is linear on the
2660 number of registers that actually have to be copied, and it is
2661 optimized for structures larger than 64 bits, as opposed to
2662 individual `long long' arguments. See sh.h for details on the
2663 actual bit pattern. */
/* Register usage visible in the code below:
     r0       cookie bits still to be processed; every handler clears
              the bits it has consumed before looping
     r17      store cursor, advanced by 8 after each store
     r43      biased base address of ia_main_table
     r36-r41  scratch
     tr0      return address (from r18)
     tr1      ia_loop
     tr2      computed dispatch target
   The `ld' handlers test a two-bit cookie field: when both bits were
   set they branch back to ia_loop, otherwise they fall through into
   the handler of the next register.  The `push' handlers clear a
   single bit and always go back to ia_loop.
   NOTE(review): ia_push_seq jumps to an address computed backwards from
   ia_end_of_push_seq, so the number of instructions between
   ia_stack_of_push_seq and ia_end_of_push_seq must not change.  */
2665 .global GLOBAL(GCC_shcompact_incoming_args)
2666 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2667 GLOBAL(GCC_shcompact_incoming_args):
2668 ptabs/l r18, tr0 /* Prepare to return. */
2669 shlri r17, 32, r0 /* Load the cookie. */
2670 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2671 pt/l LOCAL(ia_loop), tr1
2672 add.l r17, r63, r17 /* keep only the low 32 bits of r17: the store address */
2673 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2674 LOCAL(ia_loop):
2675 nsb r0, r36 /* position of the highest remaining cookie bit */
2676 shlli r36, 1, r37 /* scale to a 16-bit table index */
2677 ldx.w r43, r37, r38 /* r38 = table entry: offset from ia_main_label */
2678 LOCAL(ia_main_label):
2679 ptrel/l r38, tr2
2680 blink tr2, r63 /* dispatch to the selected handler */
2681 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2682 movi 3, r38
2683 shlli r38, 29, r39 /* r39 = 3 << 29: the two-bit field for r2 */
2684 and r0, r39, r40 /* r40 = field bits present in the cookie */
2685 andc r0, r39, r0 /* consume the field */
2686 stx.q r17, r63, r2 /* store r2 at the cursor */
2687 add.l r17, r63, r2 /* r2 = address it was stored at */
2688 addi.l r17, 8, r17 /* advance the cursor */
2689 beq/u r39, r40, tr1 /* both bits were set: back to ia_loop; else fall through */
2690 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2691 movi 3, r38
2692 shlli r38, 26, r39 /* field mask 3 << 26 */
2693 and r0, r39, r40
2694 andc r0, r39, r0
2695 stx.q r17, r63, r3
2696 add.l r17, r63, r3
2697 addi.l r17, 8, r17
2698 beq/u r39, r40, tr1
2699 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2700 movi 3, r38
2701 shlli r38, 23, r39 /* field mask 3 << 23 */
2702 and r0, r39, r40
2703 andc r0, r39, r0
2704 stx.q r17, r63, r4
2705 add.l r17, r63, r4
2706 addi.l r17, 8, r17
2707 beq/u r39, r40, tr1
2708 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2709 movi 3, r38
2710 shlli r38, 20, r39 /* field mask 3 << 20 */
2711 and r0, r39, r40
2712 andc r0, r39, r0
2713 stx.q r17, r63, r5
2714 add.l r17, r63, r5
2715 addi.l r17, 8, r17
2716 beq/u r39, r40, tr1
2717 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2718 movi 3, r38
2719 shlli r38, 16, r39 /* field mask 3 << 16 */
2720 and r0, r39, r40
2721 andc r0, r39, r0
2722 stx.q r17, r63, r6
2723 add.l r17, r63, r6
2724 addi.l r17, 8, r17
2725 beq/u r39, r40, tr1
2726 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2727 movi 3 << 12, r39 /* mask built with a single movi */
2728 and r0, r39, r40
2729 andc r0, r39, r0
2730 stx.q r17, r63, r7
2731 add.l r17, r63, r7
2732 addi.l r17, 8, r17
2733 beq/u r39, r40, tr1
2734 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2735 movi 3 << 8, r39
2736 and r0, r39, r40
2737 andc r0, r39, r0
2738 stx.q r17, r63, r8
2739 add.l r17, r63, r8
2740 addi.l r17, 8, r17
2741 beq/u r39, r40, tr1
2742 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2743 stx.q r17, r63, r9
2744 add.l r17, r63, r9
2745 blink tr0, r63 /* r9 is the last register handled here: return */
2746 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2747 movi 1, r38
2748 shlli r38, 29, r39 /* r39 = 1 << 29 */
2749 andc r0, r39, r0 /* consume that cookie bit */
2750 stx.q r17, r63, r2 /* store r2; the register itself is left unchanged */
2751 addi.l r17, 8, r17
2752 blink tr1, r63 /* back to ia_loop */
2753 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2754 movi 1, r38
2755 shlli r38, 26, r39
2756 andc r0, r39, r0
2757 stx.q r17, r63, r3
2758 addi.l r17, 8, r17
2759 blink tr1, r63
2760 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2761 movi 1, r38
2762 shlli r38, 23, r39
2763 andc r0, r39, r0
2764 stx.q r17, r63, r4
2765 addi.l r17, 8, r17
2766 blink tr1, r63
2767 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2768 movi 1, r38
2769 shlli r38, 20, r39
2770 andc r0, r39, r0
2771 stx.q r17, r63, r5
2772 addi.l r17, 8, r17
2773 blink tr1, r63
2774 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2775 movi 1, r38
2776 shlli r38, 16, r39
2777 andc r0, r39, r0
2778 stx.q r17, r63, r6
2779 addi.l r17, 8, r17
2780 blink tr1, r63
2781 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2782 movi 1 << 12, r39
2783 andc r0, r39, r0
2784 stx.q r17, r63, r7
2785 addi.l r17, 8, r17
2786 blink tr1, r63
2787 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2788 movi 1 << 8, r39
2789 andc r0, r39, r0
2790 stx.q r17, r63, r8
2791 addi.l r17, 8, r17
2792 blink tr1, r63
2793 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2794 andi r0, 7 << 1, r38 /* r38 = cookie bits 1..3, still shifted left by one */
2795 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2796 shlli r38, 2, r39 /* r39 = 8 * field value */
2797 shori LOCAL(ia_end_of_push_seq) & 65535, r40 /* r40 = address of ia_end_of_push_seq */
2798 sub.l r40, r39, r41 /* step back 8 bytes per unit of the field */
2799 ptabs/l r41, tr2
2800 blink tr2, r63 /* enter the store sequence part-way through */
/* NOTE(review): the label name below presumably means `start' (compare
   ct_start_of_pop_seq); it is not referenced by the code visible here.  */
2801 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2802 stx.q r17, r63, r3
2803 addi.l r17, 8, r17
2804 stx.q r17, r63, r4
2805 addi.l r17, 8, r17
2806 stx.q r17, r63, r5
2807 addi.l r17, 8, r17
2808 stx.q r17, r63, r6
2809 addi.l r17, 8, r17
2810 stx.q r17, r63, r7
2811 addi.l r17, 8, r17
2812 stx.q r17, r63, r8
2813 addi.l r17, 8, r17
2814 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2815 stx.q r17, r63, r9
2816 LOCAL(ia_return): /* Return. */
2817 blink tr0, r63
2818 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2819 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2820 #endif /* L_shcompact_incoming_args */
2821 #endif
2822 #if __SH5__
2823 #ifdef L_nested_trampoline
2824 #if __SH5__ == 32
2825 .section .text..SHmedia32,"ax"
2826 #else
2827 .text
2828 #endif
/* Trampoline code for nested functions.  It obtains an address derived
   from its own location (ptrel with a zero offset, read back with
   gettr), loads a branch target from displacement 24 off that address,
   loads a second value into r1 from displacement 32 (__SH5__ == 64) or
   28 (otherwise), and branches to the target without writing a link
   register.
   NOTE(review): presumably the two values are the nested function's
   address and its static chain, written next to a copy of this code
   when a trampoline is initialised -- confirm against the trampoline
   setup code in the SH back end.  The displacements are tied to the
   size of this code, so instructions must not be added or removed.  */
2829 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2830 .global GLOBAL(GCC_nested_trampoline)
2831 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2832 GLOBAL(GCC_nested_trampoline):
2833 .mode SHmedia
2834 ptrel/u r63, tr0 /* tr0 = PC-relative target with offset 0 (r63 reads as zero) */
2835 gettr tr0, r0 /* r0 = that address, used as the base for the loads */
2836 #if __SH5__ == 64
2837 ld.q r0, 24, r1 /* r1 = branch target (64-bit slot) */
2838 #else
2839 ld.l r0, 24, r1 /* r1 = branch target (32-bit slot) */
2840 #endif
2841 ptabs/l r1, tr1
2842 #if __SH5__ == 64
2843 ld.q r0, 32, r1 /* r1 = second stored value, left for the target */
2844 #else
2845 ld.l r0, 28, r1 /* r1 = second stored value, left for the target */
2846 #endif
2847 blink tr1, r63 /* jump; no return address is recorded */
2849 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2850 #endif /* L_nested_trampoline */
2851 #endif /* __SH5__ */
2852 #if __SH5__ == 32
2853 #ifdef L_push_pop_shmedia_regs
2854 .section .text..SHmedia32,"ax"
2855 .mode SHmedia
2856 .align 2
/* GCC_push_shmedia_regs (FPU) / GCC_push_shmedia_regs_nofpu:
   save register state below the stack pointer r15 and return via r18.
   The FPU entry point first reserves 14*8 bytes and stores dr36..dr62;
   both variants then reserve a further 27*8 bytes and store, from the
   highest slot down, tr7, tr6, tr5 (read through r62, r61, r60),
   r59..r44 and r35..r28.  Because gettr overwrites r60-r62 before the
   stores, the incoming contents of r60-r62 are not saved.
   Clobbers tr0.  The slot layout must match GCC_pop_shmedia_regs.  */
2857 #ifndef __SH4_NOFPU__
2858 .global GLOBAL(GCC_push_shmedia_regs)
2859 FUNC(GLOBAL(GCC_push_shmedia_regs))
2860 GLOBAL(GCC_push_shmedia_regs):
2861 addi.l r15, -14*8, r15 /* room for 14 double-precision registers */
2862 fst.d r15, 13*8, dr62
2863 fst.d r15, 12*8, dr60
2864 fst.d r15, 11*8, dr58
2865 fst.d r15, 10*8, dr56
2866 fst.d r15, 9*8, dr54
2867 fst.d r15, 8*8, dr52
2868 fst.d r15, 7*8, dr50
2869 fst.d r15, 6*8, dr48
2870 fst.d r15, 5*8, dr46
2871 fst.d r15, 4*8, dr44
2872 fst.d r15, 3*8, dr42
2873 fst.d r15, 2*8, dr40
2874 fst.d r15, 1*8, dr38
2875 fst.d r15, 0*8, dr36
2876 #else /* __SH4_NOFPU__ */
2877 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2878 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2879 GLOBAL(GCC_push_shmedia_regs_nofpu):
2880 #endif /* __SH4_NOFPU__ */
2881 ptabs/l r18, tr0 /* tr0 = return address */
2882 addi.l r15, -27*8, r15 /* room for 27 integer slots */
2883 gettr tr7, r62 /* read the target registers through r60-r62 ... */
2884 gettr tr6, r61
2885 gettr tr5, r60
2886 st.q r15, 26*8, r62 /* ... so these three slots hold tr7, tr6, tr5 */
2887 st.q r15, 25*8, r61
2888 st.q r15, 24*8, r60
2889 st.q r15, 23*8, r59
2890 st.q r15, 22*8, r58
2891 st.q r15, 21*8, r57
2892 st.q r15, 20*8, r56
2893 st.q r15, 19*8, r55
2894 st.q r15, 18*8, r54
2895 st.q r15, 17*8, r53
2896 st.q r15, 16*8, r52
2897 st.q r15, 15*8, r51
2898 st.q r15, 14*8, r50
2899 st.q r15, 13*8, r49
2900 st.q r15, 12*8, r48
2901 st.q r15, 11*8, r47
2902 st.q r15, 10*8, r46
2903 st.q r15, 9*8, r45
2904 st.q r15, 8*8, r44
2905 st.q r15, 7*8, r35
2906 st.q r15, 6*8, r34
2907 st.q r15, 5*8, r33
2908 st.q r15, 4*8, r32
2909 st.q r15, 3*8, r31
2910 st.q r15, 2*8, r30
2911 st.q r15, 1*8, r29
2912 st.q r15, 0*8, r28
2913 blink tr0, r63
2914 #ifndef __SH4_NOFPU__
2915 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2916 #else
2917 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2918 #endif
/* GCC_pop_shmedia_regs (FPU) / GCC_pop_shmedia_regs_nofpu:
   reload the registers stored by the matching push routine, release
   the stack space and return via r18.
   r0 carries the number of bytes to release: 41*8 for the FPU variant
   (27 integer slots plus 14 doubles) or 27*8 without FPU, so r0 is
   clobbered, as are tr0 and (FPU variant) tr1.  The FPU entry reloads
   dr36..dr62 from slots 27..40 and then branches to the shared integer
   part at .L0, skipping the `movi 27*8' of the no-FPU entry.  */
2919 #ifndef __SH4_NOFPU__
2920 .global GLOBAL(GCC_pop_shmedia_regs)
2921 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2922 GLOBAL(GCC_pop_shmedia_regs):
2923 pt .L0, tr1
2924 movi 41*8, r0 /* total frame size including the FP save area */
2925 fld.d r15, 40*8, dr62
2926 fld.d r15, 39*8, dr60
2927 fld.d r15, 38*8, dr58
2928 fld.d r15, 37*8, dr56
2929 fld.d r15, 36*8, dr54
2930 fld.d r15, 35*8, dr52
2931 fld.d r15, 34*8, dr50
2932 fld.d r15, 33*8, dr48
2933 fld.d r15, 32*8, dr46
2934 fld.d r15, 31*8, dr44
2935 fld.d r15, 30*8, dr42
2936 fld.d r15, 29*8, dr40
2937 fld.d r15, 28*8, dr38
2938 fld.d r15, 27*8, dr36
2939 blink tr1, r63 /* continue at .L0 */
2940 #else /* __SH4_NOFPU__ */
2941 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2942 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2943 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2944 #endif /* __SH4_NOFPU__ */
2945 movi 27*8, r0 /* frame size of the integer save area only */
2946 .L0:
2947 ptabs r18, tr0 /* tr0 = return address */
2948 ld.q r15, 26*8, r62 /* saved tr7 */
2949 ld.q r15, 25*8, r61 /* saved tr6 */
2950 ld.q r15, 24*8, r60 /* saved tr5 */
2951 ptabs r62, tr7
2952 ptabs r61, tr6
2953 ptabs r60, tr5
2954 ld.q r15, 23*8, r59
2955 ld.q r15, 22*8, r58
2956 ld.q r15, 21*8, r57
2957 ld.q r15, 20*8, r56
2958 ld.q r15, 19*8, r55
2959 ld.q r15, 18*8, r54
2960 ld.q r15, 17*8, r53
2961 ld.q r15, 16*8, r52
2962 ld.q r15, 15*8, r51
2963 ld.q r15, 14*8, r50
2964 ld.q r15, 13*8, r49
2965 ld.q r15, 12*8, r48
2966 ld.q r15, 11*8, r47
2967 ld.q r15, 10*8, r46
2968 ld.q r15, 9*8, r45
2969 ld.q r15, 8*8, r44
2970 ld.q r15, 7*8, r35
2971 ld.q r15, 6*8, r34
2972 ld.q r15, 5*8, r33
2973 ld.q r15, 4*8, r32
2974 ld.q r15, 3*8, r31
2975 ld.q r15, 2*8, r30
2976 ld.q r15, 1*8, r29
2977 ld.q r15, 0*8, r28
2978 add.l r15, r0, r15 /* release the frame */
2979 blink tr0, r63
2981 #ifndef __SH4_NOFPU__
2982 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2983 #else
2984 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2985 #endif
2986 #endif /* L_push_pop_shmedia_regs */
2987 #endif /* __SH5__ == 32 */
2989 #ifdef L_div_table
2990 #if __SH5__
2991 #if defined(__pic__) && defined(__SHMEDIA__)
/* sdivsi3 for PIC SHmedia code: r0 = r4 / r5.
   The divisor is normalised with nsb/shlld, its top bits index the
   division table (byte factor, then 16-bit constant) to form a first
   reciprocal estimate, which the multiply/subtract steps below refine
   before it is multiplied by the dividend and shifted back by
   92 - nsb(divisor).  The fixed-point format of each intermediate is
   given in the per-line comments.  Returns through r18, which is copied
   to tr0 early and then reused as scratch.
   NOTE(review): the .global directive appears twice (before FUNC and
   again before the label), and FUNC is emitted before the section is
   selected; both look harmless but redundant.  */
2992 .global GLOBAL(sdivsi3)
2993 FUNC(GLOBAL(sdivsi3))
2994 #if __SH5__ == 32
2995 .section .text..SHmedia32,"ax"
2996 #else
2997 .text
2998 #endif
2999 #if 0
3000 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3001 in a text section does not work (at least for shared libraries):
3002 the linker sets the LSB of the address as if this was SHmedia code. */
3003 #define TEXT_DATA_BUG
3004 #endif
3005 .align 2
3006 // inputs: r4,r5
3007 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3008 // result in r0
3009 .global GLOBAL(sdivsi3)
3010 GLOBAL(sdivsi3):
3011 #ifdef TEXT_DATA_BUG
3012 ptb datalabel Local_div_table,tr0
3013 #else
3014 ptb GLOBAL(div_table_internal),tr0
3015 #endif
3016 nsb r5, r1 /* r1 = normalisation shift for the divisor */
3017 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3018 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3019 /* bubble */
3020 gettr tr0,r20 /* r20 = table address placed in tr0 by ptb above */
3021 ldx.ub r20, r21, r19 // u0.8
3022 shari r25, 32, r25 // normalize to s2.30
3023 shlli r21, 1, r21 /* double the index to address the 16-bit constants */
3024 muls.l r25, r19, r19 // s2.38
3025 ldx.w r20, r21, r21 // s2.14
3026 ptabs r18, tr0 /* tr0 = return address; r18 is scratch from here on */
3027 shari r19, 24, r19 // truncate to s2.14
3028 sub r21, r19, r19 // some 11 bit inverse in s1.14
3029 muls.l r19, r19, r21 // u0.28
3030 sub r63, r1, r1 /* r1 = -nsb count ... */
3031 addi r1, 92, r1 /* ... + 92: right-shift amount for the final shard */
3032 muls.l r25, r21, r18 // s2.58
3033 shlli r19, 45, r19 // multiply by two and convert to s2.58
3034 /* bubble */
3035 sub r19, r18, r18
3036 shari r18, 28, r18 // some 22 bit inverse in s1.30
3037 muls.l r18, r25, r0 // s2.60
3038 muls.l r18, r4, r25 // s32.30
3039 /* bubble */
3040 shari r0, 16, r19 // s-16.44
3041 muls.l r19, r18, r19 // s-16.74
3042 shari r25, 63, r0 /* r0 = 0 or -1 according to the sign of r25 */
3043 shari r4, 14, r18 // s19.-14
3044 shari r19, 30, r19 // s-16.44
3045 muls.l r19, r18, r19 // s15.30
3046 xor r21, r0, r21 // You could also use the constant 1 << 27.
3047 add r21, r25, r21
3048 sub r21, r19, r21
3049 shard r21, r1, r21 /* shift back by the amount computed in r1 */
3050 sub r21, r0, r0 /* subtract the 0 / -1 sign word to form the result in r0 */
3051 blink tr0, r63
3052 ENDFUNC(GLOBAL(sdivsi3))
3053 /* This table has been generated by divtab.c .
3054 Defects for bias -330:
3055 Max defect: 6.081536e-07 at -1.000000e+00
3056 Min defect: 2.849516e-08 at 1.030651e+00
3057 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3058 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3059 Defect at 1: 1.238659e-07
3060 Defect at -2: 1.061708e-07 */
3061 #else /* ! __pic__ || ! __SHMEDIA__ */
3062 .section .rodata
3063 #endif /* __pic__ */
3064 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
/* Private copy of the division table, emitted only when TEXT_DATA_BUG
   is defined for PIC SHmedia code.  The label sits in the middle of the
   data so that it can be indexed with a signed value:
     label - 64 .. label - 33   16 negative constants (16-bit)
     label - 32 .. label - 17   16 negative factors (8-bit)
     label - 16 .. label + 15   32 bytes of padding (the index `hole')
     label + 16 .. label + 31   16 positive factors (8-bit)
     label + 32 .. label + 63   16 positive constants (16-bit)
   sdivsi3 reads the factor at label + index and the constant at
   label + 2 * index.
   NOTE(review): .size declares 128 bytes, which is the length of the
   whole table, although only 64 of those bytes follow the label.  */
3065 .balign 2
3066 .type Local_div_table,@object
3067 .size Local_div_table,128
3068 /* negative division constants */
3069 .word -16638
3070 .word -17135
3071 .word -17737
3072 .word -18433
3073 .word -19103
3074 .word -19751
3075 .word -20583
3076 .word -21383
3077 .word -22343
3078 .word -23353
3079 .word -24407
3080 .word -25582
3081 .word -26863
3082 .word -28382
3083 .word -29965
3084 .word -31800
3085 /* negative division factors */
3086 .byte 66
3087 .byte 70
3088 .byte 75
3089 .byte 81
3090 .byte 87
3091 .byte 93
3092 .byte 101
3093 .byte 109
3094 .byte 119
3095 .byte 130
3096 .byte 142
3097 .byte 156
3098 .byte 172
3099 .byte 192
3100 .byte 214
3101 .byte 241
3102 .skip 16
3103 Local_div_table:
3104 .skip 16
3105 /* positive division factors */
3106 .byte 241
3107 .byte 214
3108 .byte 192
3109 .byte 172
3110 .byte 156
3111 .byte 142
3112 .byte 130
3113 .byte 119
3114 .byte 109
3115 .byte 101
3116 .byte 93
3117 .byte 87
3118 .byte 81
3119 .byte 75
3120 .byte 70
3121 .byte 66
3122 /* positive division constants */
3123 .word 31801
3124 .word 29966
3125 .word 28383
3126 .word 26864
3127 .word 25583
3128 .word 24408
3129 .word 23354
3130 .word 22344
3131 .word 21384
3132 .word 20584
3133 .word 19752
3134 .word 19104
3135 .word 18434
3136 .word 17738
3137 .word 17136
3138 .word 16639
3139 .section .rodata
3140 #endif /* TEXT_DATA_BUG */
/* Exported division table (also reachable through the hidden alias
   div_table_internal).  The label sits in the middle of the data so
   that it can be indexed with a signed value:
     label - 64 .. label - 33   16 negative constants (16-bit)
     label - 32 .. label - 17   16 negative factors (8-bit)
     label - 16 .. label + 15   32 bytes of padding
     label + 16 .. label + 31   16 positive factors (8-bit)
     label + 32 .. label + 63   16 positive constants (16-bit)
   NOTE(review): .size declares 128 bytes, which is the length of the
   whole table, although only 64 of those bytes follow the label.  */
3141 .balign 2
3142 .type GLOBAL(div_table),@object
3143 .size GLOBAL(div_table),128
3144 /* negative division constants */
3145 .word -16638
3146 .word -17135
3147 .word -17737
3148 .word -18433
3149 .word -19103
3150 .word -19751
3151 .word -20583
3152 .word -21383
3153 .word -22343
3154 .word -23353
3155 .word -24407
3156 .word -25582
3157 .word -26863
3158 .word -28382
3159 .word -29965
3160 .word -31800
3161 /* negative division factors */
3162 .byte 66
3163 .byte 70
3164 .byte 75
3165 .byte 81
3166 .byte 87
3167 .byte 93
3168 .byte 101
3169 .byte 109
3170 .byte 119
3171 .byte 130
3172 .byte 142
3173 .byte 156
3174 .byte 172
3175 .byte 192
3176 .byte 214
3177 .byte 241
3178 .skip 16
3179 .global GLOBAL(div_table)
3180 GLOBAL(div_table):
3181 HIDDEN_ALIAS(div_table_internal,div_table)
3182 .skip 16
3183 /* positive division factors */
3184 .byte 241
3185 .byte 214
3186 .byte 192
3187 .byte 172
3188 .byte 156
3189 .byte 142
3190 .byte 130
3191 .byte 119
3192 .byte 109
3193 .byte 101
3194 .byte 93
3195 .byte 87
3196 .byte 81
3197 .byte 75
3198 .byte 70
3199 .byte 66
3200 /* positive division constants */
3201 .word 31801
3202 .word 29966
3203 .word 28383
3204 .word 26864
3205 .word 25583
3206 .word 24408
3207 .word 23354
3208 .word 22344
3209 .word 21384
3210 .word 20584
3211 .word 19752
3212 .word 19104
3213 .word 18434
3214 .word 17738
3215 .word 17136
3216 .word 16639
3218 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3219 /* This code used shld, thus is not suitable for SH1 / SH2. */
3221 /* Signed / unsigned division without use of FPU, optimized for SH4.
3222 Uses a lookup table for divisors in the range -128 .. +128, and
3223 div1 with case distinction for larger divisors in three more ranges.
3224 The code is lumped together with the table to allow the use of mova. */
/* Byte offsets inside a 32-bit word on the stack, by endianness:
   least significant byte, most significant byte of the low half-word,
   and least significant byte of the high half-word.  The div1 paths
   store partial quotient bytes at these offsets.  */
3225 #ifdef __LITTLE_ENDIAN__
3226 #define L_LSB 0
3227 #define L_LSWMSB 1
3228 #define L_MSWLSB 2
3229 #else
3230 #define L_LSB 3
3231 #define L_LSWMSB 2
3232 #define L_MSWLSB 1
3233 #endif
/* udivsi3_i4i: unsigned 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on the stack and reloaded before every return;
   r1 and the T bit are clobbered, and the table path also overwrites
   mach/macl through dmulu.l.
   Paths, selected by the size of the divisor:
     divisor <= 128           table lookup and multiply (udiv_le128)
     129 .. 0xffff            div1 steps, finished in the shared tail at
                              udiv_25 inside sdivsi3_i4i
     >= 0x10000               udiv_ge64k, or udiv_r8 when the divisor
                              exceeds dividend >> 8
   The labels without the `u' prefix (div_le128, div_ge64k, div_r8) are
   entered from sdivsi3_i4i, which has already pushed r4 and r5.
   Every exit is an rts followed by its delay-slot instruction; without
   the rts the code would run on into the next label.  */
3235 .balign 4
3236 .global GLOBAL(udivsi3_i4i)
3237 FUNC(GLOBAL(udivsi3_i4i))
3238 GLOBAL(udivsi3_i4i):
3239 mov.w LOCAL(c128_w), r1 /* r1 = 128 */
3240 div0u
3241 mov r4,r0
3242 shlr8 r0 /* r0 = dividend >> 8 */
3243 cmp/hi r1,r5 /* T = divisor > 128 (unsigned) */
3244 extu.w r5,r1 /* r1 = low 16 bits of the divisor */
3245 bf LOCAL(udiv_le128)
3246 cmp/eq r5,r1 /* T = divisor fits in 16 bits */
3247 bf LOCAL(udiv_ge64k)
3248 shlr r0 /* r0 = dividend >> 9 */
3249 mov r5,r1 /* keep the unshifted divisor for the stack save */
3250 shll16 r5 /* move the 16-bit divisor into the upper half for div1 */
3251 mov.l r4,@-r15 /* save dividend */
3252 div1 r5,r0
3253 mov.l r1,@-r15 /* save original divisor */
3254 div1 r5,r0
3255 div1 r5,r0
3256 bra LOCAL(udiv_25) /* finish in the shared tail in sdivsi3_i4i */
3257 div1 r5,r0 /* delay slot */
3259 LOCAL(div_le128):
3260 mova LOCAL(div_table_ix),r0
3261 bra LOCAL(div_le128_2)
3262 mov.b @(r0,r5),r1 /* delay slot: r1 = div_table_ix[divisor] */
3263 LOCAL(udiv_le128):
3264 mov.l r4,@-r15 /* save dividend */
3265 mova LOCAL(div_table_ix),r0
3266 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor] */
3267 mov.l r5,@-r15 /* save divisor */
3268 LOCAL(div_le128_2):
3269 mova LOCAL(div_table_inv),r0
3270 mov.l @(r0,r1),r1 /* r1 = inverse at byte offset r1 from div_table_inv */
3271 mov r5,r0
3272 tst #0xfe,r0 /* T = divisor has no bit other than bit 0 set */
3273 mova LOCAL(div_table_clz),r0
3274 dmulu.l r1,r4 /* mach:macl = inverse * dividend */
3275 mov.b @(r0,r5),r1 /* r1 = shift count for this divisor */
3276 bt/s LOCAL(div_by_1)
3277 mov r4,r0 /* delay slot: r0 = dividend */
3278 mov.l @r15+,r5 /* restore divisor */
3279 sts mach,r0 /* high word of the product */
3280 /* clrt */
/* T is already clear here because the bt/s above was not taken.  */
3281 addc r4,r0 /* add the dividend for the implicit leading 1; carry -> T */
3282 mov.l @r15+,r4 /* restore dividend */
3283 rotcr r0 /* halve the 33-bit sum, rotating the carry into bit 31 */
3284 rts
3285 shld r1,r0 /* delay slot: final shift by the table value */
3287 LOCAL(div_by_1_neg):
3288 neg r4,r0 /* entered from sdivsi3_i4i: r0 = -r4 */
3289 LOCAL(div_by_1):
3290 mov.l @r15+,r5 /* restore divisor */
3291 rts
3292 mov.l @r15+,r4 /* delay slot: restore dividend */
3294 LOCAL(div_ge64k):
3295 bt/s LOCAL(div_r8) /* T was set by the cmp/hi in the caller's delay slot */
3296 div0u
3297 shll8 r5
3298 bra LOCAL(div_ge64k_2)
3299 div1 r5,r0
3300 LOCAL(udiv_ge64k):
3301 cmp/hi r0,r5 /* T = divisor > (dividend >> 8) */
3302 mov r5,r1
3303 bt LOCAL(udiv_r8)
3304 shll8 r5
3305 mov.l r4,@-r15 /* save dividend */
3306 div1 r5,r0
3307 mov.l r1,@-r15 /* save original divisor */
3308 LOCAL(div_ge64k_2):
3309 div1 r5,r0
3310 mov.l LOCAL(zero_l),r1
3311 .rept 4
3312 div1 r5,r0
3313 .endr
3314 mov.l r1,@-r15 /* push a zero word that collects quotient bytes */
3315 div1 r5,r0
3316 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended, i.e. all bits but the low byte */
3317 div1 r5,r0
3318 mov.b r0,@(L_LSWMSB,r15) /* park the quotient bits produced so far */
3319 xor r4,r0
3320 and r1,r0
3321 bra LOCAL(div_ge64k_end)
3322 xor r4,r0 /* delay slot: r0 keeps its upper 24 bits, low byte comes from r4 */
3324 LOCAL(div_r8):
3325 shll16 r4
3326 bra LOCAL(div_r8_2)
3327 shll8 r4 /* delay slot: r4 <<= 24 in total */
3328 LOCAL(udiv_r8):
3329 mov.l r4,@-r15 /* save dividend */
3330 shll16 r4
3331 clrt
3332 shll8 r4
3333 mov.l r5,@-r15 /* save divisor */
3334 LOCAL(div_r8_2):
3335 rotcl r4
3336 mov r0,r1
3337 div1 r5,r1
3338 mov r4,r0
3339 rotcl r0
3340 mov r5,r4 /* r4 takes over as divisor so that r5 can be restored early */
3341 div1 r5,r1
3342 .rept 5
3343 rotcl r0; div1 r5,r1
3344 .endr
3345 rotcl r0
3346 mov.l @r15+,r5 /* restore divisor */
3347 div1 r4,r1
3348 mov.l @r15+,r4 /* restore dividend */
3349 rts
3350 rotcl r0 /* delay slot: shift in the last quotient bit */
3352 ENDFUNC(GLOBAL(udivsi3_i4i))
/* sdivsi3_i4i: signed 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on entry and reloaded before every return.
   The operands are converted to magnitudes; the code then follows the
   pos_result paths when the signs agree and the neg_result paths, which
   negate the quotient in the rts delay slot, when they differ.
   Magnitudes of the divisor up to 128 use the lookup tables; larger
   ones use div1 steps.  The label udiv_25 is also entered from
   udivsi3_i4i.
   Every exit is an rts followed by its delay-slot instruction; without
   the rts the code would run on into the next label.  */
3354 .global GLOBAL(sdivsi3_i4i)
3355 FUNC(GLOBAL(sdivsi3_i4i))
3356 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3357 but we effectively clobber only r1. */
3358 GLOBAL(sdivsi3_i4i):
3359 mov.l r4,@-r15 /* save dividend */
3360 cmp/pz r5 /* T = divisor >= 0 */
3361 mov.w LOCAL(c128_w), r1 /* r1 = 128 */
3362 bt/s LOCAL(pos_divisor)
3363 cmp/pz r4 /* delay slot: T = dividend >= 0 */
3364 mov.l r5,@-r15 /* save divisor */
3365 neg r5,r5 /* r5 = |divisor| */
3366 bt/s LOCAL(neg_result) /* dividend >= 0, divisor < 0 */
3367 cmp/hi r1,r5 /* delay slot: T = |divisor| > 128 */
3368 neg r4,r4 /* both negative: r4 = |dividend| */
3369 LOCAL(pos_result):
3370 extu.w r5,r0
3371 bf LOCAL(div_le128)
3372 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
3373 mov r4,r0
3374 shlr8 r0 /* r0 = dividend >> 8 */
3375 bf/s LOCAL(div_ge64k)
3376 cmp/hi r0,r5 /* delay slot: T = divisor > (dividend >> 8) */
3377 div0u
3378 shll16 r5
3379 div1 r5,r0
3380 div1 r5,r0
3381 div1 r5,r0
3382 LOCAL(udiv_25):
3383 mov.l LOCAL(zero_l),r1
3384 div1 r5,r0
3385 div1 r5,r0
3386 mov.l r1,@-r15 /* push a zero word that collects quotient bytes */
3387 .rept 3
3388 div1 r5,r0
3389 .endr
3390 mov.b r0,@(L_MSWLSB,r15)
3391 xtrct r4,r0
3392 swap.w r0,r0
3393 .rept 8
3394 div1 r5,r0
3395 .endr
3396 mov.b r0,@(L_LSWMSB,r15)
3397 LOCAL(div_ge64k_end):
3398 .rept 8
3399 div1 r5,r0
3400 .endr
3401 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3402 extu.b r0,r0
3403 mov.l @r15+,r5 /* restore divisor */
3404 or r4,r0 /* combine the parked bytes with the last quotient byte */
3405 mov.l @r15+,r4 /* restore dividend */
3406 rts
3407 rotcl r0 /* delay slot: shift in the last quotient bit */
3409 LOCAL(div_le128_neg):
3410 tst #0xfe,r0 /* T = divisor has no bit other than bit 0 set */
3411 mova LOCAL(div_table_ix),r0
3412 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor] */
3413 mova LOCAL(div_table_inv),r0
3414 bt/s LOCAL(div_by_1_neg)
3415 mov.l @(r0,r1),r1 /* delay slot: r1 = inverse entry */
3416 mova LOCAL(div_table_clz),r0
3417 dmulu.l r1,r4 /* mach:macl = inverse * dividend */
3418 mov.b @(r0,r5),r1 /* r1 = shift count for this divisor */
3419 mov.l @r15+,r5 /* restore divisor */
3420 sts mach,r0
3421 /* clrt */
/* T is already clear here because the bt/s above was not taken.  */
3422 addc r4,r0 /* add the dividend for the implicit leading 1; carry -> T */
3423 mov.l @r15+,r4 /* restore dividend */
3424 rotcr r0
3425 shld r1,r0
3426 rts
3427 neg r0,r0 /* delay slot: negate the quotient */
3429 LOCAL(pos_divisor):
3430 mov.l r5,@-r15 /* save divisor */
3431 bt/s LOCAL(pos_result) /* dividend >= 0 as well */
3432 cmp/hi r1,r5 /* delay slot: T = divisor > 128 */
3433 neg r4,r4 /* dividend < 0: r4 = |dividend| */
3434 LOCAL(neg_result):
3435 extu.w r5,r0
3436 bf LOCAL(div_le128_neg)
3437 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
3438 mov r4,r0
3439 shlr8 r0 /* r0 = dividend >> 8 */
3440 bf/s LOCAL(div_ge64k_neg)
3441 cmp/hi r0,r5 /* delay slot: T = divisor > (dividend >> 8) */
3442 div0u
3443 mov.l LOCAL(zero_l),r1
3444 shll16 r5
3445 div1 r5,r0
3446 mov.l r1,@-r15 /* push a zero word that collects quotient bytes */
3447 .rept 7
3448 div1 r5,r0
3449 .endr
3450 mov.b r0,@(L_MSWLSB,r15)
3451 xtrct r4,r0
3452 swap.w r0,r0
3453 .rept 8
3454 div1 r5,r0
3455 .endr
3456 mov.b r0,@(L_LSWMSB,r15)
3457 LOCAL(div_ge64k_neg_end):
3458 .rept 8
3459 div1 r5,r0
3460 .endr
3461 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3462 extu.b r0,r1
3463 mov.l @r15+,r5 /* restore divisor */
3464 or r4,r1
3465 LOCAL(div_r8_neg_end):
3466 mov.l @r15+,r4 /* restore dividend */
3467 rotcl r1 /* shift in the last quotient bit */
3468 rts
3469 neg r1,r0 /* delay slot: r0 = negated quotient */
3471 LOCAL(div_ge64k_neg):
3472 bt/s LOCAL(div_r8_neg)
3473 div0u
3474 shll8 r5
3475 mov.l LOCAL(zero_l),r1
3476 .rept 6
3477 div1 r5,r0
3478 .endr
3479 mov.l r1,@-r15 /* push a zero word that collects quotient bytes */
3480 div1 r5,r0
3481 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended */
3482 div1 r5,r0
3483 mov.b r0,@(L_LSWMSB,r15)
3484 xor r4,r0
3485 and r1,r0
3486 bra LOCAL(div_ge64k_neg_end)
3487 xor r4,r0 /* delay slot: r0 keeps its upper 24 bits, low byte comes from r4 */
/* Threshold constant loaded by both division entry points.  */
3489 LOCAL(c128_w):
3490 .word 128
3492 LOCAL(div_r8_neg):
3493 clrt
3494 shll16 r4
3495 mov r4,r1
3496 shll8 r1
3497 mov r5,r4 /* r4 takes over as divisor so that r5 can be restored early */
3498 .rept 7
3499 rotcl r1; div1 r5,r0
3500 .endr
3501 mov.l @r15+,r5 /* restore divisor */
3502 rotcl r1
3503 bra LOCAL(div_r8_neg_end)
3504 div1 r4,r0 /* delay slot */
/* Mask constant; mov.w sign-extends it when it is loaded.  */
3506 LOCAL(m256_w):
3507 .word 0xff00
3508 /* This table has been generated by divtab-sh4.c. */
/* Shift counts, one signed byte per divisor 0 .. 127, read with
   mov.b @(r0,r5) and applied with shld to the halved
   (mach + dividend) sum; negative values shift right.  The entry for
   divisor 128 is the first byte of div_table_ix, which follows
   immediately.  */
3509 .balign 4
3510 LOCAL(div_table_clz):
3511 .byte 0
3512 .byte 1
3513 .byte 0
3514 .byte -1
3515 .byte -1
3516 .byte -2
3517 .byte -2
3518 .byte -2
3519 .byte -2
3520 .byte -3
3521 .byte -3
3522 .byte -3
3523 .byte -3
3524 .byte -3
3525 .byte -3
3526 .byte -3
3527 .byte -3
3528 .byte -4
3529 .byte -4
3530 .byte -4
3531 .byte -4
3532 .byte -4
3533 .byte -4
3534 .byte -4
3535 .byte -4
3536 .byte -4
3537 .byte -4
3538 .byte -4
3539 .byte -4
3540 .byte -4
3541 .byte -4
3542 .byte -4
3543 .byte -4
3544 .byte -5
3545 .byte -5
3546 .byte -5
3547 .byte -5
3548 .byte -5
3549 .byte -5
3550 .byte -5
3551 .byte -5
3552 .byte -5
3553 .byte -5
3554 .byte -5
3555 .byte -5
3556 .byte -5
3557 .byte -5
3558 .byte -5
3559 .byte -5
3560 .byte -5
3561 .byte -5
3562 .byte -5
3563 .byte -5
3564 .byte -5
3565 .byte -5
3566 .byte -5
3567 .byte -5
3568 .byte -5
3569 .byte -5
3570 .byte -5
3571 .byte -5
3572 .byte -5
3573 .byte -5
3574 .byte -5
3575 .byte -5
3576 .byte -6
3577 .byte -6
3578 .byte -6
3579 .byte -6
3580 .byte -6
3581 .byte -6
3582 .byte -6
3583 .byte -6
3584 .byte -6
3585 .byte -6
3586 .byte -6
3587 .byte -6
3588 .byte -6
3589 .byte -6
3590 .byte -6
3591 .byte -6
3592 .byte -6
3593 .byte -6
3594 .byte -6
3595 .byte -6
3596 .byte -6
3597 .byte -6
3598 .byte -6
3599 .byte -6
3600 .byte -6
3601 .byte -6
3602 .byte -6
3603 .byte -6
3604 .byte -6
3605 .byte -6
3606 .byte -6
3607 .byte -6
3608 .byte -6
3609 .byte -6
3610 .byte -6
3611 .byte -6
3612 .byte -6
3613 .byte -6
3614 .byte -6
3615 .byte -6
3616 .byte -6
3617 .byte -6
3618 .byte -6
3619 .byte -6
3620 .byte -6
3621 .byte -6
3622 .byte -6
3623 .byte -6
3624 .byte -6
3625 .byte -6
3626 .byte -6
3627 .byte -6
3628 .byte -6
3629 .byte -6
3630 .byte -6
3631 .byte -6
3632 .byte -6
3633 .byte -6
3634 .byte -6
3635 .byte -6
3636 .byte -6
3637 .byte -6
3638 .byte -6
3639 /* Lookup table translating positive divisor to index into table of
3640 normalized inverse. N.B. the '0' entry is also the last entry of the
3641 previous table, and causes an unaligned access for division by zero. */
/* One signed byte per divisor 0 .. 128.  Each value is a byte offset
   relative to div_table_inv (a multiple of 4 in -128 .. 124, apart from
   the shared '0' entry); the powers of two all map to -128, which is
   the zero_l word.  */
3642 LOCAL(div_table_ix):
3643 .byte -6
3644 .byte -128
3645 .byte -128
3646 .byte 0
3647 .byte -128
3648 .byte -64
3649 .byte 0
3650 .byte 64
3651 .byte -128
3652 .byte -96
3653 .byte -64
3654 .byte -32
3655 .byte 0
3656 .byte 32
3657 .byte 64
3658 .byte 96
3659 .byte -128
3660 .byte -112
3661 .byte -96
3662 .byte -80
3663 .byte -64
3664 .byte -48
3665 .byte -32
3666 .byte -16
3667 .byte 0
3668 .byte 16
3669 .byte 32
3670 .byte 48
3671 .byte 64
3672 .byte 80
3673 .byte 96
3674 .byte 112
3675 .byte -128
3676 .byte -120
3677 .byte -112
3678 .byte -104
3679 .byte -96
3680 .byte -88
3681 .byte -80
3682 .byte -72
3683 .byte -64
3684 .byte -56
3685 .byte -48
3686 .byte -40
3687 .byte -32
3688 .byte -24
3689 .byte -16
3690 .byte -8
3691 .byte 0
3692 .byte 8
3693 .byte 16
3694 .byte 24
3695 .byte 32
3696 .byte 40
3697 .byte 48
3698 .byte 56
3699 .byte 64
3700 .byte 72
3701 .byte 80
3702 .byte 88
3703 .byte 96
3704 .byte 104
3705 .byte 112
3706 .byte 120
3707 .byte -128
3708 .byte -124
3709 .byte -120
3710 .byte -116
3711 .byte -112
3712 .byte -108
3713 .byte -104
3714 .byte -100
3715 .byte -96
3716 .byte -92
3717 .byte -88
3718 .byte -84
3719 .byte -80
3720 .byte -76
3721 .byte -72
3722 .byte -68
3723 .byte -64
3724 .byte -60
3725 .byte -56
3726 .byte -52
3727 .byte -48
3728 .byte -44
3729 .byte -40
3730 .byte -36
3731 .byte -32
3732 .byte -28
3733 .byte -24
3734 .byte -20
3735 .byte -16
3736 .byte -12
3737 .byte -8
3738 .byte -4
3739 .byte 0
3740 .byte 4
3741 .byte 8
3742 .byte 12
3743 .byte 16
3744 .byte 20
3745 .byte 24
3746 .byte 28
3747 .byte 32
3748 .byte 36
3749 .byte 40
3750 .byte 44
3751 .byte 48
3752 .byte 52
3753 .byte 56
3754 .byte 60
3755 .byte 64
3756 .byte 68
3757 .byte 72
3758 .byte 76
3759 .byte 80
3760 .byte 84
3761 .byte 88
3762 .byte 92
3763 .byte 96
3764 .byte 100
3765 .byte 104
3766 .byte 108
3767 .byte 112
3768 .byte 116
3769 .byte 120
3770 .byte 124
3771 .byte -128
3772 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
/* 64 longwords in total.  div_table_inv labels the middle of the table
   so that the signed byte offsets from div_table_ix (-128 .. 124) reach
   every entry.  The first word, zero_l, serves two purposes: it is the
   entry at offset -128, and the division code loads it as a plain zero
   constant.  */
3773 .balign 4
3774 LOCAL(zero_l):
3775 .long 0x0
3776 .long 0xF81F81F9
3777 .long 0xF07C1F08
3778 .long 0xE9131AC0
3779 .long 0xE1E1E1E2
3780 .long 0xDAE6076C
3781 .long 0xD41D41D5
3782 .long 0xCD856891
3783 .long 0xC71C71C8
3784 .long 0xC0E07039
3785 .long 0xBACF914D
3786 .long 0xB4E81B4F
3787 .long 0xAF286BCB
3788 .long 0xA98EF607
3789 .long 0xA41A41A5
3790 .long 0x9EC8E952
3791 .long 0x9999999A
3792 .long 0x948B0FCE
3793 .long 0x8F9C18FA
3794 .long 0x8ACB90F7
3795 .long 0x86186187
3796 .long 0x81818182
3797 .long 0x7D05F418
3798 .long 0x78A4C818
3799 .long 0x745D1746
3800 .long 0x702E05C1
3801 .long 0x6C16C16D
3802 .long 0x68168169
3803 .long 0x642C8591
3804 .long 0x60581606
3805 .long 0x5C9882BA
3806 .long 0x58ED2309
3807 LOCAL(div_table_inv):
3808 .long 0x55555556
3809 .long 0x51D07EAF
3810 .long 0x4E5E0A73
3811 .long 0x4AFD6A06
3812 .long 0x47AE147B
3813 .long 0x446F8657
3814 .long 0x41414142
3815 .long 0x3E22CBCF
3816 .long 0x3B13B13C
3817 .long 0x38138139
3818 .long 0x3521CFB3
3819 .long 0x323E34A3
3820 .long 0x2F684BDB
3821 .long 0x2C9FB4D9
3822 .long 0x29E4129F
3823 .long 0x27350B89
3824 .long 0x24924925
3825 .long 0x21FB7813
3826 .long 0x1F7047DD
3827 .long 0x1CF06ADB
3828 .long 0x1A7B9612
3829 .long 0x18118119
3830 .long 0x15B1E5F8
3831 .long 0x135C8114
3832 .long 0x11111112
3833 .long 0xECF56BF
3834 .long 0xC9714FC
3835 .long 0xA6810A7
3836 .long 0x8421085
3837 .long 0x624DD30
3838 .long 0x4104105
3839 .long 0x2040811
3840 /* maximum error: 0.987342 scaled: 0.921875*/
3842 ENDFUNC(GLOBAL(sdivsi3_i4i))
3843 #endif /* SH3 / SH4 */
3845 #endif /* L_div_table */
3847 #ifdef L_udiv_qrnnd_16
3848 #if !__SHMEDIA__
/* udiv_qrnnd_16: one 16-bit quotient step of a multi-word unsigned
   division.  Register roles are given in the comment below the
   HIDDEN_FUNC line: on entry r0 = n1, r4 = n0, r5 = d, r6 = d1; on exit
   r0 = remainder (rn) and r1 = quotient (qn); r2 is scratch.
   The main path runs 16 div1 steps against r6, multiplies the quotient
   estimate by the low half of r5 (mulu.w) and corrects the estimate
   downwards by at most two.  .Lots handles the case n1 > d1, where the
   code starts from a quotient of 0xffff.
   NOTE(review): SL1 is a macro defined outside this chunk; it
   presumably emits a branch together with its delay-slot instruction.
   Both exits are an rts followed by a delay-slot instruction; without
   the first rts the correction path would run on into .Lots.  */
3849 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3850 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3851 /* n1 < d, but n1 might be larger than d1. */
3852 .global GLOBAL(udiv_qrnnd_16)
3853 .balign 8
3854 GLOBAL(udiv_qrnnd_16):
3855 div0u
3856 cmp/hi r6,r0 /* T = n1 > d1 (unsigned) */
3857 bt .Lots
3858 .rept 16
3859 div1 r6,r0
3860 .endr
3861 extu.w r0,r1 /* r1 = low 16 bits: quotient bits produced so far */
3862 bt 0f
3863 add r6,r0 /* T clear after the last step: add d1 back */
3864 0: rotcl r1 /* shift the final quotient bit in from T */
3865 mulu.w r1,r5 /* macl = quotient estimate * low 16 bits of d */
3866 xtrct r4,r0
3867 swap.w r0,r0
3868 sts macl,r2
3869 cmp/hs r2,r0 /* T = r0 >= product: estimate is not too large */
3870 sub r2,r0
3871 bt 0f
3872 addc r5,r0 /* estimate too large: add d back ... */
3873 add #-1,r1 /* ... and decrement the quotient */
3874 bt 0f /* carry out of the addc: one correction was enough */
3875 1: add #-1,r1 /* second correction */
3876 rts
3877 add r5,r0 /* delay slot: add d back once more */
3878 .balign 8
3879 .Lots:
3880 sub r5,r0
3881 swap.w r4,r1
3882 xtrct r0,r1
3883 clrt
3884 mov r1,r0
3885 addc r5,r0
3886 mov #-1,r1
3887 SL1(bf, 1b,
3888 shlr16 r1)
3889 0: rts
3890 nop /* delay slot */
3891 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3892 #endif /* !__SHMEDIA__ */
3893 #endif /* L_udiv_qrnnd_16 */