gcc/config/sh/lib1funcs.asm

   1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
   2    Free Software Foundation, Inc.
   3
   4 This file is free software; you can redistribute it and/or modify it
   5 under the terms of the GNU General Public License as published by the
   6 Free Software Foundation; either version 2, or (at your option) any
   7 later version.
   8
   9 In addition to the permissions in the GNU General Public License, the
  10 Free Software Foundation gives you unlimited permission to link the
  11 compiled version of this file into combinations with other programs,
  12 and to distribute those combinations without any restriction coming
  13 from the use of this file.  (The General Public License restrictions
  14 do apply in other respects; for example, they cover modification of
  15 the file, and distribution when not linked into a combine
  16 executable.)
  17
  18 This file is distributed in the hope that it will be useful, but
  19 WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with this program; see the file COPYING.  If not, write to
  25 the Free Software Foundation, 59 Temple Place - Suite 330,
  26 Boston, MA 02111-1307, USA.  */
  27
  28 !! libgcc routines for the Hitachi / SuperH SH CPUs.
  29 !! Contributed by Steve Chamberlain.
  30 !! sac@cygnus.com
  31
  32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
  33 !! recoded in assembly by Toshiyasu Morita
  34 !! tm@netcom.com
  35
  36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
  37    ELF local label prefixes by J"orn Rennecke
  38    amylaar@cygnus.com  */
  39
  40 #ifdef __ELF__
     /* LOCAL(X): name of a label private to this file.  ELF builds spell it
        ".L_X"; every other object format gets "L_X".  */
  41 #define LOCAL(X) .L_##X
  42 #else
  43 #define LOCAL(X) L_##X
  44 #endif
  45
     /* GLOBAL(X): name of an exported routine, formed as the user label
        prefix followed by "__" and X.  The extra GLOBAL0 level lets
        __USER_LABEL_PREFIX__ be macro-expanded before CONCAT pastes it.  */
  46 #define CONCAT(A,B)     A##B
  47 #define GLOBAL0(U,X)    CONCAT(U,__##X)
  48 #define GLOBAL(X)       GLOBAL0(__USER_LABEL_PREFIX__,X)
  49
     /* FMOVD_WORKS is defined only for SH5 builds that are neither no-FPU
        (__SH4_NOFPU__) nor little-endian.  */
  50 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
  51 #define FMOVD_WORKS
  52 #endif
  53
  54 #if ! __SH5__
  55 #ifdef L_ashiftrt
     /* __ashiftrt_r4_N (N = 0..32): arithmetic right shift of r4 by the
        constant N, performed in place (input and result are both r4).
        The entry points are stacked: entering at label N falls through a run
        of single-bit "shar" steps into a shared tail.  Counts 16 and 24 use a
        logical shift followed by a sign extension; 31 and 32 share one entry
        that replaces r4 with its sign bit replicated.  The instruction after
        each rts is its delay slot and still executes before the return.  */
  56         .global GLOBAL(ashiftrt_r4_0)
  57         .global GLOBAL(ashiftrt_r4_1)
  58         .global GLOBAL(ashiftrt_r4_2)
  59         .global GLOBAL(ashiftrt_r4_3)
  60         .global GLOBAL(ashiftrt_r4_4)
  61         .global GLOBAL(ashiftrt_r4_5)
  62         .global GLOBAL(ashiftrt_r4_6)
  63         .global GLOBAL(ashiftrt_r4_7)
  64         .global GLOBAL(ashiftrt_r4_8)
  65         .global GLOBAL(ashiftrt_r4_9)
  66         .global GLOBAL(ashiftrt_r4_10)
  67         .global GLOBAL(ashiftrt_r4_11)
  68         .global GLOBAL(ashiftrt_r4_12)
  69         .global GLOBAL(ashiftrt_r4_13)
  70         .global GLOBAL(ashiftrt_r4_14)
  71         .global GLOBAL(ashiftrt_r4_15)
  72         .global GLOBAL(ashiftrt_r4_16)
  73         .global GLOBAL(ashiftrt_r4_17)
  74         .global GLOBAL(ashiftrt_r4_18)
  75         .global GLOBAL(ashiftrt_r4_19)
  76         .global GLOBAL(ashiftrt_r4_20)
  77         .global GLOBAL(ashiftrt_r4_21)
  78         .global GLOBAL(ashiftrt_r4_22)
  79         .global GLOBAL(ashiftrt_r4_23)
  80         .global GLOBAL(ashiftrt_r4_24)
  81         .global GLOBAL(ashiftrt_r4_25)
  82         .global GLOBAL(ashiftrt_r4_26)
  83         .global GLOBAL(ashiftrt_r4_27)
  84         .global GLOBAL(ashiftrt_r4_28)
  85         .global GLOBAL(ashiftrt_r4_29)
  86         .global GLOBAL(ashiftrt_r4_30)
  87         .global GLOBAL(ashiftrt_r4_31)
  88         .global GLOBAL(ashiftrt_r4_32)
  89
  90         .align  1
     /* Counts 31 and 32: only the sign survives.  */
  91 GLOBAL(ashiftrt_r4_32):
  92 GLOBAL(ashiftrt_r4_31):
  93         rotcl   r4              /* T = old bit 31 (the sign) */
  94         rts
  95         subc    r4,r4           /* delay slot: r4 = r4 - r4 - T = 0 or -1 */
  96
     /* Counts 25..30: (N - 24) single-bit shifts, then the 24-bit tail.  */
  97 GLOBAL(ashiftrt_r4_30):
  98         shar    r4
  99 GLOBAL(ashiftrt_r4_29):
 100         shar    r4
 101 GLOBAL(ashiftrt_r4_28):
 102         shar    r4
 103 GLOBAL(ashiftrt_r4_27):
 104         shar    r4
 105 GLOBAL(ashiftrt_r4_26):
 106         shar    r4
 107 GLOBAL(ashiftrt_r4_25):
 108         shar    r4
 109 GLOBAL(ashiftrt_r4_24):
 110         shlr16  r4              /* logical shift right by 16 + 8 = 24 ... */
 111         shlr8   r4
 112         rts
 113         exts.b  r4,r4           /* delay slot: ... then sign-extend the byte */
 114
     /* Counts 17..23: (N - 16) single-bit shifts, then the 16-bit tail.  */
 115 GLOBAL(ashiftrt_r4_23):
 116         shar    r4
 117 GLOBAL(ashiftrt_r4_22):
 118         shar    r4
 119 GLOBAL(ashiftrt_r4_21):
 120         shar    r4
 121 GLOBAL(ashiftrt_r4_20):
 122         shar    r4
 123 GLOBAL(ashiftrt_r4_19):
 124         shar    r4
 125 GLOBAL(ashiftrt_r4_18):
 126         shar    r4
 127 GLOBAL(ashiftrt_r4_17):
 128         shar    r4
 129 GLOBAL(ashiftrt_r4_16):
 130         shlr16  r4              /* logical shift right by 16 ... */
 131         rts
 132         exts.w  r4,r4           /* delay slot: ... then sign-extend the word */
 133
     /* Counts 1..15: N single-bit shifts; the last one sits in the rts
        delay slot.  */
 134 GLOBAL(ashiftrt_r4_15):
 135         shar    r4
 136 GLOBAL(ashiftrt_r4_14):
 137         shar    r4
 138 GLOBAL(ashiftrt_r4_13):
 139         shar    r4
 140 GLOBAL(ashiftrt_r4_12):
 141         shar    r4
 142 GLOBAL(ashiftrt_r4_11):
 143         shar    r4
 144 GLOBAL(ashiftrt_r4_10):
 145         shar    r4
 146 GLOBAL(ashiftrt_r4_9):
 147         shar    r4
 148 GLOBAL(ashiftrt_r4_8):
 149         shar    r4
 150 GLOBAL(ashiftrt_r4_7):
 151         shar    r4
 152 GLOBAL(ashiftrt_r4_6):
 153         shar    r4
 154 GLOBAL(ashiftrt_r4_5):
 155         shar    r4
 156 GLOBAL(ashiftrt_r4_4):
 157         shar    r4
 158 GLOBAL(ashiftrt_r4_3):
 159         shar    r4
 160 GLOBAL(ashiftrt_r4_2):
 161         shar    r4
 162 GLOBAL(ashiftrt_r4_1):
 163         rts
 164         shar    r4              /* delay slot: final single-bit shift */
 165
     /* Count 0: r4 is returned unchanged.  */
 166 GLOBAL(ashiftrt_r4_0):
 167         rts
 168         nop
 169 #endif
 170
 171 #ifdef L_ashiftrt_n
 172
 173 !
 174 ! GLOBAL(ashrsi3)
 175 !
 176 ! Entry:
 177 !
 178 ! r4: Value to shift
 179 ! r5: Shifts
 180 !
 181 ! Exit:
 182 !
 183 ! r0: Result
 184 !
 185 ! Destroys:
 186 !
 187 ! r5 (masked count, then table offset); the T bit may change
 188 !
 189
 190         .global GLOBAL(ashrsi3)
     /* Arithmetic right shift by a run-time count: r0 = r4 >> (r5 & 31).
        The masked count indexes a table of one-byte offsets (each measured
        from the start of the table).  Control then transfers to the matching
        entry of a fall-through chain of shift instructions that work on r0.
        r5 is used as scratch for the masked count and the fetched offset.  */
 191         .align  2
 192 GLOBAL(ashrsi3):
 193         mov     #31,r0
 194         and     r0,r5                   /* r5 = count & 31 */
 195         mova    LOCAL(ashrsi3_table),r0 /* r0 = address of the offset table */
 196         mov.b   @(r0,r5),r5             /* r5 = table[count] (byte load) */
 197 #ifdef __sh1__
     /* SH1 build: form the absolute target (table + offset) and jump.  */
 198         add     r5,r0
 199         jmp     @r0
 200 #else
     /* Other builds: PC-relative jump.  braf is relative to the address just
        past its delay slot, which is where the table is expected to begin.
        NOTE(review): this relies on no padding being inserted by the
        ".align 2" before the table -- confirm if instructions are added.  */
 201         braf    r5
 202 #endif
 203         mov     r4,r0                   /* delay slot: r0 = value to shift */
 204
 205         .align  2
     /* One entry per shift count 0..31.  Each offset is stored in a single
        byte, so every handler must stay close enough to the table.  */
 206 LOCAL(ashrsi3_table):
 207         .byte           LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
 208         .byte           LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
 209         .byte           LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
 210         .byte           LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
 211         .byte           LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
 212         .byte           LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
 213         .byte           LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
 214         .byte           LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
 215         .byte           LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
 216         .byte           LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
 217         .byte           LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
 218         .byte           LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
 219         .byte           LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
 220         .byte           LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
 221         .byte           LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
 222         .byte           LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
 223         .byte           LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
 224         .byte           LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
 225         .byte           LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
 226         .byte           LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
 227         .byte           LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
 228         .byte           LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
 229         .byte           LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
 230         .byte           LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
 231         .byte           LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
 232         .byte           LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
 233         .byte           LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
 234         .byte           LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
 235         .byte           LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
 236         .byte           LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
 237         .byte           LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
 238         .byte           LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
 239
     /* Count 31: the result is the sign bit replicated.  */
 240 LOCAL(ashrsi3_31):
 241         rotcl   r0              /* T = old bit 31 (the sign) */
 242         rts
 243         subc    r0,r0           /* delay slot: r0 = r0 - r0 - T = 0 or -1 */
 244
     /* Counts 25..30: (count - 24) single-bit shifts, then the 24-bit tail.  */
 245 LOCAL(ashrsi3_30):
 246         shar    r0
 247 LOCAL(ashrsi3_29):
 248         shar    r0
 249 LOCAL(ashrsi3_28):
 250         shar    r0
 251 LOCAL(ashrsi3_27):
 252         shar    r0
 253 LOCAL(ashrsi3_26):
 254         shar    r0
 255 LOCAL(ashrsi3_25):
 256         shar    r0
 257 LOCAL(ashrsi3_24):
 258         shlr16  r0              /* logical shift right by 16 + 8 = 24 ... */
 259         shlr8   r0
 260         rts
 261         exts.b  r0,r0           /* delay slot: ... then sign-extend the byte */
 262
     /* Counts 17..23: (count - 16) single-bit shifts, then the 16-bit tail.  */
 263 LOCAL(ashrsi3_23):
 264         shar    r0
 265 LOCAL(ashrsi3_22):
 266         shar    r0
 267 LOCAL(ashrsi3_21):
 268         shar    r0
 269 LOCAL(ashrsi3_20):
 270         shar    r0
 271 LOCAL(ashrsi3_19):
 272         shar    r0
 273 LOCAL(ashrsi3_18):
 274         shar    r0
 275 LOCAL(ashrsi3_17):
 276         shar    r0
 277 LOCAL(ashrsi3_16):
 278         shlr16  r0              /* logical shift right by 16 ... */
 279         rts
 280         exts.w  r0,r0           /* delay slot: ... then sign-extend the word */
 281
     /* Counts 1..15: one shar per bit; the last sits in the rts delay slot.  */
 282 LOCAL(ashrsi3_15):
 283         shar    r0
 284 LOCAL(ashrsi3_14):
 285         shar    r0
 286 LOCAL(ashrsi3_13):
 287         shar    r0
 288 LOCAL(ashrsi3_12):
 289         shar    r0
 290 LOCAL(ashrsi3_11):
 291         shar    r0
 292 LOCAL(ashrsi3_10):
 293         shar    r0
 294 LOCAL(ashrsi3_9):
 295         shar    r0
 296 LOCAL(ashrsi3_8):
 297         shar    r0
 298 LOCAL(ashrsi3_7):
 299         shar    r0
 300 LOCAL(ashrsi3_6):
 301         shar    r0
 302 LOCAL(ashrsi3_5):
 303         shar    r0
 304 LOCAL(ashrsi3_4):
 305         shar    r0
 306 LOCAL(ashrsi3_3):
 307         shar    r0
 308 LOCAL(ashrsi3_2):
 309         shar    r0
 310 LOCAL(ashrsi3_1):
 311         rts
 312         shar    r0              /* delay slot: final single-bit shift */
 313
     /* Count 0: r0 already holds the unshifted value.  */
 314 LOCAL(ashrsi3_0):
 315         rts
 316         nop
 317
 318 #endif
 319
 320 #ifdef L_ashiftlt
 321
 322 !
 323 ! GLOBAL(ashlsi3)
 324 !
 325 ! Entry:
 326 !
 327 ! r4: Value to shift
 328 ! r5: Shifts
 329 !
 330 ! Exit:
 331 !
 332 ! r0: Result
 333 !
 334 ! Destroys:
 335 !
 336 ! r5 (masked count, then table offset); the T bit may change
 337 !
 338         .global GLOBAL(ashlsi3)
     /* Left shift by a run-time count: r0 = r4 << (r5 & 31).
        The masked count indexes a table of one-byte offsets (each measured
        from the start of the table).  Control then transfers to the matching
        entry of one of eight fall-through chains.  Each chain applies up to
        three shll2 steps and then a tail built from shll16 / shll8 / shll2 /
        shll, so every count needs at most seven shift instructions.
        r5 is used as scratch for the masked count and the fetched offset.  */
 339         .align  2
 340 GLOBAL(ashlsi3):
 341         mov     #31,r0
 342         and     r0,r5                   /* r5 = count & 31 */
 343         mova    LOCAL(ashlsi3_table),r0 /* r0 = address of the offset table */
 344         mov.b   @(r0,r5),r5             /* r5 = table[count] (byte load) */
 345 #ifdef __sh1__
     /* SH1 build: form the absolute target (table + offset) and jump.  */
 346         add     r5,r0
 347         jmp     @r0
 348 #else
     /* Other builds: PC-relative jump.  braf is relative to the address just
        past its delay slot, which is where the table is expected to begin.
        NOTE(review): this relies on no padding being inserted by the
        ".align 2" before the table -- confirm if instructions are added.  */
 349         braf    r5
 350 #endif
 351         mov     r4,r0                   /* delay slot: r0 = value to shift */
 352
 353         .align  2
     /* One entry per shift count 0..31.  Each offset is stored in a single
        byte, so every handler must stay close enough to the table.  */
 354 LOCAL(ashlsi3_table):
 355         .byte           LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
 356         .byte           LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
 357         .byte           LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
 358         .byte           LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
 359         .byte           LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
 360         .byte           LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
 361         .byte           LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
 362         .byte           LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
 363         .byte           LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
 364         .byte           LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
 365         .byte           LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
 366         .byte           LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
 367         .byte           LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
 368         .byte           LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
 369         .byte           LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
 370         .byte           LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
 371         .byte           LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
 372         .byte           LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
 373         .byte           LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
 374         .byte           LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
 375         .byte           LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
 376         .byte           LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
 377         .byte           LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
 378         .byte           LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
 379         .byte           LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
 380         .byte           LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
 381         .byte           LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
 382         .byte           LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
 383         .byte           LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
 384         .byte           LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
 385         .byte           LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
 386         .byte           LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
 387
     /* Counts 2, 4, 6: two-bit steps only.  */
 388 LOCAL(ashlsi3_6):
 389         shll2   r0
 390 LOCAL(ashlsi3_4):
 391         shll2   r0
 392 LOCAL(ashlsi3_2):
 393         rts
 394         shll2   r0              /* delay slot: final two-bit step */
 395
     /* Counts 1, 3, 5, 7: two-bit steps, then one single-bit step.  */
 396 LOCAL(ashlsi3_7):
 397         shll2   r0
 398 LOCAL(ashlsi3_5):
 399         shll2   r0
 400 LOCAL(ashlsi3_3):
 401         shll2   r0
 402 LOCAL(ashlsi3_1):
 403         rts
 404         shll    r0              /* delay slot: final single-bit step */
 405
     /* Counts 8, 10, 12, 14: two-bit steps, then shift by 8.  */
 406 LOCAL(ashlsi3_14):
 407         shll2   r0
 408 LOCAL(ashlsi3_12):
 409         shll2   r0
 410 LOCAL(ashlsi3_10):
 411         shll2   r0
 412 LOCAL(ashlsi3_8):
 413         rts
 414         shll8   r0              /* delay slot */
 415
     /* Counts 9, 11, 13, 15: two-bit steps, then 8 + 1.  */
 416 LOCAL(ashlsi3_15):
 417         shll2   r0
 418 LOCAL(ashlsi3_13):
 419         shll2   r0
 420 LOCAL(ashlsi3_11):
 421         shll2   r0
 422 LOCAL(ashlsi3_9):
 423         shll8   r0
 424         rts
 425         shll    r0              /* delay slot */
 426
     /* Counts 16, 18, 20, 22: two-bit steps, then shift by 16.  */
 427 LOCAL(ashlsi3_22):
 428         shll2   r0
 429 LOCAL(ashlsi3_20):
 430         shll2   r0
 431 LOCAL(ashlsi3_18):
 432         shll2   r0
 433 LOCAL(ashlsi3_16):
 434         rts
 435         shll16  r0              /* delay slot */
 436
     /* Counts 17, 19, 21, 23: two-bit steps, then 16 + 1.  */
 437 LOCAL(ashlsi3_23):
 438         shll2   r0
 439 LOCAL(ashlsi3_21):
 440         shll2   r0
 441 LOCAL(ashlsi3_19):
 442         shll2   r0
 443 LOCAL(ashlsi3_17):
 444         shll16  r0
 445         rts
 446         shll    r0              /* delay slot */
 447
     /* Counts 24, 26, 28, 30: two-bit steps, then 16 + 8.  */
 448 LOCAL(ashlsi3_30):
 449         shll2   r0
 450 LOCAL(ashlsi3_28):
 451         shll2   r0
 452 LOCAL(ashlsi3_26):
 453         shll2   r0
 454 LOCAL(ashlsi3_24):
 455         shll16  r0
 456         rts
 457         shll8   r0              /* delay slot */
 458
     /* Counts 25, 27, 29, 31: two-bit steps, then 16 + 8 + 1.  */
 459 LOCAL(ashlsi3_31):
 460         shll2   r0
 461 LOCAL(ashlsi3_29):
 462         shll2   r0
 463 LOCAL(ashlsi3_27):
 464         shll2   r0
 465 LOCAL(ashlsi3_25):
 466         shll16  r0
 467         shll8   r0
 468         rts
 469         shll    r0              /* delay slot */
 470
     /* Count 0: r0 already holds the unshifted value.  */
 471 LOCAL(ashlsi3_0):
 472         rts
 473         nop
 474
 475 #endif
 476
 477 #ifdef L_lshiftrt
 478
 479 !
 480 ! GLOBAL(lshrsi3)
 481 !
 482 ! Entry:
 483 !
 484 ! r4: Value to shift
 485 ! r5: Shifts
 486 !
 487 ! Exit:
 488 !
 489 ! r0: Result
 490 !
 491 ! Destroys:
 492 !
 493 ! r5 (masked count, then table offset); the T bit may change
 494 !
 495         .global GLOBAL(lshrsi3)
     /* Logical right shift by a run-time count: r0 = r4 >> (r5 & 31), with
        zeroes shifted in at the top.
        The masked count indexes a table of one-byte offsets (each measured
        from the start of the table).  Control then transfers to the matching
        entry of one of eight fall-through chains.  Each chain applies up to
        three shlr2 steps and then a tail built from shlr16 / shlr8 / shlr2 /
        shlr.
        r5 is used as scratch for the masked count and the fetched offset.  */
 496         .align  2
 497 GLOBAL(lshrsi3):
 498         mov     #31,r0
 499         and     r0,r5                   /* r5 = count & 31 */
 500         mova    LOCAL(lshrsi3_table),r0 /* r0 = address of the offset table */
 501         mov.b   @(r0,r5),r5             /* r5 = table[count] (byte load) */
 502 #ifdef __sh1__
     /* SH1 build: form the absolute target (table + offset) and jump.  */
 503         add     r5,r0
 504         jmp     @r0
 505 #else
     /* Other builds: PC-relative jump.  braf is relative to the address just
        past its delay slot, which is where the table is expected to begin.
        NOTE(review): this relies on no padding being inserted by the
        ".align 2" before the table -- confirm if instructions are added.  */
 506         braf    r5
 507 #endif
 508         mov     r4,r0                   /* delay slot: r0 = value to shift */
 509
 510         .align  2
     /* One entry per shift count 0..31.  Each offset is stored in a single
        byte, so every handler must stay close enough to the table.  */
 511 LOCAL(lshrsi3_table):
 512         .byte           LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
 513         .byte           LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
 514         .byte           LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
 515         .byte           LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
 516         .byte           LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
 517         .byte           LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
 518         .byte           LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
 519         .byte           LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
 520         .byte           LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
 521         .byte           LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
 522         .byte           LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
 523         .byte           LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
 524         .byte           LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
 525         .byte           LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
 526         .byte           LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
 527         .byte           LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
 528         .byte           LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
 529         .byte           LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
 530         .byte           LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
 531         .byte           LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
 532         .byte           LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
 533         .byte           LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
 534         .byte           LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
 535         .byte           LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
 536         .byte           LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
 537         .byte           LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
 538         .byte           LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
 539         .byte           LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
 540         .byte           LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
 541         .byte           LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
 542         .byte           LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
 543         .byte           LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
 544
     /* Counts 2, 4, 6: two-bit steps only.  */
 545 LOCAL(lshrsi3_6):
 546         shlr2   r0
 547 LOCAL(lshrsi3_4):
 548         shlr2   r0
 549 LOCAL(lshrsi3_2):
 550         rts
 551         shlr2   r0              /* delay slot: final two-bit step */
 552
     /* Counts 1, 3, 5, 7: two-bit steps, then one single-bit step.  */
 553 LOCAL(lshrsi3_7):
 554         shlr2   r0
 555 LOCAL(lshrsi3_5):
 556         shlr2   r0
 557 LOCAL(lshrsi3_3):
 558         shlr2   r0
 559 LOCAL(lshrsi3_1):
 560         rts
 561         shlr    r0              /* delay slot: final single-bit step */
 562
     /* Counts 8, 10, 12, 14: two-bit steps, then shift by 8.  */
 563 LOCAL(lshrsi3_14):
 564         shlr2   r0
 565 LOCAL(lshrsi3_12):
 566         shlr2   r0
 567 LOCAL(lshrsi3_10):
 568         shlr2   r0
 569 LOCAL(lshrsi3_8):
 570         rts
 571         shlr8   r0              /* delay slot */
 572
     /* Counts 9, 11, 13, 15: two-bit steps, then 8 + 1.  */
 573 LOCAL(lshrsi3_15):
 574         shlr2   r0
 575 LOCAL(lshrsi3_13):
 576         shlr2   r0
 577 LOCAL(lshrsi3_11):
 578         shlr2   r0
 579 LOCAL(lshrsi3_9):
 580         shlr8   r0
 581         rts
 582         shlr    r0              /* delay slot */
 583
     /* Counts 16, 18, 20, 22: two-bit steps, then shift by 16.  */
 584 LOCAL(lshrsi3_22):
 585         shlr2   r0
 586 LOCAL(lshrsi3_20):
 587         shlr2   r0
 588 LOCAL(lshrsi3_18):
 589         shlr2   r0
 590 LOCAL(lshrsi3_16):
 591         rts
 592         shlr16  r0              /* delay slot */
 593
     /* Counts 17, 19, 21, 23: two-bit steps, then 16 + 1.  */
 594 LOCAL(lshrsi3_23):
 595         shlr2   r0
 596 LOCAL(lshrsi3_21):
 597         shlr2   r0
 598 LOCAL(lshrsi3_19):
 599         shlr2   r0
 600 LOCAL(lshrsi3_17):
 601         shlr16  r0
 602         rts
 603         shlr    r0              /* delay slot */
 604
     /* Counts 24, 26, 28, 30: two-bit steps, then 16 + 8.  */
 605 LOCAL(lshrsi3_30):
 606         shlr2   r0
 607 LOCAL(lshrsi3_28):
 608         shlr2   r0
 609 LOCAL(lshrsi3_26):
 610         shlr2   r0
 611 LOCAL(lshrsi3_24):
 612         shlr16  r0
 613         rts
 614         shlr8   r0              /* delay slot */
 615
     /* Counts 25, 27, 29, 31: two-bit steps, then 16 + 8 + 1.  */
 616 LOCAL(lshrsi3_31):
 617         shlr2   r0
 618 LOCAL(lshrsi3_29):
 619         shlr2   r0
 620 LOCAL(lshrsi3_27):
 621         shlr2   r0
 622 LOCAL(lshrsi3_25):
 623         shlr16  r0
 624         shlr8   r0
 625         rts
 626         shlr    r0              /* delay slot */
 627
     /* Count 0: r0 already holds the unshifted value.  */
 628 LOCAL(lshrsi3_0):
 629         rts
 630         nop
 631
 632 #endif
 633
 634 #ifdef L_movstr
     /* Word-by-word block copy helpers.  r4 = destination, r5 = source;
        every word travels through r0.

        __movstrSI<n> (n = 4, 8, ... 64): copy n bytes.  Entering at label <n>
        copies the word at offset n-4 and falls through all smaller sizes
        down to __movstrSI0, which only returns.

        __movstr: copy 64 bytes per pass, subtracting 16 from r6 each time and
        repeating while r6 stays positive; the remainder is finished by the
        "done" stub, which is placed ahead of the routines it serves.  */
 635         .text
 636 ! done all the large groups, do the remainder
 637
 638 ! jump to movstr+
     /* Reached from __movstr with r6 <= 0.  Both pointers are advanced past
        the 64-byte block just copied (r4 in the jmp delay slot), and control
        enters the __movstrSI<n> chain at __movstrSI0 + 4*r6.  Each word copy
        in that chain is a load/store pair occupying 4 bytes, so this copies
        the final -r6 words.  */
 639 done:
 640         add     #64,r5
 641         mova    GLOBAL(movstrSI0),r0    /* r0 = address of the chain's end */
 642         shll2   r6                      /* r6 = 4 * r6 (byte offset, <= 0) */
 643         add     r6,r0
 644         jmp     @r0
 645         add     #64,r4                  /* delay slot */
 646         .align  4
 647         .global GLOBAL(movstrSI64)
 648 GLOBAL(movstrSI64):
 649         mov.l   @(60,r5),r0
 650         mov.l   r0,@(60,r4)
 651         .global GLOBAL(movstrSI60)
 652 GLOBAL(movstrSI60):
 653         mov.l   @(56,r5),r0
 654         mov.l   r0,@(56,r4)
 655         .global GLOBAL(movstrSI56)
 656 GLOBAL(movstrSI56):
 657         mov.l   @(52,r5),r0
 658         mov.l   r0,@(52,r4)
 659         .global GLOBAL(movstrSI52)
 660 GLOBAL(movstrSI52):
 661         mov.l   @(48,r5),r0
 662         mov.l   r0,@(48,r4)
 663         .global GLOBAL(movstrSI48)
 664 GLOBAL(movstrSI48):
 665         mov.l   @(44,r5),r0
 666         mov.l   r0,@(44,r4)
 667         .global GLOBAL(movstrSI44)
 668 GLOBAL(movstrSI44):
 669         mov.l   @(40,r5),r0
 670         mov.l   r0,@(40,r4)
 671         .global GLOBAL(movstrSI40)
 672 GLOBAL(movstrSI40):
 673         mov.l   @(36,r5),r0
 674         mov.l   r0,@(36,r4)
 675         .global GLOBAL(movstrSI36)
 676 GLOBAL(movstrSI36):
 677         mov.l   @(32,r5),r0
 678         mov.l   r0,@(32,r4)
 679         .global GLOBAL(movstrSI32)
 680 GLOBAL(movstrSI32):
 681         mov.l   @(28,r5),r0
 682         mov.l   r0,@(28,r4)
 683         .global GLOBAL(movstrSI28)
 684 GLOBAL(movstrSI28):
 685         mov.l   @(24,r5),r0
 686         mov.l   r0,@(24,r4)
 687         .global GLOBAL(movstrSI24)
 688 GLOBAL(movstrSI24):
 689         mov.l   @(20,r5),r0
 690         mov.l   r0,@(20,r4)
 691         .global GLOBAL(movstrSI20)
 692 GLOBAL(movstrSI20):
 693         mov.l   @(16,r5),r0
 694         mov.l   r0,@(16,r4)
 695         .global GLOBAL(movstrSI16)
 696 GLOBAL(movstrSI16):
 697         mov.l   @(12,r5),r0
 698         mov.l   r0,@(12,r4)
 699         .global GLOBAL(movstrSI12)
 700 GLOBAL(movstrSI12):
 701         mov.l   @(8,r5),r0
 702         mov.l   r0,@(8,r4)
 703         .global GLOBAL(movstrSI8)
 704 GLOBAL(movstrSI8):
 705         mov.l   @(4,r5),r0
 706         mov.l   r0,@(4,r4)
 707         .global GLOBAL(movstrSI4)
 708 GLOBAL(movstrSI4):
 709         mov.l   @(0,r5),r0
 710         mov.l   r0,@(0,r4)
     /* End of the chain; also the anchor that "done" computes its target
        from.  Note that it has no .global directive here.  */
 711 GLOBAL(movstrSI0):
 712         rts
 713         nop
 714
 715         .align  4
 716
 717         .global GLOBAL(movstr)
     /* Loop body: copy sixteen words (offsets 60 down to 0), then decide
        whether another full block follows.  */
 718 GLOBAL(movstr):
 719         mov.l   @(60,r5),r0
 720         mov.l   r0,@(60,r4)
 721
 722         mov.l   @(56,r5),r0
 723         mov.l   r0,@(56,r4)
 724
 725         mov.l   @(52,r5),r0
 726         mov.l   r0,@(52,r4)
 727
 728         mov.l   @(48,r5),r0
 729         mov.l   r0,@(48,r4)
 730
 731         mov.l   @(44,r5),r0
 732         mov.l   r0,@(44,r4)
 733
 734         mov.l   @(40,r5),r0
 735         mov.l   r0,@(40,r4)
 736
 737         mov.l   @(36,r5),r0
 738         mov.l   r0,@(36,r4)
 739
 740         mov.l   @(32,r5),r0
 741         mov.l   r0,@(32,r4)
 742
 743         mov.l   @(28,r5),r0
 744         mov.l   r0,@(28,r4)
 745
 746         mov.l   @(24,r5),r0
 747         mov.l   r0,@(24,r4)
 748
 749         mov.l   @(20,r5),r0
 750         mov.l   r0,@(20,r4)
 751
 752         mov.l   @(16,r5),r0
 753         mov.l   r0,@(16,r4)
 754
 755         mov.l   @(12,r5),r0
 756         mov.l   r0,@(12,r4)
 757
 758         mov.l   @(8,r5),r0
 759         mov.l   r0,@(8,r4)
 760
 761         mov.l   @(4,r5),r0
 762         mov.l   r0,@(4,r4)
 763
 764         mov.l   @(0,r5),r0
 765         mov.l   r0,@(0,r4)
 766
 767         add     #-16,r6                 /* account for the 16 words copied */
 768         cmp/pl  r6                      /* T = (r6 > 0) */
 769         bf      done                    /* r6 <= 0: finish the remainder */
 770
 771         add     #64,r5                  /* advance to the next block */
 772         bra     GLOBAL(movstr)
 773         add     #64,r4                  /* delay slot */
 774 #endif
 775
 776 #ifdef L_movstr_i4
     /* Block-copy routines built on post-increment loads.
        r4 = destination, r5 = source (advanced by the loads), r6 = counter
        that is decremented and tested with dt.  The shared loop stores four
        words per pass and executes dt twice per pass.  Scratch: r0-r3.

          __movstr_i4_even  preloads two words and joins the loop at
                            L_movstr_start_even.
          __movstr_i4_odd   preloads three words and biases r4 by -4 so that
                            the loop's store offsets line up, then falls into
                            the top of the loop.
          __movstrSI12_i4   copies exactly three words (12 bytes); it does
                            not modify r4, r5 or r6.

        The instruction after each rts, bra, bt/s and bf/s is its delay
        slot.  */
 777         .text
 778         .global GLOBAL(movstr_i4_even)
 779         .global GLOBAL(movstr_i4_odd)
 780         .global GLOBAL(movstrSI12_i4)
 781
 782         .p2align        5
     /* Loop exit taken when the first dt of a pass reaches zero: two words
        (r0, r1) are loaded but not yet stored, and r4 has not been advanced
        for this pass, hence offsets 16 and 20.  */
 783 L_movstr_2mod4_end:
 784         mov.l   r0,@(16,r4)
 785         rts
 786         mov.l   r1,@(20,r4)             /* delay slot */
 787
 788         .p2align        2
 789
 790 GLOBAL(movstr_i4_odd):
 791         mov.l   @r5+,r1
 792         add     #-4,r4                  /* bias so stores below use +4/+8/+12 */
 793         mov.l   @r5+,r2
 794         mov.l   @r5+,r3
 795         mov.l   r1,@(4,r4)
 796         mov.l   r2,@(8,r4)
 797
 798 L_movstr_loop:
 799         mov.l   r3,@(12,r4)             /* last word of the previous group */
 800         dt      r6
 801         mov.l   @r5+,r0
 802         bt/s    L_movstr_2mod4_end      /* counter hit zero: store r0/r1, return */
 803         mov.l   @r5+,r1                 /* delay slot */
 804         add     #16,r4
 805 L_movstr_start_even:
 806         mov.l   @r5+,r2
 807         mov.l   @r5+,r3
 808         mov.l   r0,@r4
 809         dt      r6
 810         mov.l   r1,@(4,r4)
 811         bf/s    L_movstr_loop           /* counter not zero: next pass */
 812         mov.l   r2,@(8,r4)              /* delay slot */
 813         rts
 814         mov.l   r3,@(12,r4)             /* delay slot: final word */
 815
 816 GLOBAL(movstr_i4_even):
 817         mov.l   @r5+,r0
 818         bra     L_movstr_start_even
 819         mov.l   @r5+,r1                 /* delay slot */
 820
 821         .p2align        4
 822 GLOBAL(movstrSI12_i4):
 823         mov.l   @r5,r0
 824         mov.l   @(4,r5),r1
 825         mov.l   @(8,r5),r2
 826         mov.l   r0,@r4
 827         mov.l   r1,@(4,r4)
 828         rts
 829         mov.l   r2,@(8,r4)              /* delay slot */
 830 #endif
 831
 832 #ifdef L_mulsi3
 833
 834
 835         .global GLOBAL(mulsi3)
 836
 837 ! r4 =       aabb
 838 ! r5 =       ccdd
 839 ! r0 = aabb*ccdd  via partial products
 840 !
 841 ! if aa == 0 and cc == 0
 842 ! r0 = bb*dd
 843 !
 844 ! else
 845 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
 846 !
 847
 848 GLOBAL(mulsi3):
     /* 32 x 32 -> 32 bit multiply assembled from 16 x 16 mulu.w partial
        products.  In: r4, r5.  Out: r0.
        Also written: r2, r3, MACL and T on every path; r1 on the slow path.
        Fast path: when both upper halves are zero the single product bb*dd
        is the whole answer.  Slow path: the two cross products are summed,
        shifted up by 16 and added; the aa*cc term is not needed because it
        lies entirely above bit 31.  */
 849         mulu.w  r4,r5           ! multiply the lsws  macl=bb*dd
 850         mov     r5,r3           ! r3 = ccdd
 851         swap.w  r4,r2           ! r2 = bbaa
 852         xtrct   r2,r3           ! r3 = aacc
 853         tst     r3,r3           ! msws zero ?
 854         bf      hiset
 855         rts                     ! yes - then we have the answer
 856         sts     macl,r0
 857
 858 hiset:  sts     macl,r0         ! r0 = bb*dd
 859         mulu.w  r2,r5           ! brewing macl = aa*dd
 860         sts     macl,r1
 861         mulu.w  r3,r4           ! brewing macl = cc*bb
 862         sts     macl,r2
 863         add     r1,r2           /* r2 = aa*dd + cc*bb */
 864         shll16  r2              /* scale the cross terms by 65536 */
 865         rts
 866         add     r2,r0           /* delay slot: r0 = bb*dd + cross terms */
 867
 868
 869 #endif
 870 #endif /* ! __SH5__ */
 871 #ifdef L_sdivsi3_i4
     /* __sdivsi3_i4: signed 32-bit division carried out in the FPU.
        Both operands are converted to double precision (dr0, dr2), divided,
        and the quotient is truncated back to an integer with ftrc.
        In: r4 = dividend, r5 = divisor.  Out: fpul (not r0).
        Two variants follow; at most one is assembled.  */
 872         .title "SH DIVIDE"
 873 !! 4 byte integer Divide code for the Hitachi SH
 874 #ifdef __SH4__
 875 !! args in r4 and r5, result in fpul, clobber dr0, dr2
 876
     /* __SH4__ variant: fpscr is used as found on entry.
        NOTE(review): presumably it is already in double-precision mode for
        this configuration -- confirm against the compiler's ABI.  */
 877         .global GLOBAL(sdivsi3_i4)
 878 GLOBAL(sdivsi3_i4):
 879         lds r4,fpul
 880         float fpul,dr0          /* dr0 = (double) dividend */
 881         lds r5,fpul
 882         float fpul,dr2          /* dr2 = (double) divisor */
 883         fdiv dr2,dr0            /* dr0 = dr0 / dr2 */
 884         rts
 885         ftrc dr0,fpul           /* delay slot: fpul = truncated quotient */
 886
 887 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
 888 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
 889
 890 #if ! __SH5__ || __SH5__ == 32
 891 #if __SH5__
 892         .mode   SHcompact
 893 #endif
     /* Single-precision-default variant: the caller's fpscr is saved on the
        stack, fpscr is loaded with 8 << 16 (0x00080000) for the duration of
        the divide, and the saved value is restored in the rts delay slot.
        NOTE(review): 0x00080000 is taken to be the FPSCR.PR (double
        precision) bit -- confirm against the SH-4 manual.  */
 894         .global GLOBAL(sdivsi3_i4)
 895 GLOBAL(sdivsi3_i4):
 896         sts.l fpscr,@-r15       /* push caller's fpscr */
 897         mov #8,r2
 898         swap.w r2,r2            /* r2 = 8 << 16 */
 899         lds r2,fpscr
 900         lds r4,fpul
 901         float fpul,dr0          /* dr0 = (double) dividend */
 902         lds r5,fpul
 903         float fpul,dr2          /* dr2 = (double) divisor */
 904         fdiv dr2,dr0            /* dr0 = dr0 / dr2 */
 905         ftrc dr0,fpul           /* fpul = truncated quotient */
 906         rts
 907         lds.l @r15+,fpscr       /* delay slot: pop caller's fpscr */
 908
 909 #endif /* ! __SH5__ || __SH5__ == 32 */
 910 #endif /* ! __SH4__ */
 911 #endif
 912
 913 #ifdef L_sdivsi3
 914 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
 915    sh3e code.  */
 916 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
 917 !!
 918 !! Steve Chamberlain
 919 !! sac@cygnus.com
 920 !!
 921 !!
 922
 923 !! args in r4 and r5, result in r0 clobber r1,r2,r3
 924
 925         .global GLOBAL(sdivsi3)
 926 #if __SHMEDIA__
 927 #if __SH5__ == 32
 928         .section        .text..SHmedia32,"ax"
 929 #else
 930         .text
 931 #endif
 932         .align  2
 933 #if 0
 934 /* The assembly code that follows is a hand-optimized version of the C
 935    code that follows.  Note that the registers that are modified are
 936    exactly those listed as clobbered in the patterns divsi3_i1 and
 937    divsi3_i1_media.
 938
 939 int __sdivsi3 (i, j)
 940      int i, j;
 941 {
 942   register unsigned long long r18 asm ("r18");
 943   register unsigned long long r19 asm ("r19");
 944   register unsigned long long r0 asm ("r0") = 0;
 945   register unsigned long long r1 asm ("r1") = 1;
 946   register int r2 asm ("r2") = i >> 31;
 947   register int r3 asm ("r3") = j >> 31;
 948
 949   r2 = r2 ? r2 : r1;
 950   r3 = r3 ? r3 : r1;
 951   r18 = i * r2;
 952   r19 = j * r3;
 953   r2 *= r3;
 954
 955   r19 <<= 31;
 956   r1 <<= 31;
 957   do
 958     if (r18 >= r19)
 959       r0 |= r1, r18 -= r19;
 960   while (r19 >>= 1, r1 >>= 1);
 961
 962   return r2 * (int)r0;
 963 }
 964 */
 965 GLOBAL(sdivsi3):
     /* Disabled (#if 0) SHmedia implementation: a 32-step restoring division
        that follows the C sketch in the comment above this label.
        r18 = |dividend| (running remainder), r19 = |divisor| << 31,
        r1 = current quotient bit, r0 = quotient so far,
        r2 = sign factor (+1 or -1) applied to the quotient at the end.
        The return address arrives in r18 and is moved into tr0 before r18
        is reused.  */
 966         pt/l    LOCAL(sdivsi3_dontadd), tr2
 967         pt/l    LOCAL(sdivsi3_loop), tr1
 968         ptabs/l r18, tr0
 969         movi    0, r0
 970         movi    1, r1
 971         shari.l r4, 31, r2              /* r2 = -1 if r4 < 0, else 0 */
 972         shari.l r5, 31, r3              /* r3 = -1 if r5 < 0, else 0 */
 973         cmveq   r2, r1, r2              /* r2 = r2 ? r2 : 1 */
 974         cmveq   r3, r1, r3              /* r3 = r3 ? r3 : 1 */
 975         muls.l  r4, r2, r18             /* r18 = |r4| */
 976         muls.l  r5, r3, r19             /* r19 = |r5| */
 977         muls.l  r2, r3, r2              /* r2 = sign of the quotient */
 978         shlli   r19, 31, r19
 979         shlli   r1, 31, r1
 980 LOCAL(sdivsi3_loop):
 981         bgtu    r19, r18, tr2           /* divisor > remainder: skip this bit */
 982         or      r0, r1, r0
 983         sub     r18, r19, r18
 984 LOCAL(sdivsi3_dontadd):
 985         shlri   r1, 1, r1
 986         shlri   r19, 1, r19
 987         bnei    r1, 0, tr1              /* loop until the bit mask is exhausted */
 988         muls.l  r0, r2, r0              /* apply the sign */
 989         add.l   r0, r63, r0             /* 32-bit add of zero (r63) */
 990         blink   tr0, r63
 991 #else /* ! 0 */
 992  // inputs: r4,r5
 993  // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
 994  // result in r0
 995 GLOBAL(sdivsi3):
     /* Active SHmedia implementation of signed 32-bit division:
        r0 = r4 / r5, returning through the address passed in r18.

        What the code visibly does:
          - r20 = |r5| and r21 = |r4|, each formed by multiplying the input
            by its own sign (-1 or +1, built with shari.l / ori 1);
          - r18 is copied to tr0 with ptabs and then reused to hold
            (r4 ^ r5) >> 31, the sign mask of the quotient, which is folded
            in near the end with an add and a final xor;
          - three mulu.l / shlrd / mulu.l rounds accumulate the quotient in
            r19 while r25 tracks what is left of the dividend.

        NOTE(review): the mmulfx.w / msub.w / mmacnfx.wl section appears to
        compute and refine a fixed-point reciprocal of the normalised
        divisor; the constants and the instruction scheduling are tightly
        coupled -- confirm against the SHmedia manual before touching
        them.  */
 996  // can create absolute value without extra latency,
 997  // but dependent on proper sign extension of inputs:
 998  // shari.l r5,31,r2
 999  // xor r5,r2,r20
1000  // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1001  shari.l r5,31,r2
1002  ori r2,1,r2
1003  muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1004  movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1005  shari.l r4,31,r3
1006  nsb r20,r0
1007  shlld r20,r0,r25
1008  shlri r25,48,r25
1009  sub r19,r25,r1
1010  mmulfx.w r1,r1,r2
1011  mshflo.w r1,r63,r1
1012  // If r4 was to be used in-place instead of r21, could use this sequence
1013  // to compute absolute:
1014  // sub r63,r4,r19 // compute absolute value of r4
1015  // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1016  // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1017  ori r3,1,r3
1018  mmulfx.w r25,r2,r2
1019  sub r19,r0,r0
1020  muls.l r4,r3,r21
1021  msub.w r1,r2,r2
1022  addi r2,-2,r1
1023  mulu.l r21,r1,r19
1024  mmulfx.w r2,r2,r2
1025  shlli r1,15,r1
1026  shlrd r19,r0,r19
1027  mulu.l r19,r20,r3
1028  mmacnfx.wl r25,r2,r1
1029  ptabs r18,tr0
1030  sub r21,r3,r25
1031
1032  mulu.l r25,r1,r2
1033  addi r0,14,r0
1034  xor r4,r5,r18
1035  shlrd r2,r0,r2
1036  mulu.l r2,r20,r3
1037  add r19,r2,r19
1038  shari.l r18,31,r18
1039  sub r25,r3,r25
1040
1041  mulu.l r25,r1,r2
1042  sub r25,r20,r25
1043  add r19,r18,r19
1044  shlrd r2,r0,r2
1045  mulu.l r2,r20,r3
1046  addi r25,1,r25
1047  add r19,r2,r19
1048
1049  cmpgt r25,r3,r25
1050  add.l r19,r25,r0
1051  xor r0,r18,r0
1052  blink tr0,r63
1053 #endif
1054 #elif defined __SHMEDIA__
1055 /* m5compact-nofpu */
1056  // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1057         .mode   SHmedia
1058         .section        .text..SHmedia32,"ax"
1059         .align  2
1060 GLOBAL(sdivsi3):
     /* SHmedia-coded signed 32-bit division for the SH5 compact no-FPU
        configuration: r0 = r4 / r5, returning through the address in r18.

        A 32-step shift-and-subtract loop on magnitudes:
          r18, r19 = sign masks of r4 and r5 (0 or -1); r19 then becomes the
                     sign mask of the quotient;
          r20      = |r4| zero-extended, shifted left one bit per step;
          r21      = (|r5| << 32) - 1, so one subtraction both removes the
                     divisor from the upper half and adds 1 (a quotient bit)
                     to the lower half;
          r25      = step counter, counted down with a 32-bit add until it
                     reaches -32.
        The final xor / sub.l pair applies the quotient's sign.  */
1061         pt/l LOCAL(sdivsi3_dontsub), tr0
1062         pt/l LOCAL(sdivsi3_loop), tr1
1063         ptabs/l r18,tr2
1064         shari.l r4,31,r18
1065         shari.l r5,31,r19
1066         xor r4,r18,r20
1067         xor r5,r19,r21
1068         sub.l r20,r18,r20               /* r20 = |r4| */
1069         sub.l r21,r19,r21               /* r21 = |r5| */
1070         xor r18,r19,r19                 /* r19 = sign mask of the quotient */
1071         shlli r21,32,r25
1072         addi r25,-1,r21
1073         addz.l r20,r63,r20              /* zero-extend the 32-bit magnitude */
1074 LOCAL(sdivsi3_loop):
1075         shlli r20,1,r20
1076         bgeu/u r21,r20,tr0              /* r21 >= r20: nothing to subtract */
1077         sub r20,r21,r20
1078 LOCAL(sdivsi3_dontsub):
1079         addi.l r25,-1,r25
1080         bnei r25,-32,tr1                /* repeat for 32 steps */
1081         xor r20,r19,r20
1082         sub.l r20,r19,r0                /* negate when r19 is -1 */
1083         blink tr2,r63
1084 #else /* ! __SHMEDIA__ */
/* Signed 32-bit division for SHcompact using the div0s / div1 step
   instructions.  The dividend (r4) is copied to r1 and the divisor (r5)
   to r0; the quotient is returned in r0.  r1, r2, r3 and the T bit are
   used as scratch.  A zero divisor returns 0 (see div0 below).  */
1085 GLOBAL(sdivsi3):
1086         mov     r4,r1
1087         mov     r5,r0
1088
        /* Divisor == 0: bail out.  */
1089         tst     r0,r0
1090         bt      div0
        /* With r2 = 0, div0s puts the sign of the dividend into T.
           subc r3,r3 then yields r3 = -T, the upper word of the sign-extended
           dividend r3:r1, and subc r2,r1 subtracts T from the low word.  */
1091         mov     #0,r2
1092         div0s   r2,r1
1093         subc    r3,r3
1094         subc    r2,r1
        /* Set up the divide step flags from divisor and upper dividend word.  */
1095         div0s   r0,r3
        /* 32 unrolled divide steps: rotcl shifts the previous result bit (T)
           into r1 while moving the next dividend bit out into T for div1.  */
1096         rotcl   r1
1097         div1    r0,r3
1098         rotcl   r1
1099         div1    r0,r3
1100         rotcl   r1
1101         div1    r0,r3
1102         rotcl   r1
1103         div1    r0,r3
1104         rotcl   r1
1105         div1    r0,r3
1106         rotcl   r1
1107         div1    r0,r3
1108         rotcl   r1
1109         div1    r0,r3
1110         rotcl   r1
1111         div1    r0,r3
1112         rotcl   r1
1113         div1    r0,r3
1114         rotcl   r1
1115         div1    r0,r3
1116         rotcl   r1
1117         div1    r0,r3
1118         rotcl   r1
1119         div1    r0,r3
1120         rotcl   r1
1121         div1    r0,r3
1122         rotcl   r1
1123         div1    r0,r3
1124         rotcl   r1
1125         div1    r0,r3
1126         rotcl   r1
1127         div1    r0,r3
1128         rotcl   r1
1129         div1    r0,r3
1130         rotcl   r1
1131         div1    r0,r3
1132         rotcl   r1
1133         div1    r0,r3
1134         rotcl   r1
1135         div1    r0,r3
1136         rotcl   r1
1137         div1    r0,r3
1138         rotcl   r1
1139         div1    r0,r3
1140         rotcl   r1
1141         div1    r0,r3
1142         rotcl   r1
1143         div1    r0,r3
1144         rotcl   r1
1145         div1    r0,r3
1146         rotcl   r1
1147         div1    r0,r3
1148         rotcl   r1
1149         div1    r0,r3
1150         rotcl   r1
1151         div1    r0,r3
1152         rotcl   r1
1153         div1    r0,r3
1154         rotcl   r1
1155         div1    r0,r3
1156         rotcl   r1
1157         div1    r0,r3
1158         rotcl   r1
1159         div1    r0,r3
        /* Shift in the last result bit, then add the final T (r2 is still 0)
           as the closing correction of the quotient.  */
1160         rotcl   r1
1161         addc    r2,r1
        /* The mov sits in the delay slot of rts: result goes to r0.  */
1162         rts
1163         mov     r1,r0
1164
1165
        /* Division by zero: return 0 (set in the rts delay slot).  */
1166 div0:   rts
1167         mov     #0,r0
1168
1169 #endif /* ! __SHMEDIA__ */
1170 #endif /* ! __SH4__ */
1171 #endif
1172 #ifdef L_udivsi3_i4
1173
1174         .title "SH DIVIDE"
1175 !! 4 byte integer Divide code for the Hitachi SH
1176 #ifdef __SH4__
1177 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* Unsigned 32-bit division done in double-precision floating point.
   A divisor of 0 or 1 takes the "trivial" path, which returns the dividend
   unchanged.  Otherwise the top bit of each operand is flipped so that it
   converts correctly as a signed integer, and 2147483648.0 (the constant
   at L1) is added back after the conversion.
   NOTE(review): no fpscr handling here, so this presumably relies on the
   caller already running in double-precision mode - confirm.  */
1178
1179         .global GLOBAL(udivsi3_i4)
1180 GLOBAL(udivsi3_i4):
        /* Unsigned r5 > 1 ?  If not, go to trivial.  */
1181         mov #1,r1
1182         cmp/hi r1,r5
1183         bf trivial
        /* Rotating 1 right gives the top-bit mask in r1.  */
1184         rotr r1
1185         xor r1,r4
1186         lds r4,fpul
        /* dr4 = 2147483648.0, loaded as one double or as two singles whose
           order depends on endianness.  */
1187         mova L1,r0
1188 #ifdef FMOVD_WORKS
1189         fmov.d @r0+,dr4
1190 #else
1191 #ifdef __LITTLE_ENDIAN__
1192         fmov.s @r0+,fr5
1193         fmov.s @r0,fr4
1194 #else
1195         fmov.s @r0+,fr4
1196         fmov.s @r0,fr5
1197 #endif
1198 #endif
        /* dr0 = biased dividend, dr2 = biased divisor.  */
1199         float fpul,dr0
1200         xor r1,r5
1201         lds r5,fpul
1202         float fpul,dr2
        /* Undo the bias in floating point, then divide.  */
1203         fadd dr4,dr0
1204         fadd dr4,dr2
1205         fdiv dr2,dr0
        /* The truncating conversion to fpul runs in the rts delay slot.  */
1206         rts
1207         ftrc dr0,fpul
1208
        /* Divisor is 0 or 1: the result is the dividend itself.  */
1209 trivial:
1210         rts
1211         lds r4,fpul
1212
1213         .align 2
1214 #ifdef FMOVD_WORKS
1215         .align 3        ! make double below 8 byte aligned.
1216 #endif
        /* Bias constant 2^31 used above.  */
1217 L1:
1218         .double 2147483648
1219
1220 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1221 #if ! __SH5__ || __SH5__ == 32
1222 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* SHmedia variant: both operands are zero-extended to 64 bits, converted
   to double, divided, and the quotient is truncated back to an integer.
   Returns through the address in r18.  */
1223         .mode   SHmedia
1224         .global GLOBAL(udivsi3_i4)
1225 GLOBAL(udivsi3_i4):
        /* Zero-extend dividend / divisor into r20 / r21.  */
1226         addz.l  r4,r63,r20
1227         addz.l  r5,r63,r21
        /* Move the raw 64-bit integers into FP registers.  */
1228         fmov.qd r20,dr0
1229         fmov.qd r21,dr32
1230         ptabs   r18,tr0
        /* 64-bit integer -> double for both operands.  */
1231         float.qd dr0,dr0
1232         float.qd dr32,dr32
1233         fdiv.d  dr0,dr32,dr0
        /* Truncate the quotient to a 64-bit integer held in dr32.  */
1234         ftrc.dq dr0,dr32
        /* Copy fr33 into fr32; presumably fr32 is the register the caller
           reads as fpul - TODO confirm against the SH5 register mapping.  */
1235         fmov.s fr33,fr32
1236         blink tr0,r63
1237 #endif /* ! __SH5__ || __SH5__ == 32 */
1238 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1239 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* Same algorithm as the plain SH4 variant (bias both operands by flipping
   the top bit, convert, add 2147483648.0 back, divide, truncate), but the
   caller's fpscr is saved on the stack, replaced by the word stored at L1
   for the duration of the computation, and restored on exit.
   A divisor of 0 or 1 returns the dividend unchanged via "trivial",
   before fpscr has been touched.  */
1240
1241         .global GLOBAL(udivsi3_i4)
1242 GLOBAL(udivsi3_i4):
        /* Unsigned r5 > 1 ?  If not, go to trivial.  */
1243         mov #1,r1
1244         cmp/hi r1,r5
1245         bf trivial
        /* Save fpscr, then load the first word at L1 into it; r0 is left
           pointing at the double that follows.  */
1246         sts.l fpscr,@-r15
1247         mova L1,r0
1248         lds.l @r0+,fpscr
        /* Rotating 1 right gives the top-bit mask in r1.  */
1249         rotr r1
1250         xor r1,r4
1251         lds r4,fpul
        /* dr4 = 2147483648.0.  */
1252 #ifdef FMOVD_WORKS
1253         fmov.d @r0+,dr4
1254 #else
1255 #ifdef __LITTLE_ENDIAN__
1256         fmov.s @r0+,fr5
1257         fmov.s @r0,fr4
1258 #else
1259         fmov.s @r0+,fr4
1260         fmov.s @r0,fr5
1261 #endif
1262 #endif
        /* dr0 = biased dividend, dr2 = biased divisor.  */
1263         float fpul,dr0
1264         xor r1,r5
1265         lds r5,fpul
1266         float fpul,dr2
        /* Undo the bias in floating point, divide and truncate.  */
1267         fadd dr4,dr0
1268         fadd dr4,dr2
1269         fdiv dr2,dr0
1270         ftrc dr0,fpul
        /* Restore the saved fpscr in the rts delay slot.  */
1271         rts
1272         lds.l @r15+,fpscr
1273
1274 #ifdef FMOVD_WORKS
1275         .align 3        ! make double below 8 byte aligned.
1276 #endif
        /* Divisor is 0 or 1: the result is the dividend itself.  */
1277 trivial:
1278         rts
1279         lds r4,fpul
1280
1281         .align 2
        /* L1: fpscr value used during the division, followed by the bias
           constant.  NOTE(review): 0x80000 presumably selects double
           precision and 0x180000 additionally enables 64-bit fmov -
           confirm against the SH4 fpscr bit layout.  */
1282 L1:
1283 #ifndef FMOVD_WORKS
1284         .long 0x80000
1285 #else
1286         .long 0x180000
1287 #endif
1288         .double 2147483648
1289
1290 #endif /* ! __SH4__ */
1291 #endif
1292
1293 #ifdef L_udivsi3
1294 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1295    sh3e code.  */
1296 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1297
1298 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1299         .global GLOBAL(udivsi3)
1300
1301 #if __SHMEDIA__
1302 #if __SH5__ == 32
1303         .section        .text..SHmedia32,"ax"
1304 #else
1305         .text
1306 #endif
1307         .align  2
1308 #if 0
1309 /* The assembly code that follows is a hand-optimized version of the C
1310    code that follows.  Note that the registers that are modified are
1311    exactly those listed as clobbered in the patterns udivsi3_i1 and
1312    udivsi3_i1_media.
1313
1314 unsigned
1315 __udivsi3 (i, j)
1316     unsigned i, j;
1317 {
1318   register unsigned long long r0 asm ("r0") = 0;
1319   register unsigned long long r18 asm ("r18") = 1;
1320   register unsigned long long r4 asm ("r4") = i;
1321   register unsigned long long r19 asm ("r19") = j;
1322
1323   r19 <<= 31;
1324   r18 <<= 31;
1325   do
1326     if (r4 >= r19)
1327       r0 |= r18, r4 -= r19;
1328   while (r19 >>= 1, r18 >>= 1);
1329
1330   return r0;
1331 }
1332 */
/* Reference implementation, compiled out by the enclosing "#if 0".
   Plain restoring division mirroring the C sketch above: r0 = quotient,
   r18 = current quotient bit, r19 = divisor shifted to that bit position,
   r4 = running remainder.  Returns through the address in r18, which is
   captured in tr0 before r18 is reused.  */
1333 GLOBAL(udivsi3):
1334         pt/l    LOCAL(udivsi3_dontadd), tr2
1335         pt/l    LOCAL(udivsi3_loop), tr1
1336         ptabs/l r18, tr0
1337         movi    0, r0
1338         movi    1, r18
        /* Zero-extend both 32-bit operands.  */
1339         addz.l  r5, r63, r19
1340         addz.l  r4, r63, r4
1341         shlli   r19, 31, r19
1342         shlli   r18, 31, r18
1343 LOCAL(udivsi3_loop):
        /* Skip when the shifted divisor exceeds the remainder (unsigned).  */
1344         bgtu    r19, r4, tr2
1345         or      r0, r18, r0
1346         sub     r4, r19, r4
1347 LOCAL(udivsi3_dontadd):
        /* Next lower bit; stop once the bit mask has shifted out to zero.  */
1348         shlri   r18, 1, r18
1349         shlri   r19, 1, r19
1350         bnei    r18, 0, tr1
1351         blink   tr0, r63
1352 #else
/* Unsigned 32-bit division for SHmedia without a loop.
   Outline, as far as the data flow shows: the divisor is normalized and
   its top 16 bits are used to build a fixed-point reciprocal estimate
   with the 16-bit multimedia multiply instructions; the quotient is then
   accumulated in r18 over three multiply / subtract steps, with r25
   holding the remainder still to be divided, and a final compare adds a
   0 / 1 correction.  The instruction order is hand-scheduled (see the
   "bubble" markers); do not reorder without re-checking latencies.  */
1353 GLOBAL(udivsi3):
1354  // inputs: r4,r5
1355  // clobbered: r18,r19,r20,r21,r22,r25,tr0
1356  // result in r0.
 /* r22 = zero-extended divisor; r0 = its normalization shift count;
    r25 = top 16 bits of the normalized divisor.  */
1357  addz.l r5,r63,r22
1358  nsb r22,r0
1359  shlld r22,r0,r25
1360  shlri r25,48,r25
1361  movi 0xffffffffffffbb0c,r20 // shift count equiv 76
 /* r21 = first reciprocal estimate, refined below.  */
1362  sub r20,r25,r21
1363  mmulfx.w r21,r21,r19
1364  mshflo.w r21,r63,r21
1365  ptabs r18,tr0
1366  mmulfx.w r25,r19,r19
 /* r0 becomes the right-shift count applied to the products below.  */
1367  sub r20,r0,r0
1368  /* bubble */
1369  msub.w r21,r19,r19
1370  addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1371                     before the msub.w, but we need a different value for
1372                     r19 to keep errors under control.  */
 /* First quotient estimate: r18 = (dividend * reciprocal) >> r0.  */
1373  mulu.l r4,r21,r18
1374  mmulfx.w r19,r19,r19
1375  shlli r21,15,r21
1376  shlrd r18,r0,r18
 /* r25 = dividend - estimate * divisor, i.e. what is left to divide.  */
1377  mulu.l r18,r22,r20
1378  mmacnfx.wl r25,r19,r21
1379  /* bubble */
1380  sub r4,r20,r25
1381
 /* Second step: divide the remainder and add the partial quotient.  */
1382  mulu.l r25,r21,r19
1383  addi r0,14,r0
1384  /* bubble */
1385  shlrd r19,r0,r19
1386  mulu.l r19,r22,r20
1387  add r18,r19,r18
1388  /* bubble */
1389  sub.l r25,r20,r25
1390
 /* Third step, same pattern.  */
1391  mulu.l r25,r21,r19
1392  addz.l r25,r63,r25
1393  sub r25,r22,r25
1394  shlrd r19,r0,r19
1395  mulu.l r19,r22,r20
1396  addi r25,1,r25
1397  add r18,r19,r18
1398
 /* Final correction: add 1 when (remainder - divisor + 1) > last product.  */
1399  cmpgt r25,r20,r25
1400  add.l r18,r25,r0
1401  blink tr0,r63
1402 #endif
1403 #elif defined (__SHMEDIA__)
1404 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1405    ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1406    So use a short shmedia loop.  */
/* Unsigned 32-bit division: dividend in r4, divisor in r5, quotient in
   r0; returns through the address in r18.  A 32-iteration shift-and-subtract
   loop keeps the remainder in the upper half of r20 and collects the
   quotient bits in its lower half.  */
1407  // clobbered: r20,r21,r25,tr0,tr1,tr2
1408         .mode   SHmedia
1409         .section        .text..SHmedia32,"ax"
1410         .align  2
1411 GLOBAL(udivsi3):
1412  pt/l LOCAL(udivsi3_dontsub), tr0
1413  pt/l LOCAL(udivsi3_loop), tr1
1414  ptabs/l r18,tr2
 /* r25 = divisor << 32; its low 32 bits double as the loop counter.
    r21 = r25 - 1, so subtracting r21 removes the divisor from the upper
    half and adds 1 (the new quotient bit) to the lower half.  */
1415  shlli r5,32,r25
1416  addi r25,-1,r21
 /* Zero-extend the dividend into r20.  */
1417  addz.l r4,r63,r20
1418 LOCAL(udivsi3_loop):
1419  shlli r20,1,r20
 /* Skip the subtraction while r21 >= r20 (unsigned).  */
1420  bgeu/u r21,r20,tr0
1421  sub r20,r21,r20
1422 LOCAL(udivsi3_dontsub):
 /* Count down from 0; leave the loop once the counter reaches -32.  */
1423  addi.l r25,-1,r25
1424  bnei r25,-32,tr1
 /* Low 32 bits of r20 are the quotient.  */
1425  add.l r20,r63,r0
1426  blink tr2,r63
1427 #else /* ! defined (__SHMEDIA__) */
/* Helpers for udivsi3 below.  Each ends with "rts; div1", so the last
   divide step runs in the delay slot of the return.
   div8 performs eight div1 steps, div7 (entered one instruction later)
   performs seven.  */
1428 LOCAL(div8):
1429  div1 r5,r4
1430 LOCAL(div7):
1431  div1 r5,r4; div1 r5,r4; div1 r5,r4
1432  div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1433
/* divx4: four div1 steps, rotating each produced bit into r0 in between.
   The rotcl for the fourth bit is done by the caller.  */
1434 LOCAL(divx4):
1435  div1 r5,r4; rotcl r0
1436  div1 r5,r4; rotcl r0
1437  div1 r5,r4; rotcl r0
1438  rts; div1 r5,r4
1439
/* Unsigned 32-bit division for SHcompact: dividend in r4, divisor in r5,
   quotient in r0.  pr is saved on the stack because the helpers are
   reached with bsr.  Divisors that fit in 16 bits use two passes of
   16 divide steps; larger divisors go to large_divisor.  */
1440 GLOBAL(udivsi3):
1441  sts.l pr,@-r15
 /* Does the divisor equal its own zero-extended low 16 bits?  */
1442  extu.w r5,r0
1443  cmp/eq r5,r0
 /* Under __sh1__ the non-delayed bf is used, so div0u is repeated at
    large_divisor; otherwise div0u executes in the bf/s delay slot.  */
1444 #ifdef __sh1__
1445  bf LOCAL(large_divisor)
1446 #else
1447  bf/s LOCAL(large_divisor)
1448 #endif
1449  div0u
 /* First pass: divide the upper 16 bits of the dividend.  The divisor is
    moved into the upper half of r5 (bsr delay slot).  */
1450  swap.w r4,r0
1451  shlr16 r4
1452  bsr LOCAL(div8)
1453  shll16 r5
1454  bsr LOCAL(div7)
1455  div1 r5,r4
 /* Second pass: bring in the lower 16 bits of the dividend and repeat.  */
1456  xtrct r4,r0
1457  xtrct r0,r4
1458  bsr LOCAL(div8)
1459  swap.w r4,r4
1460  bsr LOCAL(div7)
1461  div1 r5,r4
1462  lds.l @r15+,pr
 /* Assemble the quotient in r0 and shift in the final result bit.  */
1463  xtrct r4,r0
1464  swap.w r0,r0
1465  rotcl r0
 /* Delay slot: undo the earlier shll16 on the divisor register.  */
1466  rts
1467  shlr16 r5
1468
/* Divisor does not fit in 16 bits: 4 x divx4 = 16 divide steps, with the
   quotient bits rotated into r0.  */
1469 LOCAL(large_divisor):
1470 #ifdef __sh1__
1471  div0u
1472 #endif
1473  mov #0,r0
1474  xtrct r4,r0
1475  xtrct r0,r4
 /* Each rotcl in a bsr delay slot shifts in the bit left by the
    preceding div1.  */
1476  bsr LOCAL(divx4)
1477  rotcl r0
1478  bsr LOCAL(divx4)
1479  rotcl r0
1480  bsr LOCAL(divx4)
1481  rotcl r0
1482  bsr LOCAL(divx4)
1483  rotcl r0
1484  lds.l @r15+,pr
1485  rts
1486  rotcl r0
1487
1488 #endif /* ! __SHMEDIA__ */
1489 #endif /* __SH4__ */
1490 #endif /* L_udivsi3 */
1491
1492 #ifdef L_udivdi3
1493 #ifdef __SHMEDIA__
/* Unsigned 64-bit division for SHmedia: dividend in r2, divisor in r3,
   quotient returned in r2; returns through the address in r18.
   Registers written besides the result: r0, r1, r4-r9, r19-r22, r25, tr0.
   The divisor is normalized (shift count in r22), a fixed-point reciprocal
   of its leading bits is built in r1, and the quotient is accumulated in
   r8 over several multiply / subtract steps.  Divisors for which r22 < 32
   take the large_divisor path.  The code is hand-scheduled; keep the
   instruction order.  */
1494         .mode   SHmedia
1495         .section        .text..SHmedia32,"ax"
1496         .align  2
1497         .global GLOBAL(udivdi3)
1498 GLOBAL(udivdi3):
        /* r22 = normalization shift of the divisor, r6 = normalized divisor,
           r5 = its leading bits used to seed the reciprocal.  */
1499         shlri r3,1,r4
1500         nsb r4,r22
1501         shlld r3,r22,r6
1502         shlri r6,49,r5
1503         movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1504         sub r21,r5,r1
1505         mmulfx.w r1,r1,r4
1506         mshflo.w r1,r63,r1
1507         sub r63,r22,r20 // r63 == 64 % 64
1508         mmulfx.w r5,r4,r4
1509         pta LOCAL(large_divisor),tr0
        /* r9 = 32 - r22; positive means the large_divisor path.  */
1510         addi r20,32,r9
1511         msub.w r1,r4,r1
1512         madd.w r1,r1,r1
1513         mmulfx.w r1,r1,r4
        /* r7 = upper 32 bits of the normalized divisor.  */
1514         shlri r6,32,r7
1515         bgt/u r9,r63,tr0 // large_divisor
1516         mmulfx.w r5,r4,r4
1517         shlri r2,32+14,r19
1518         addi r22,-31,r0
1519         msub.w r1,r4,r1
1520
1521         mulu.l r1,r7,r4
1522         addi r1,-3,r5
1523         mulu.l r5,r19,r5
1524         sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1525         shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1526                          the case may be, %0000000000000000 000.11111111111, still */
1527         muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1528         mulu.l r5,r3,r8
1529         mshalds.l r1,r21,r1
1530         shari r4,26,r4
1531         shlld r8,r0,r8
1532         add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1533         sub r2,r8,r2
1534         /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1535
1536         shlri r2,22,r21
1537         mulu.l r21,r1,r21
        /* r8 = first partial quotient, shifted into position.  */
1538         shlld r5,r0,r8
1539         addi r20,30-22,r0
1540         shlrd r21,r0,r21
1541         mulu.l r21,r3,r5
1542         add r8,r21,r8
1543         mcmpgt.l r21,r63,r21 // See Note 1
1544         addi r20,30,r0
1545         mshfhi.l r63,r21,r21
1546         sub r2,r5,r2
1547         andc r2,r21,r2
1548
1549         /* small divisor: need a third divide step */
1550         mulu.l r2,r1,r7
1551         ptabs r18,tr0
1552         addi r2,1,r2
1553         shlrd r7,r0,r7
1554         mulu.l r7,r3,r5
1555         add r8,r7,r8
1556         sub r2,r3,r2
        /* 0 / 1 correction of the accumulated quotient.  */
1557         cmpgt r2,r5,r5
1558         add r8,r5,r2
1559         /* could test r3 here to check for divide by zero.  */
1560         blink tr0,r63
1561
        /* Large divisor path: the dividend is first shifted right by r9
           (into r25) and divided by the upper divisor half r7.  */
1562 LOCAL(large_divisor):
1563         mmulfx.w r5,r4,r4
1564         shlrd r2,r9,r25
1565         shlri r25,32,r8
1566         msub.w r1,r4,r1
1567
1568         mulu.l r1,r7,r4
1569         addi r1,-3,r5
1570         mulu.l r5,r8,r5
1571         sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1572         shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1573                          the case may be, %0000000000000000 000.11111111111, still */
1574         muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1575         shlri r5,14-1,r8
1576         mulu.l r8,r7,r5
1577         mshalds.l r1,r21,r1
1578         shari r4,26,r4
1579         add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1580         sub r25,r5,r25
1581         /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1582
1583         shlri r25,22,r21
1584         mulu.l r21,r1,r21
1585         pta LOCAL(no_lo_adj),tr0
1586         addi r22,32,r0
1587         shlri r21,40,r21
1588         mulu.l r21,r7,r5
1589         add r8,r21,r8
1590         shlld r2,r0,r2
1591         sub r25,r5,r25
        /* If the rest is still >= r7, bump the quotient once more.  */
1592         bgtu/u r7,r25,tr0 // no_lo_adj
1593         addi r8,1,r8
1594         sub r25,r7,r25
1595 LOCAL(no_lo_adj):
1596         mextr4 r2,r25,r2
1597
1598         /* large_divisor: only needs a few adjustments.  */
1599         mulu.l r8,r6,r5
1600         ptabs r18,tr0
1601         /* bubble */
        /* Subtract 1 from the quotient when the product exceeds r2.  */
1602         cmpgtu r5,r2,r5
1603         sub r8,r5,r2
1604         blink tr0,r63
1605 /* Note 1: To shift the result of the second divide stage so that the result
1606    always fits into 32 bits, yet we still reduce the rest sufficiently
1607    would require a lot of instructions to do the shifts just right.  Using
1608    the full 64 bit shift result to multiply with the divisor would require
1609    four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1610    Fortunately, if the upper 32 bits of the shift result are non-zero, we
1611    know that the rest after taking this partial result into account will
1612    fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1613    upper 32 bits of the partial result are non-zero.  */
1614 #endif /* __SHMEDIA__ */
1615 #endif /* L_udivdi3 */
1616
1617 #ifdef L_divdi3
1618 #ifdef __SHMEDIA__
/* Signed 64-bit division: dividend in r2, divisor in r3, quotient in r2.
   Both operands are replaced by their absolute values and the work is
   delegated to udivdi3.  When the signs agree this is a tail jump (r18
   still holds the original return address); otherwise udivdi3 is called,
   its result negated, and control returns through tr1.  This relies on
   udivdi3 leaving tr1 untouched (it only loads tr0).  */
1619         .mode   SHmedia
1620         .section        .text..SHmedia32,"ax"
1621         .align  2
1622         .global GLOBAL(divdi3)
1623 GLOBAL(divdi3):
1624         pta GLOBAL(udivdi3),tr0
        /* r22 / r23 = sign masks (0 or -1) of dividend / divisor.  */
1625         shari r2,63,r22
1626         shari r3,63,r23
        /* Absolute values: (x ^ mask) - mask.  */
1627         xor r2,r22,r2
1628         xor r3,r23,r3
1629         sub r2,r22,r2
1630         sub r3,r23,r3
        /* Same sign: the unsigned quotient is the answer, jump straight in.  */
1631         beq/u r22,r23,tr0
        /* Different signs: keep the return address in tr1, call udivdi3 with
           the link in r18, then negate the quotient.  */
1632         ptabs r18,tr1
1633         blink tr0,r18
1634         sub r63,r2,r2
1635         blink tr1,r63
1636 #endif /* __SHMEDIA__ */
1637 #endif /* L_divdi3 */
1638
1639 #ifdef L_umoddi3
1640 #ifdef __SHMEDIA__
/* Unsigned 64-bit remainder for SHmedia: dividend in r2, divisor in r3,
   remainder returned in r2; returns through the address in r18.
   Registers written besides the result: r0, r1, r4-r9, r19-r22, r25, tr0.
   The structure parallels udivdi3 (normalize the divisor, build a
   fixed-point reciprocal in r1, reduce in several multiply / subtract
   steps), but here the running rest is what is kept.  Divisors for which
   r22 < 32 take the large_divisor path.  The code is hand-scheduled; keep
   the instruction order.  */
1641         .mode   SHmedia
1642         .section        .text..SHmedia32,"ax"
1643         .align  2
1644         .global GLOBAL(umoddi3)
1645 GLOBAL(umoddi3):
        /* r22 = normalization shift of the divisor, r6 = normalized divisor,
           r5 = its leading bits used to seed the reciprocal.  */
1646         shlri r3,1,r4
1647         nsb r4,r22
1648         shlld r3,r22,r6
1649         shlri r6,49,r5
1650         movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1651         sub r21,r5,r1
1652         mmulfx.w r1,r1,r4
1653         mshflo.w r1,r63,r1
1654         sub r63,r22,r20 // r63 == 64 % 64
1655         mmulfx.w r5,r4,r4
1656         pta LOCAL(large_divisor),tr0
        /* r9 = 32 - r22; positive means the large_divisor path.  */
1657         addi r20,32,r9
1658         msub.w r1,r4,r1
1659         madd.w r1,r1,r1
1660         mmulfx.w r1,r1,r4
        /* r7 = upper 32 bits of the normalized divisor.  */
1661         shlri r6,32,r7
1662         bgt/u r9,r63,tr0 // large_divisor
1663         mmulfx.w r5,r4,r4
1664         shlri r2,32+14,r19
1665         addi r22,-31,r0
1666         msub.w r1,r4,r1
1667
1668         mulu.l r1,r7,r4
1669         addi r1,-3,r5
1670         mulu.l r5,r19,r5
1671         sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1672         shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1673                          the case may be, %0000000000000000 000.11111111111, still */
1674         muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1675         mulu.l r5,r3,r5
1676         mshalds.l r1,r21,r1
1677         shari r4,26,r4
1678         shlld r5,r0,r5
1679         add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1680         sub r2,r5,r2
1681         /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1682
1683         shlri r2,22,r21
1684         mulu.l r21,r1,r21
1685         addi r20,30-22,r0
1686         /* bubble */ /* could test r3 here to check for divide by zero.  */
1687         shlrd r21,r0,r21
1688         mulu.l r21,r3,r5
1689         mcmpgt.l r21,r63,r21 // See Note 1
1690         addi r20,30,r0
1691         mshfhi.l r63,r21,r21
1692         sub r2,r5,r2
1693         andc r2,r21,r2
1694
1695         /* small divisor: need a third divide step */
1696         mulu.l r2,r1,r7
1697         ptabs r18,tr0
1698         sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1699         shlrd r7,r0,r7
1700         mulu.l r7,r3,r5
1701         /* bubble */
        /* Pick (rest - r3) instead of rest when one more divisor fits,
           then subtract the last product.  */
1702         addi r8,1,r7
1703         cmpgt r7,r5,r7
1704         cmvne r7,r8,r2
1705         sub r2,r5,r2
1706         blink tr0,r63
1707
        /* Large divisor path: the dividend is first shifted right by r9
           (into r25) and reduced using the upper divisor half r7.  */
1708 LOCAL(large_divisor):
1709         mmulfx.w r5,r4,r4
1710         shlrd r2,r9,r25
1711         shlri r25,32,r8
1712         msub.w r1,r4,r1
1713
1714         mulu.l r1,r7,r4
1715         addi r1,-3,r5
1716         mulu.l r5,r8,r5
1717         sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1718         shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1719                          the case may be, %0000000000000000 000.11111111111, still */
1720         muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1721         shlri r5,14-1,r8
1722         mulu.l r8,r7,r5
1723         mshalds.l r1,r21,r1
1724         shari r4,26,r4
1725         add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1726         sub r25,r5,r25
1727         /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1728
1729         shlri r25,22,r21
1730         mulu.l r21,r1,r21
1731         pta LOCAL(no_lo_adj),tr0
1732         addi r22,32,r0
1733         shlri r21,40,r21
1734         mulu.l r21,r7,r5
1735         add r8,r21,r8
1736         shlld r2,r0,r2
1737         sub r25,r5,r25
        /* If the rest is still >= r7, take r7 off once more.  */
1738         bgtu/u r7,r25,tr0 // no_lo_adj
1739         addi r8,1,r8
1740         sub r25,r7,r25
1741 LOCAL(no_lo_adj):
1742         mextr4 r2,r25,r2
1743
1744         /* large_divisor: only needs a few adjustments.  */
1745         mulu.l r8,r6,r5
1746         ptabs r18,tr0
        /* When the product exceeds r2, add the normalized divisor back
           before subtracting.  */
1747         add r2,r6,r7
1748         cmpgtu r5,r2,r8
1749         cmvne r8,r7,r2
1750         sub r2,r5,r2
        /* Shift right by r22, presumably undoing the normalization of the
           divisor so that the remainder is in its final position.  */
1751         shlrd r2,r22,r2
1752         blink tr0,r63
1753 /* Note 1: To shift the result of the second divide stage so that the result
1754    always fits into 32 bits, yet we still reduce the rest sufficiently
1755    would require a lot of instructions to do the shifts just right.  Using
1756    the full 64 bit shift result to multiply with the divisor would require
1757    four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1758    Fortunately, if the upper 32 bits of the shift result are non-zero, we
1759    know that the rest after taking this partial result into account will
1760    fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1761    upper 32 bits of the partial result are non-zero.  */
1762 #endif /* __SHMEDIA__ */
1763 #endif /* L_umoddi3 */
1764
1765 #ifdef L_moddi3
1766 #ifdef __SHMEDIA__
/* Signed 64-bit remainder: dividend in r2, divisor in r3, remainder in r2.
   Both operands are replaced by their absolute values and the work is
   delegated to umoddi3.  The branch tests only the dividend sign mask
   (r22 against zero), so the result takes the sign of the dividend:
   a non-negative dividend tail-jumps to umoddi3, a negative one calls it
   and negates the result.  This relies on umoddi3 leaving tr1 untouched
   (it only loads tr0).  */
1767         .mode   SHmedia
1768         .section        .text..SHmedia32,"ax"
1769         .align  2
1770         .global GLOBAL(moddi3)
1771 GLOBAL(moddi3):
1772         pta GLOBAL(umoddi3),tr0
        /* r22 / r23 = sign masks (0 or -1) of dividend / divisor.  */
1773         shari r2,63,r22
1774         shari r3,63,r23
        /* Absolute values: (x ^ mask) - mask.  */
1775         xor r2,r22,r2
1776         xor r3,r23,r3
1777         sub r2,r22,r2
1778         sub r3,r23,r3
        /* Dividend was non-negative: jump straight into umoddi3.  */
1779         beq/u r22,r63,tr0
        /* Dividend was negative: keep the return address in tr1, call
           umoddi3 with the link in r18, then negate the remainder.  */
1780         ptabs r18,tr1
1781         blink tr0,r18
1782         sub r63,r2,r2
1783         blink tr1,r63
1784 #endif /* __SHMEDIA__ */
1785 #endif /* L_moddi3 */
1786
1787 #ifdef L_set_fpscr
1788 #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1789 #ifdef __SH5__
1790         .mode   SHcompact
1791 #endif
/* set_fpscr: load fpscr from r4 and refresh the two-word array
   fpscr_values.  Two variants of r4 are stored:
     - one with bit 19 set (and bit 20 too when FMOVD_WORKS is defined),
     - one with bits 19 and 20 both clear.
   Under __SH4__ the first goes to fpscr_values[1] and the second to
   fpscr_values[0]; in the other configurations the slots are swapped.
   NOTE(review): bits 19 / 20 are presumably the fpscr precision and
   transfer-size bits - confirm against the SH4 manual.
   Uses r0-r3 as scratch.  */
1792         .global GLOBAL(set_fpscr)
1793 GLOBAL(set_fpscr):
1794         lds r4,fpscr
        /* r1 = address of fpscr_values.  */
1795         mov.l LOCAL(set_fpscr_L1),r1
        /* Work on the half-swapped value so that bits 19 / 20 can be reached
           with 8-bit immediates (#8 / #16).  */
1796         swap.w r4,r0
1797         or #24,r0
1798 #ifndef FMOVD_WORKS
1799         xor #16,r0
1800 #endif
1801 #if defined(__SH4__)
1802         swap.w r0,r3
1803         mov.l r3,@(4,r1)
1804 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1805         swap.w r0,r2
1806         mov.l r2,@r1
1807 #endif
        /* Clear whichever of the two bits are still set.  */
1808 #ifndef FMOVD_WORKS
1809         xor #8,r0
1810 #else
1811         xor #24,r0
1812 #endif
        /* The second store executes in the rts delay slot.  */
1813 #if defined(__SH4__)
1814         swap.w r0,r2
1815         rts
1816         mov.l r2,@r1
1817 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1818         swap.w r0,r3
1819         rts
1820         mov.l r3,@(4,r1)
1821 #endif
1822         .align 2
        /* Literal: address of the fpscr_values array.  */
1823 LOCAL(set_fpscr_L1):
1824         .long GLOBAL(fpscr_values)
1825 #ifdef __ELF__
1826         .comm   GLOBAL(fpscr_values),8,4
1827 #else
1828         .comm   GLOBAL(fpscr_values),8
1829 #endif /* ELF */
1830 #endif /* SH3E / SH4 */
1831 #endif /* L_set_fpscr */
1832 #ifdef L_ic_invalidate
1833 #if __SH5__ == 32
/* init_trampoline: fill in a 16-byte trampoline at the address in r0.
   An 8-byte constant is stored at offset 0, the low 32 bits of r2 at
   offset 8 and of r3 at offset 12.  Execution then falls through into
   ic_invalidate for the same address.  Clobbers r20 and tr0.  */
1834         .mode   SHmedia
1835         .section        .text..SHmedia32,"ax"
1836         .align  2
1837         .global GLOBAL(init_trampoline)
1838 GLOBAL(init_trampoline):
1839         st.l    r0,8,r2
        /* Build the 64-bit constant from four 16-bit pieces; the order is
           chosen per endianness so the same halfword sequence 0xd002, 0xd101,
           0x402b, 0x0009 ends up in memory.  NOTE(review): these are
           presumably the SHcompact instruction words of the stub - confirm.  */
1840 #ifdef __LITTLE_ENDIAN__
1841         movi    9,r20
1842         shori   0x402b,r20
1843         shori   0xd101,r20
1844         shori   0xd002,r20
1845 #else
1846         movi    0xffffffffffffd002,r20
1847         shori   0xd101,r20
1848         shori   0x402b,r20
1849         shori   9,r20
1850 #endif
1851         st.q    r0,0,r20
1852         st.l    r0,12,r3
/* ic_invalidate: address in r0.  Writes back the data cache block
   (ocbwb), invalidates the instruction cache block (icbi), with
   synco / synci barriers, and returns through the address in r18.  */
1853         .global GLOBAL(ic_invalidate)
1854 GLOBAL(ic_invalidate):
1855         ocbwb   r0,0
1856         synco
1857         icbi    r0, 0
1858         ptabs   r18, tr0
1859         synci
1860         blink   tr0, r63
1861 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
/* ic_invalidate for SH4: address in r4; clobbers r0, r1 and r4.
   After writing back the data cache line, the routine jumps into a table
   of 256 cache-line-sized slots (label 0 below), each starting with an
   rts, choosing the slot whose cache-line index matches that of r4.
   Presumably executing from that slot displaces the stale instruction
   cache line for the target address - confirm against the SH4 cache
   documentation.  */
1862         .global GLOBAL(ic_invalidate)
1863 GLOBAL(ic_invalidate):
1864         ocbwb   @r4
        /* r0 = address of the table, r1 = mask 0x1fe0 (line index bits).  */
1865         mova    0f,r0
1866         mov.w   1f,r1
1867 /* Compute how many cache lines 0f is away from r4.  */
1868         sub     r0,r4
1869         and     r1,r4
1870 /* Prepare to branch to 0f plus the cache-line offset.  */
1871         add     # 0f - 1f,r4
1872         braf    r4
1873         nop
1874 1:
        /* Mask selecting one of 256 lines of 32 bytes (bits 5..12).  */
1875         .short  0x1fe0
1876         .p2align 5
1877 /* This must be aligned to the beginning of a cache line.  */
1878 0:
1879         .rept   256 /* There are 256 cache lines of 32 bytes.  */
        /* One slot: rts followed by 15 nops.  */
1880         rts
1881         .rept   15
1882         nop
1883         .endr
1884         .endr
1885 #endif /* SH4 */
1886 #endif /* L_ic_invalidate */
1887
1888 #if defined (__SH5__) && __SH5__ == 32
1889 #ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline: 33 16-bit entries,
   each the offset of a handler relative to ct_main_label.  The trampoline
   loads an entry with ldx.w using 2 * nsb(r1) as the index from the base
   (ct_main_table - 31 * 2), then branches with ptrel from ct_main_label.
   The order of the entries therefore must not change.  */
1890         .section        .rodata
1891         .align  1
1892 LOCAL(ct_main_table):
1893 .word   LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
1894 .word   LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
1895 .word   LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
1896 .word   LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
1897 .word   LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
1898 .word   LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
1899 .word   LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
1900 .word   LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
1901 .word   LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
1902 .word   LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
1903 .word   LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
1904 .word   LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
1905 .word   LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
1906 .word   LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
1907 .word   LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
1908 .word   LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
1909 .word   LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
1910 .word   LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
1911 .word   LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
1912 .word   LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
1913 .word   LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
1914 .word   LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
1915 .word   LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
1916 .word   LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
1917 .word   LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
1918 .word   LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
1919 .word   LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
1920 .word   LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1921 .word   LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1922 .word   LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1923 .word   LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1924 .word   LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
1925 .word   LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
1926         .mode   SHmedia
1927         .section        .text..SHmedia32, "ax"
1928         .align  2
1929
1930      /* This function loads 64-bit general-purpose registers from the
1931         stack, from a memory address contained in them or from an FP
1932         register, according to a cookie passed in r1.  Its execution
1933         time is linear on the number of registers that actually have
1934         to be copied.  See sh.h for details on the actual bit pattern.
1935
1936         The function to be called is passed in r0.  If a 32-bit return
1937         value is expected, the actual function will be tail-called,
1938         otherwise the return address will be stored in r10 (that the
1939         caller should expect to be clobbered) and the return value
1940         will be expanded into r2/r3 upon return.  */
1941
1942         .global GLOBAL(GCC_shcompact_call_trampoline)
1943 GLOBAL(GCC_shcompact_call_trampoline):
1944         ptabs/l r0, tr0 /* Prepare to call the actual function.  */
1945         movi    ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
1946         pt/l    LOCAL(ct_loop), tr1
1947         addz.l  r1, r63, r1
1948         shori   ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
1949 LOCAL(ct_loop):
1950         nsb     r1, r28
1951         shlli   r28, 1, r29
1952         ldx.w   r0, r29, r30
1953 LOCAL(ct_main_label):
1954         ptrel/l r30, tr2
1955         blink   tr2, r63
1956 LOCAL(ct_r2_fp):        /* Copy r2 from an FP register.  */
1957         /* It must be dr0, so just do it.  */
1958         fmov.dq dr0, r2
1959         movi    7, r30
1960         shlli   r30, 29, r31
1961         andc    r1, r31, r1
1962         blink   tr1, r63
1963 LOCAL(ct_r3_fp):        /* Copy r3 from an FP register.  */
1964         /* It is either dr0 or dr2.  */
1965         movi    7, r30
1966         shlri   r1, 26, r32
1967         shlli   r30, 26, r31
1968         andc    r1, r31, r1
1969         fmov.dq dr0, r3
1970         beqi/l  r32, 4, tr1
1971         fmov.dq dr2, r3
1972         blink   tr1, r63
1973 LOCAL(ct_r4_fp):        /* Copy r4 from an FP register.  */
1974         shlri   r1, 23 - 3, r34
1975         andi    r34, 3 << 3, r33
1976         addi    r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
1977 LOCAL(ct_r4_fp_base):
1978         ptrel/l r32, tr2
1979         movi    7, r30
1980         shlli   r30, 23, r31
1981         andc    r1, r31, r1
1982         blink   tr2, r63
1983 LOCAL(ct_r4_fp_copy):
1984         fmov.dq dr0, r4
1985         blink   tr1, r63
1986         fmov.dq dr2, r4
1987         blink   tr1, r63
1988         fmov.dq dr4, r4
1989         blink   tr1, r63
1990 LOCAL(ct_r5_fp):        /* Copy r5 from an FP register.  */
1991         shlri   r1, 20 - 3, r34
1992         andi    r34, 3 << 3, r33
1993         addi    r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
1994 LOCAL(ct_r5_fp_base):
1995         ptrel/l r32, tr2
1996         movi    7, r30
1997         shlli   r30, 20, r31
1998         andc    r1, r31, r1
1999         blink   tr2, r63
2000 LOCAL(ct_r5_fp_copy):
2001         fmov.dq dr0, r5
2002         blink   tr1, r63
2003         fmov.dq dr2, r5
2004         blink   tr1, r63
2005         fmov.dq dr4, r5
2006         blink   tr1, r63
2007         fmov.dq dr6, r5
2008         blink   tr1, r63
2009 LOCAL(ct_r6_fph):       /* Copy r6 from a high FP register.  */
2010         /* It must be dr8.  */
2011         fmov.dq dr8, r6
2012         movi    15, r30
2013         shlli   r30, 16, r31
2014         andc    r1, r31, r1
2015         blink   tr1, r63
2016 LOCAL(ct_r6_fpl):       /* Copy r6 from a low FP register.  */
2017         shlri   r1, 16 - 3, r34
2018         andi    r34, 3 << 3, r33
2019         addi    r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2020 LOCAL(ct_r6_fp_base):
2021         ptrel/l r32, tr2
2022         movi    7, r30
2023         shlli   r30, 16, r31
2024         andc    r1, r31, r1
2025         blink   tr2, r63
2026 LOCAL(ct_r6_fp_copy):
2027         fmov.dq dr0, r6
2028         blink   tr1, r63
2029         fmov.dq dr2, r6
2030         blink   tr1, r63
2031         fmov.dq dr4, r6
2032         blink   tr1, r63
2033         fmov.dq dr6, r6
2034         blink   tr1, r63
2035 LOCAL(ct_r7_fph):       /* Copy r7 from a high FP register.  */
2036         /* It is either dr8 or dr10.  */
2037         movi    15 << 12, r31
2038         shlri   r1, 12, r32
2039         andc    r1, r31, r1
2040         fmov.dq dr8, r7
2041         beqi/l  r32, 8, tr1
2042         fmov.dq dr10, r7
2043         blink   tr1, r63
2044 LOCAL(ct_r7_fpl):       /* Copy r7 from a low FP register.  */
2045         shlri   r1, 12 - 3, r34
2046         andi    r34, 3 << 3, r33
2047         addi    r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2048 LOCAL(ct_r7_fp_base):
2049         ptrel/l r32, tr2
2050         movi    7 << 12, r31
2051         andc    r1, r31, r1
2052         blink   tr2, r63
2053 LOCAL(ct_r7_fp_copy):
2054         fmov.dq dr0, r7
2055         blink   tr1, r63
2056         fmov.dq dr2, r7
2057         blink   tr1, r63
2058         fmov.dq dr4, r7
2059         blink   tr1, r63
2060         fmov.dq dr6, r7
2061         blink   tr1, r63
2062 LOCAL(ct_r8_fph):       /* Copy r8 from a high FP register.  */
2063         /* It is either dr8 or dr10.  */
2064         movi    15 << 8, r31
2065         andi    r1, 1 << 8, r32
2066         andc    r1, r31, r1
2067         fmov.dq dr8, r8
2068         beq/l   r32, r63, tr1
2069         fmov.dq dr10, r8
2070         blink   tr1, r63
2071 LOCAL(ct_r8_fpl):       /* Copy r8 from a low FP register.  */
2072         shlri   r1, 8 - 3, r34
2073         andi    r34, 3 << 3, r33
2074         addi    r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2075 LOCAL(ct_r8_fp_base):
2076         ptrel/l r32, tr2
2077         movi    7 << 8, r31
2078         andc    r1, r31, r1
2079         blink   tr2, r63
2080 LOCAL(ct_r8_fp_copy):
2081         fmov.dq dr0, r8
2082         blink   tr1, r63
2083         fmov.dq dr2, r8
2084         blink   tr1, r63
2085         fmov.dq dr4, r8
2086         blink   tr1, r63
2087         fmov.dq dr6, r8
2088         blink   tr1, r63
2089 LOCAL(ct_r9_fph):       /* Copy r9 from a high FP register.  */
2090         /* It is either dr8 or dr10.  */
2091         movi    15 << 4, r31
2092         andi    r1, 1 << 4, r32
2093         andc    r1, r31, r1
2094         fmov.dq dr8, r9
2095         beq/l   r32, r63, tr1
2096         fmov.dq dr10, r9
2097         blink   tr1, r63
2098 LOCAL(ct_r9_fpl):       /* Copy r9 from a low FP register.  */
2099         shlri   r1, 4 - 3, r34
2100         andi    r34, 3 << 3, r33
2101         addi    r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2102 LOCAL(ct_r9_fp_base):
2103         ptrel/l r32, tr2
2104         movi    7 << 4, r31
2105         andc    r1, r31, r1
2106         blink   tr2, r63
2107 LOCAL(ct_r9_fp_copy):
2108         fmov.dq dr0, r9
2109         blink   tr1, r63
2110         fmov.dq dr2, r9
2111         blink   tr1, r63
2112         fmov.dq dr4, r9
2113         blink   tr1, r63
2114         fmov.dq dr6, r9
2115         blink   tr1, r63
2116 LOCAL(ct_r2_ld):        /* Copy r2 from a memory address.  */
2117         pt/l    LOCAL(ct_r2_load), tr2
2118         movi    3, r30
2119         shlli   r30, 29, r31
2120         and     r1, r31, r32
2121         andc    r1, r31, r1
2122         beq/l   r31, r32, tr2
2123         addi.l  r2, 8, r3
2124         ldx.q   r2, r63, r2
2125         /* Fall through.  */
2126 LOCAL(ct_r3_ld):        /* Copy r3 from a memory address.  */
2127         pt/l    LOCAL(ct_r3_load), tr2
2128         movi    3, r30
2129         shlli   r30, 26, r31
2130         and     r1, r31, r32
2131         andc    r1, r31, r1
2132         beq/l   r31, r32, tr2
2133         addi.l  r3, 8, r4
2134         ldx.q   r3, r63, r3
2135 LOCAL(ct_r4_ld):        /* Copy r4 from a memory address.  */
2136         pt/l    LOCAL(ct_r4_load), tr2
2137         movi    3, r30
2138         shlli   r30, 23, r31
2139         and     r1, r31, r32
2140         andc    r1, r31, r1
2141         beq/l   r31, r32, tr2
2142         addi.l  r4, 8, r5
2143         ldx.q   r4, r63, r4
2144 LOCAL(ct_r5_ld):        /* Copy r5 from a memory address.  */
2145         pt/l    LOCAL(ct_r5_load), tr2
2146         movi    3, r30
2147         shlli   r30, 20, r31
2148         and     r1, r31, r32
2149         andc    r1, r31, r1
2150         beq/l   r31, r32, tr2
2151         addi.l  r5, 8, r6
2152         ldx.q   r5, r63, r5
2153 LOCAL(ct_r6_ld):        /* Copy r6 from a memory address.  */
2154         pt/l    LOCAL(ct_r6_load), tr2
2155         movi    3 << 16, r31
2156         and     r1, r31, r32
2157         andc    r1, r31, r1
2158         beq/l   r31, r32, tr2
2159         addi.l  r6, 8, r7
2160         ldx.q   r6, r63, r6
2161 LOCAL(ct_r7_ld):        /* Copy r7 from a memory address.  */
2162         pt/l    LOCAL(ct_r7_load), tr2
2163         movi    3 << 12, r31
2164         and     r1, r31, r32
2165         andc    r1, r31, r1
2166         beq/l   r31, r32, tr2
2167         addi.l  r7, 8, r8
2168         ldx.q   r7, r63, r7
2169 LOCAL(ct_r8_ld):        /* Copy r8 from a memory address.  */
2170         pt/l    LOCAL(ct_r8_load), tr2
2171         movi    3 << 8, r31
2172         and     r1, r31, r32
2173         andc    r1, r31, r1
2174         beq/l   r31, r32, tr2
2175         addi.l  r8, 8, r9
2176         ldx.q   r8, r63, r8
2177 LOCAL(ct_r9_ld):        /* Copy r9 from a memory address.  */
2178         pt/l    LOCAL(ct_check_tramp), tr2
2179         ldx.q   r9, r63, r9
2180         blink   tr2, r63
2181 LOCAL(ct_r2_load):
2182         ldx.q   r2, r63, r2
2183         blink   tr1, r63
2184 LOCAL(ct_r3_load):
2185         ldx.q   r3, r63, r3
2186         blink   tr1, r63
2187 LOCAL(ct_r4_load):
2188         ldx.q   r4, r63, r4
2189         blink   tr1, r63
2190 LOCAL(ct_r5_load):
2191         ldx.q   r5, r63, r5
2192         blink   tr1, r63
2193 LOCAL(ct_r6_load):
2194         ldx.q   r6, r63, r6
2195         blink   tr1, r63
2196 LOCAL(ct_r7_load):
2197         ldx.q   r7, r63, r7
2198         blink   tr1, r63
2199 LOCAL(ct_r8_load):
2200         ldx.q   r8, r63, r8
2201         blink   tr1, r63
2202 LOCAL(ct_r2_pop):       /* Pop r2 from the stack.  */
2203         movi    1, r30
2204         ldx.q   r15, r63, r2
2205         shlli   r30, 29, r31
2206         addi.l  r15, 8, r15
2207         andc    r1, r31, r1
2208         blink   tr1, r63
2209 LOCAL(ct_r3_pop):       /* Pop r3 from the stack.  */
2210         movi    1, r30
2211         ldx.q   r15, r63, r3
2212         shlli   r30, 26, r31
2213         addi.l  r15, 8, r15
2214         andc    r1, r31, r1
2215         blink   tr1, r63
2216 LOCAL(ct_r4_pop):       /* Pop r4 from the stack.  */
2217         movi    1, r30
2218         ldx.q   r15, r63, r4
2219         shlli   r30, 23, r31
2220         addi.l  r15, 8, r15
2221         andc    r1, r31, r1
2222         blink   tr1, r63
2223 LOCAL(ct_r5_pop):       /* Pop r5 from the stack.  */
2224         movi    1, r30
2225         ldx.q   r15, r63, r5
2226         shlli   r30, 20, r31
2227         addi.l  r15, 8, r15
2228         andc    r1, r31, r1
2229         blink   tr1, r63
2230 LOCAL(ct_r6_pop):       /* Pop r6 from the stack.  */
2231         movi    1, r30
2232         ldx.q   r15, r63, r6
2233         shlli   r30, 16, r31
2234         addi.l  r15, 8, r15
2235         andc    r1, r31, r1
2236         blink   tr1, r63
2237 LOCAL(ct_r7_pop):       /* Pop r7 from the stack.  */
2238         ldx.q   r15, r63, r7
2239         movi    1 << 12, r31
2240         addi.l  r15, 8, r15
2241         andc    r1, r31, r1
2242         blink   tr1, r63
2243 LOCAL(ct_r8_pop):       /* Pop r8 from the stack.  */
2244         ldx.q   r15, r63, r8
2245         movi    1 << 8, r31
2246         addi.l  r15, 8, r15
2247         andc    r1, r31, r1
2248         blink   tr1, r63
2249 LOCAL(ct_pop_seq):      /* Pop a sequence of registers off the stack.  */
2250         andi    r1, 7 << 1, r30
2251         movi    (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2252         shlli   r30, 2, r31
2253         shori   LOCAL(ct_end_of_pop_seq) & 65535, r32
2254         sub.l   r32, r31, r33
2255         ptabs/l r33, tr2
2256         blink   tr2, r63
2257 LOCAL(ct_start_of_pop_seq):     /* Beginning of pop sequence.  */
2258         ldx.q   r15, r63, r3
2259         addi.l  r15, 8, r15
2260         ldx.q   r15, r63, r4
2261         addi.l  r15, 8, r15
2262         ldx.q   r15, r63, r5
2263         addi.l  r15, 8, r15
2264         ldx.q   r15, r63, r6
2265         addi.l  r15, 8, r15
2266         ldx.q   r15, r63, r7
2267         addi.l  r15, 8, r15
2268         ldx.q   r15, r63, r8
2269         addi.l  r15, 8, r15
2270 LOCAL(ct_r9_pop):       /* Pop r9 from the stack.  */
2271         ldx.q   r15, r63, r9
2272         addi.l  r15, 8, r15
2273 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
2274 LOCAL(ct_check_tramp):  /* Check whether we need a trampoline.  */
2275         pt/u    LOCAL(ct_ret_wide), tr2
2276         andi    r1, 1, r1
2277         bne/u   r1, r63, tr2
2278 LOCAL(ct_call_func):    /* Just branch to the function.  */
2279         blink   tr0, r63
2280 LOCAL(ct_ret_wide):     /* Call the function, so that we can unpack its
2281                            64-bit return value.  */
2282         add.l   r18, r63, r10
2283         blink   tr0, r18
2284         ptabs   r10, tr0
2285 #if __LITTLE_ENDIAN__
2286         shari   r2, 32, r3
2287         add.l   r2, r63, r2
2288 #else
2289         add.l   r2, r63, r3
2290         shari   r2, 32, r2
2291 #endif
2292         blink   tr0, r63
2293 #endif /* L_shcompact_call_trampoline */
2294
2295 #ifdef L_shcompact_return_trampoline
2296      /* This function does the converse of the code in `ret_wide'
2297         above.  It is tail-called by SHcompact functions returning
2298         64-bit non-floating-point values, to pack the 32-bit values in
2299         r2 and r3 into r2.  It returns to the address held in r18 and modifies r3.  */
2300
2301         .mode   SHmedia
2302         .section        .text..SHmedia32, "ax"
2303         .align  2
2304         .global GLOBAL(GCC_shcompact_return_trampoline)
2305 GLOBAL(GCC_shcompact_return_trampoline):
2306         ptabs/l r18, tr0        /* tr0 = return address taken from r18.  */
2307 #if __LITTLE_ENDIAN__
2308         addz.l  r2, r63, r2     /* r2 is the low half: zero-extend its low 32 bits.  */
2309         shlli   r3, 32, r3      /* r3 is the high half: move it to bits 63..32.  */
2310 #else
2311         addz.l  r3, r63, r3     /* r3 is the low half: zero-extend its low 32 bits.  */
2312         shlli   r2, 32, r2      /* r2 is the high half: move it to bits 63..32.  */
2313 #endif
2314         or      r3, r2, r2      /* r2 = high half | low half.  */
2315         blink   tr0, r63        /* Return through tr0; the link value goes to r63.  */
2316 #endif /* L_shcompact_return_trampoline */
2317
2318 #ifdef L_shcompact_incoming_args
2319         .section        .rodata
2320         .align  1
2321 LOCAL(ia_main_table):   /* Dispatch table for ia_loop: each entry is the 16-bit offset of a handler from ia_main_label.  ia_loop indexes it with nsb of the cookie, using a base biased by -31 entries.  */
2322 .word   1 /* Invalid, just loop (presumably an offset of 1 makes the ptrel at ia_main_label target itself -- TODO confirm) */
2323 .word   LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2324 .word   LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2325 .word   1 /* Invalid, just loop */
2326 .word   LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2327 .word   LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2328 .word   1 /* Invalid, just loop */
2329 .word   LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2330 .word   LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2331 .word   1 /* Invalid, just loop */
2332 .word   LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2333 .word   LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2334 .word   1 /* Invalid, just loop */
2335 .word   1 /* Invalid, just loop */
2336 .word   LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2337 .word   LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2338 .word   1 /* Invalid, just loop */
2339 .word   1 /* Invalid, just loop */
2340 .word   LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2341 .word   LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2342 .word   1 /* Invalid, just loop */
2343 .word   1 /* Invalid, just loop */
2344 .word   LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2345 .word   LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2346 .word   1 /* Invalid, just loop */
2347 .word   1 /* Invalid, just loop */
2348 .word   LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2349 .word   LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2350 .word   LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2351 .word   LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2352 .word   LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2353 .word   LOCAL(ia_return) - datalabel LOCAL(ia_main_label)        /* Only bit 0 of the cookie left: nothing more to store.  */
2354 .word   LOCAL(ia_return) - datalabel LOCAL(ia_main_label)        /* Last entry; presumably reached when the cookie is zero -- TODO confirm nsb(0).  */
2355         .mode   SHmedia
2356         .section        .text..SHmedia32, "ax"
2357         .align  2
2358
2359      /* This function stores 64-bit general-purpose registers back in
2360         the stack, and loads the address in which each register
2361         was stored into itself.  The lower 32 bits of r17 hold the address
2362         to begin storing, and the upper 32 bits of r17 hold the cookie.
2363         Its execution time is linear on the
2364         number of registers that actually have to be copied, and it is
2365         optimized for structures larger than 64 bits, as opposed to
2366         individual `long long' arguments.  See sh.h for details on the
2367         actual bit pattern.  It returns to the address in r18.  */
2368
2369         .global GLOBAL(GCC_shcompact_incoming_args)
2370 GLOBAL(GCC_shcompact_incoming_args):
2371         ptabs/l r18, tr0        /* Prepare to return.  */
2372         shlri   r17, 32, r0     /* Load the cookie.  */
2373         movi    ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* High 16 bits of the biased table base.  */
2374         pt/l    LOCAL(ia_loop), tr1     /* tr1 = dispatch loop, used by the handlers to continue.  */
2375         add.l   r17, r63, r17   /* Keep only the low 32 bits of r17: the store address.  */
2376         shori   ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* Low 16 bits: r43 = ia_main_table - 31 entries.  */
2377 LOCAL(ia_loop):         /* Dispatch on the cookie bits still set in r0.  */
2378         nsb     r0, r36         /* r36 = nsb of the remaining cookie; it selects the table entry.  */
2379         shlli   r36, 1, r37     /* Scale by 2: table entries are 16 bits wide.  */
2380         ldx.w   r43, r37, r38   /* r38 = handler offset relative to ia_main_label.  */
2381 LOCAL(ia_main_label):   /* Reference point for the offsets in ia_main_table.  */
2382         ptrel/l r38, tr2
2383         blink   tr2, r63        /* Jump to the selected handler.  */
2384 LOCAL(ia_r2_ld):        /* Store r2 and load its address.  */
2385         movi    3, r38
2386         shlli   r38, 29, r39    /* r39 = 3 << 29, the two cookie bits tested for r2.  */
2387         and     r0, r39, r40    /* r40 = those bits as set in the cookie.  */
2388         andc    r0, r39, r0     /* Clear both bits in the cookie.  */
2389         stx.q   r17, r63, r2    /* Store r2 at the current address...  */
2390         add.l   r17, r63, r2    /* ...and replace r2 by that address.  */
2391         addi.l  r17, 8, r17     /* Advance to the next 8-byte slot.  */
2392         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r3.  */
2393 LOCAL(ia_r3_ld):        /* Store r3 and load its address.  */
2394         movi    3, r38
2395         shlli   r38, 26, r39    /* r39 = 3 << 26, the two cookie bits tested for r3.  */
2396         and     r0, r39, r40
2397         andc    r0, r39, r0
2398         stx.q   r17, r63, r3
2399         add.l   r17, r63, r3
2400         addi.l  r17, 8, r17
2401         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r4.  */
2402 LOCAL(ia_r4_ld):        /* Store r4 and load its address.  */
2403         movi    3, r38
2404         shlli   r38, 23, r39    /* r39 = 3 << 23, the two cookie bits tested for r4.  */
2405         and     r0, r39, r40
2406         andc    r0, r39, r0
2407         stx.q   r17, r63, r4
2408         add.l   r17, r63, r4
2409         addi.l  r17, 8, r17
2410         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r5.  */
2411 LOCAL(ia_r5_ld):        /* Store r5 and load its address.  */
2412         movi    3, r38
2413         shlli   r38, 20, r39    /* r39 = 3 << 20, the two cookie bits tested for r5.  */
2414         and     r0, r39, r40
2415         andc    r0, r39, r0
2416         stx.q   r17, r63, r5
2417         add.l   r17, r63, r5
2418         addi.l  r17, 8, r17
2419         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r6.  */
2420 LOCAL(ia_r6_ld):        /* Store r6 and load its address.  */
2421         movi    3, r38
2422         shlli   r38, 16, r39    /* r39 = 3 << 16, the two cookie bits tested for r6.  */
2423         and     r0, r39, r40
2424         andc    r0, r39, r0
2425         stx.q   r17, r63, r6
2426         add.l   r17, r63, r6
2427         addi.l  r17, 8, r17
2428         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r7.  */
2429 LOCAL(ia_r7_ld):        /* Store r7 and load its address.  */
2430         movi    3 << 12, r39    /* The two cookie bits tested for r7; built with a single movi.  */
2431         and     r0, r39, r40
2432         andc    r0, r39, r0
2433         stx.q   r17, r63, r7
2434         add.l   r17, r63, r7
2435         addi.l  r17, 8, r17
2436         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r8.  */
2437 LOCAL(ia_r8_ld):        /* Store r8 and load its address.  */
2438         movi    3 << 8, r39     /* The two cookie bits tested for r8.  */
2439         and     r0, r39, r40
2440         andc    r0, r39, r0
2441         stx.q   r17, r63, r8
2442         add.l   r17, r63, r8
2443         addi.l  r17, 8, r17
2444         beq/u   r39, r40, tr1   /* Both bits were set: back to ia_loop.  Otherwise fall through to r9.  */
2445 LOCAL(ia_r9_ld):        /* Store r9 and load its address.  */
2446         stx.q   r17, r63, r9
2447         add.l   r17, r63, r9
2448         blink   tr0, r63        /* r9 is the last register handled: return without looping.  */
2449 LOCAL(ia_r2_push):      /* Push r2 onto the stack.  */
2450         movi    1, r38
2451         shlli   r38, 29, r39    /* r39 = 1 << 29, the cookie bit that selected this handler.  */
2452         andc    r0, r39, r0     /* Clear it so ia_loop moves on to the next set bit.  */
2453         stx.q   r17, r63, r2    /* Store r2 at the current address; r2 itself is left unchanged.  */
2454         addi.l  r17, 8, r17     /* Advance to the next 8-byte slot.  */
2455         blink   tr1, r63        /* Back to ia_loop.  */
2456 LOCAL(ia_r3_push):      /* Push r3 onto the stack.  */
2457         movi    1, r38
2458         shlli   r38, 26, r39    /* r39 = 1 << 26.  */
2459         andc    r0, r39, r0
2460         stx.q   r17, r63, r3
2461         addi.l  r17, 8, r17
2462         blink   tr1, r63        /* Back to ia_loop.  */
2463 LOCAL(ia_r4_push):      /* Push r4 onto the stack.  */
2464         movi    1, r38
2465         shlli   r38, 23, r39    /* r39 = 1 << 23.  */
2466         andc    r0, r39, r0
2467         stx.q   r17, r63, r4
2468         addi.l  r17, 8, r17
2469         blink   tr1, r63        /* Back to ia_loop.  */
2470 LOCAL(ia_r5_push):      /* Push r5 onto the stack.  */
2471         movi    1, r38
2472         shlli   r38, 20, r39    /* r39 = 1 << 20.  */
2473         andc    r0, r39, r0
2474         stx.q   r17, r63, r5
2475         addi.l  r17, 8, r17
2476         blink   tr1, r63        /* Back to ia_loop.  */
2477 LOCAL(ia_r6_push):      /* Push r6 onto the stack.  */
2478         movi    1, r38
2479         shlli   r38, 16, r39    /* r39 = 1 << 16.  */
2480         andc    r0, r39, r0
2481         stx.q   r17, r63, r6
2482         addi.l  r17, 8, r17
2483         blink   tr1, r63        /* Back to ia_loop.  */
2484 LOCAL(ia_r7_push):      /* Push r7 onto the stack.  */
2485         movi    1 << 12, r39    /* The cookie bit that selected this handler.  */
2486         andc    r0, r39, r0
2487         stx.q   r17, r63, r7
2488         addi.l  r17, 8, r17
2489         blink   tr1, r63        /* Back to ia_loop.  */
2490 LOCAL(ia_r8_push):      /* Push r8 onto the stack.  */
2491         movi    1 << 8, r39     /* The cookie bit that selected this handler.  */
2492         andc    r0, r39, r0
2493         stx.q   r17, r63, r8
2494         addi.l  r17, 8, r17
2495         blink   tr1, r63        /* Back to ia_loop.  */
2496 LOCAL(ia_push_seq):     /* Push a sequence of registers onto the stack.  */
2497         andi    r0, 7 << 1, r38 /* r38 = cookie bits 3..1, i.e. twice the 3-bit field value.  */
2498         movi    (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 /* High 16 bits of the end-label address.  */
2499         shlli   r38, 2, r39     /* r39 = field value * 8, a byte distance (two 4-byte instructions per register, assuming 32-bit SHmedia encodings).  */
2500         shori   LOCAL(ia_end_of_push_seq) & 65535, r40 /* r40 = address of ia_end_of_push_seq.  */
2501         sub.l   r40, r39, r41   /* Entry point = end label minus that distance.  */
2502         ptabs/l r41, tr2
2503         blink   tr2, r63        /* Run only the tail of the sequence below.  */
2504 LOCAL(ia_stack_of_push_seq):     /* Beginning of push sequence.  NOTE(review): name looks like a typo for ia_start_of_push_seq; the label is not referenced in the visible code.  */
2505         stx.q   r17, r63, r3
2506         addi.l  r17, 8, r17
2507         stx.q   r17, r63, r4
2508         addi.l  r17, 8, r17
2509         stx.q   r17, r63, r5
2510         addi.l  r17, 8, r17
2511         stx.q   r17, r63, r6
2512         addi.l  r17, 8, r17
2513         stx.q   r17, r63, r7
2514         addi.l  r17, 8, r17
2515         stx.q   r17, r63, r8
2516         addi.l  r17, 8, r17
2517 LOCAL(ia_r9_push):      /* Push r9 onto the stack.  */
2518         stx.q   r17, r63, r9    /* Last store; r17 is not advanced afterwards.  */
2519 LOCAL(ia_return):       /* Return.  */
2520         blink   tr0, r63        /* Forms the final pair with the r9 store, so this must stay directly before the end label.  */
2521 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2522 #endif /* L_shcompact_incoming_args */
2523 #endif
2524 #if __SH5__
2525 #ifdef L_nested_trampoline
2526 #if __SH5__ == 32
2527         .section        .text..SHmedia32,"ax"
2528 #else
2529         .text
2530 #endif
2531         .align  3 /* It is copied in units of 8 bytes in SHmedia mode.  */
2532         .global GLOBAL(GCC_nested_trampoline)
2533 GLOBAL(GCC_nested_trampoline):      /* Trampoline code: six instructions followed by two data words that are not defined here; whoever copies it presumably fills them in -- TODO confirm against the trampoline setup in the compiler.  */
2534         .mode   SHmedia
2535         ptrel/u r63, tr0        /* PC-relative target with zero displacement (r63): locates this code at run time.  */
2536         gettr   tr0, r0         /* r0 = that address, the base for the two loads below.  */
2537 #if __SH5__ == 64
2538         ld.q    r0, 24, r1      /* r1 = 64-bit word at offset 24, just past the six instructions (assuming 4-byte encodings).  */
2539 #else
2540         ld.l    r0, 24, r1      /* r1 = 32-bit word at offset 24, just past the six instructions (assuming 4-byte encodings).  */
2541 #endif
2542         ptabs/l r1, tr1         /* tr1 = branch target taken from that word.  */
2543 #if __SH5__ == 64
2544         ld.q    r0, 32, r1      /* r1 = the next 64-bit word; presumably the static chain -- TODO confirm.  */
2545 #else
2546         ld.l    r0, 28, r1      /* r1 = the next 32-bit word; presumably the static chain -- TODO confirm.  */
2547 #endif
2548         blink   tr1, r63        /* Jump to the target with r1 loaded; no link is kept.  */
2549 #endif /* L_nested_trampoline */
2550 #endif /* __SH5__ */
2551 #if __SH5__ == 32
2552 #ifdef L_push_pop_shmedia_regs
2553         .section        .text..SHmedia32,"ax"
2554         .mode   SHmedia
2555         .align  2
2556 #ifndef __SH4_NOFPU__
2557         .global GLOBAL(GCC_push_shmedia_regs)
2558 GLOBAL(GCC_push_shmedia_regs):      /* Save dr36..dr62 below r15, then fall through into the no-FPU entry to save the integer and target registers.  */
2559         addi.l  r15, -14*8, r15 /* Allocate 14 eight-byte slots; after the 27 more allocated below they are slots 27..40.  */
2560         fst.d   r15, 13*8, dr62
2561         fst.d   r15, 12*8, dr60
2562         fst.d   r15, 11*8, dr58
2563         fst.d   r15, 10*8, dr56
2564         fst.d   r15,  9*8, dr54
2565         fst.d   r15,  8*8, dr52
2566         fst.d   r15,  7*8, dr50
2567         fst.d   r15,  6*8, dr48
2568         fst.d   r15,  5*8, dr46
2569         fst.d   r15,  4*8, dr44
2570         fst.d   r15,  3*8, dr42
2571         fst.d   r15,  2*8, dr40
2572         fst.d   r15,  1*8, dr38
2573         fst.d   r15,  0*8, dr36
2574 #endif
2575         .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2576 GLOBAL(GCC_push_shmedia_regs_nofpu): /* Save tr5..tr7, r44..r59 and r28..r35 in 27 slots below r15; returns to r18.  */
2577         ptabs/l r18, tr0        /* tr0 = return address taken from r18.  */
2578         addi.l  r15, -27*8, r15 /* Allocate 27 eight-byte slots.  */
2579         gettr   tr7, r62        /* r60..r62 are scratch here: they are overwritten with tr5..tr7...  */
2580         gettr   tr6, r61
2581         gettr   tr5, r60
2582         st.q    r15, 26*8, r62  /* ...so slots 24..26 hold tr5..tr7, not the incoming r60..r62.  */
2583         st.q    r15, 25*8, r61
2584         st.q    r15, 24*8, r60
2585         st.q    r15, 23*8, r59  /* Slots 8..23 hold r44..r59.  */
2586         st.q    r15, 22*8, r58
2587         st.q    r15, 21*8, r57
2588         st.q    r15, 20*8, r56
2589         st.q    r15, 19*8, r55
2590         st.q    r15, 18*8, r54
2591         st.q    r15, 17*8, r53
2592         st.q    r15, 16*8, r52
2593         st.q    r15, 15*8, r51
2594         st.q    r15, 14*8, r50
2595         st.q    r15, 13*8, r49
2596         st.q    r15, 12*8, r48
2597         st.q    r15, 11*8, r47
2598         st.q    r15, 10*8, r46
2599         st.q    r15,  9*8, r45
2600         st.q    r15,  8*8, r44
2601         st.q    r15,  7*8, r35  /* Slots 0..7 hold r28..r35.  */
2602         st.q    r15,  6*8, r34
2603         st.q    r15,  5*8, r33
2604         st.q    r15,  4*8, r32
2605         st.q    r15,  3*8, r31
2606         st.q    r15,  2*8, r30
2607         st.q    r15,  1*8, r29
2608         st.q    r15,  0*8, r28
2609         blink   tr0, r63        /* Return with r15 pointing at slot 0.  */
2610
2611 #ifndef __SH4_NOFPU__
2612         .global GLOBAL(GCC_pop_shmedia_regs)
2613 GLOBAL(GCC_pop_shmedia_regs):       /* Reload dr36..dr62 from slots 27..40, then share the integer restore at .L0.  */
2614         pt      .L0, tr1        /* tr1 = shared tail, entered past the no-FPU movi.  NOTE(review): .L0 is not wrapped in LOCAL() like the file's other local labels.  */
2615         movi    41*8, r0        /* r0 = bytes to release: 27 integer/target slots plus 14 FP slots.  */
2616         fld.d   r15, 40*8, dr62
2617         fld.d   r15, 39*8, dr60
2618         fld.d   r15, 38*8, dr58
2619         fld.d   r15, 37*8, dr56
2620         fld.d   r15, 36*8, dr54
2621         fld.d   r15, 35*8, dr52
2622         fld.d   r15, 34*8, dr50
2623         fld.d   r15, 33*8, dr48
2624         fld.d   r15, 32*8, dr46
2625         fld.d   r15, 31*8, dr44
2626         fld.d   r15, 30*8, dr42
2627         fld.d   r15, 29*8, dr40
2628         fld.d   r15, 28*8, dr38
2629         fld.d   r15, 27*8, dr36
2630         blink   tr1, r63        /* Continue at .L0 with r0 = 41*8.  */
2631 #endif
2632         .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2633 GLOBAL(GCC_pop_shmedia_regs_nofpu): /* Reload tr5..tr7, r44..r59 and r28..r35 from slots 0..26; returns to r18.  */
2634         movi    27*8, r0        /* r0 = bytes to release when no FP slots were pushed.  */
2635 .L0:    /* Shared tail: expects r0 = total frame size in bytes.  */
2636         ptabs   r18, tr0        /* tr0 = return address taken from r18.  */
2637         ld.q    r15, 26*8, r62  /* Slots 24..26 hold the saved tr5..tr7 values.  */
2638         ld.q    r15, 25*8, r61
2639         ld.q    r15, 24*8, r60
2640         ptabs   r62, tr7        /* Restore tr7, tr6 and tr5; r60..r62 are left holding those values.  */
2641         ptabs   r61, tr6
2642         ptabs   r60, tr5
2643         ld.q    r15, 23*8, r59  /* Slots 8..23 restore r44..r59.  */
2644         ld.q    r15, 22*8, r58
2645         ld.q    r15, 21*8, r57
2646         ld.q    r15, 20*8, r56
2647         ld.q    r15, 19*8, r55
2648         ld.q    r15, 18*8, r54
2649         ld.q    r15, 17*8, r53
2650         ld.q    r15, 16*8, r52
2651         ld.q    r15, 15*8, r51
2652         ld.q    r15, 14*8, r50
2653         ld.q    r15, 13*8, r49
2654         ld.q    r15, 12*8, r48
2655         ld.q    r15, 11*8, r47
2656         ld.q    r15, 10*8, r46
2657         ld.q    r15,  9*8, r45
2658         ld.q    r15,  8*8, r44
2659         ld.q    r15,  7*8, r35  /* Slots 0..7 restore r28..r35.  */
2660         ld.q    r15,  6*8, r34
2661         ld.q    r15,  5*8, r33
2662         ld.q    r15,  4*8, r32
2663         ld.q    r15,  3*8, r31
2664         ld.q    r15,  2*8, r30
2665         ld.q    r15,  1*8, r29
2666         ld.q    r15,  0*8, r28
2667         add.l   r15, r0, r15    /* Release the frame: 27*8 or 41*8 bytes depending on the entry point.  */
2668         blink   tr0, r63        /* Return to the caller.  */
2669 #endif /* L_push_pop_shmedia_regs */
2670 #endif /* __SH5__ == 32 */