gcc/config/sparc/lb1spc.asm

   1 /* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   2    for the sparc processor.
   3
   4    These routines are derived from the SPARC Architecture Manual, version 8,
   5    slightly edited to match the desired calling convention, and also to
   6    optimize them for our purposes.  */
   7
   8 #ifdef L_mulsi3
     ! .umul -- 32-bit multiply built from mulscc multiply-step instructions.
     !   In:   %o0 = multiplier, %o1 = multiplicand
     !   Out:  %o0 = 32-bit product
     !   Uses: %o4 (partial product), %o5, %y and the integer condition codes
     ! If neither operand has a bit set above bit 11, the 12-step short path
     ! at mul_shortway is taken; otherwise all 32 multiply steps are run.
   9 .text
  10         .align 4
  11         .global .umul
  12         .proc 4
  13 .umul:
  14         or      %o0, %o1, %o4   ! logical or of multiplier and multiplicand
  15         mov     %o0, %y         ! multiplier to Y register
  16         andncc  %o4, 0xfff, %o5 ! mask out lower 12 bits
  17         be      mul_shortway    ! can do it the short way
  18         andcc   %g0, %g0, %o4   ! zero the partial product and clear NV cc
  19         !
  20         ! long multiply
  21         !
  22         mulscc  %o4, %o1, %o4   ! first iteration of 33
  23         mulscc  %o4, %o1, %o4
  24         mulscc  %o4, %o1, %o4
  25         mulscc  %o4, %o1, %o4
  26         mulscc  %o4, %o1, %o4
  27         mulscc  %o4, %o1, %o4
  28         mulscc  %o4, %o1, %o4
  29         mulscc  %o4, %o1, %o4
  30         mulscc  %o4, %o1, %o4
  31         mulscc  %o4, %o1, %o4
  32         mulscc  %o4, %o1, %o4
  33         mulscc  %o4, %o1, %o4
  34         mulscc  %o4, %o1, %o4
  35         mulscc  %o4, %o1, %o4
  36         mulscc  %o4, %o1, %o4
  37         mulscc  %o4, %o1, %o4
  38         mulscc  %o4, %o1, %o4
  39         mulscc  %o4, %o1, %o4
  40         mulscc  %o4, %o1, %o4
  41         mulscc  %o4, %o1, %o4
  42         mulscc  %o4, %o1, %o4
  43         mulscc  %o4, %o1, %o4
  44         mulscc  %o4, %o1, %o4
  45         mulscc  %o4, %o1, %o4
  46         mulscc  %o4, %o1, %o4
  47         mulscc  %o4, %o1, %o4
  48         mulscc  %o4, %o1, %o4
  49         mulscc  %o4, %o1, %o4
  50         mulscc  %o4, %o1, %o4
  51         mulscc  %o4, %o1, %o4
  52         mulscc  %o4, %o1, %o4
  53         mulscc  %o4, %o1, %o4   ! 32nd iteration
  54         mulscc  %o4, %g0, %o4   ! last iteration only shifts
  55         ! the upper 32 bits of product are wrong, but we do not care
  56         retl
  57         rd      %y, %o0         ! (delay slot) return the contents of %y
  58         !
  59         ! short multiply
  60         !
  61 mul_shortway:
  62         mulscc  %o4, %o1, %o4   ! first iteration of 13
  63         mulscc  %o4, %o1, %o4
  64         mulscc  %o4, %o1, %o4
  65         mulscc  %o4, %o1, %o4
  66         mulscc  %o4, %o1, %o4
  67         mulscc  %o4, %o1, %o4
  68         mulscc  %o4, %o1, %o4
  69         mulscc  %o4, %o1, %o4
  70         mulscc  %o4, %o1, %o4
  71         mulscc  %o4, %o1, %o4
  72         mulscc  %o4, %o1, %o4
  73         mulscc  %o4, %o1, %o4   ! 12th iteration
  74         mulscc  %o4, %g0, %o4   ! last iteration only shifts
  75         rd      %y, %o5         ! %o5 = contents of %y
  76         sll     %o4, 12, %o4    ! left shift partial product by 12 bits
  77         srl     %o5, 20, %o5    ! right shift partial product by 20 bits
  78         retl
  79         or      %o5, %o4, %o0   ! (delay slot) merge for true product
  80 #endif
  81
  82 #ifdef L_divsi3
  83 /*
  84  * Division and remainder, from Appendix E of the SPARC Version 8
  85  * Architecture Manual, with fixes from Gordon Irlam.
  86  */
  87
  88 /*
  89  * Input: dividend and divisor in %o0 and %o1 respectively.
  90  *
  91  * m4 parameters (shown after expansion, i.e. with the values used here):
  92  *  .div        name of the function generated
  93  *  div         operation: div => %o0 / %o1; rem => %o0 % %o1
  94  *  true        signedness: true => signed; false => unsigned
  95  *
  96  * Algorithm parameters:
  97  *  N           how many bits per iteration we try to get (4)
  98  *  WORDSIZE    total number of bits (32)
  99  *
 100  * Derived constants:
 101  *  TOPBITS     number of bits in the top decade of a number
 102  *
 103  * Important variables:
 104  *  Q           the partial quotient under development (initially 0)
 105  *  R           the remainder so far, initially the dividend
 106  *  ITER        number of main division loop iterations required;
 107  *              equal to ceil(log2(quotient) / N).  Note that this
 108  *              is the log base (2^N) of the quotient.
 109  *  V           the current comparand, initially divisor*2^(ITER*N-1)
 110  *
 111  * Cost:
 112  *  Current estimate for non-large dividend is
 113  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
 114  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 115  *  different path, as the upper bits of the quotient must be developed
 116  *  one bit at a time.
 117  */
 118         .global .udiv
 119         .align 4
 120         .proc 4
 121         .text
     ! .udiv -- unsigned divide entry point (%o0 / %o1).  It zeroes the sign
     ! word %g3 and joins the code shared with .div at ready_to_divide, so
     ! got_result never negates the quotient.
 122 .udiv:
 123          b ready_to_divide
 124          mov 0, %g3             ! (delay slot) result is always positive
 125
 126         .global .div
 127         .align 4
 128         .proc 4
 129         .text
     ! .div -- signed divide entry point (%o0 / %o1).
     ! Sets %g3 = %o1 xor %o0 (its sign bit is the sign of the quotient),
     ! negates whichever operands are negative, then falls into
     ! ready_to_divide.
 130 .div:
 131         ! compute sign of result; if neither is negative, no problem
 132         orcc    %o1, %o0, %g0   ! either negative?
 133         bge     ready_to_divide ! no, go do the divide
 134         xor     %o1, %o0, %g3   ! compute sign in any case
 135         tst     %o1             ! is the divisor negative?
 136         bge     1f              ! no: then %o0 is the negative one
 137         tst     %o0             ! (delay slot) cc for the bge below
 138         ! %o1 is definitely negative; %o0 might also be negative
 139         bge     ready_to_divide ! if %o0 not negative...
 140         sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
 141 1:      ! %o0 is negative, %o1 is nonnegative
 142         sub     %g0, %o0, %o0   ! make %o0 nonnegative
 143
 144
 145 ready_to_divide:
     ! Shared unsigned core.  Register roles from here on:
     !   %o3 = R (remainder so far)      %o5 = V (current comparand)
     !   %o2 = Q (partial quotient)      %o4 = count of 4-bit iterations
     !   %g1 = 1 << (32 - 4 - 1), threshold for the large-dividend path
     !   %g3 = sign word tested at got_result
 146
 147         ! Ready to divide.  Compute size of quotient; scale comparand.
 148         orcc    %o1, %g0, %o5   ! V = divisor; Z is set if it is zero
 149         bne     1f
 150         mov     %o0, %o3        ! (delay slot) R = dividend
 151
 152         ! Divide by zero trap.  If it returns, return 0 (about as
 153         ! wrong as possible, but that is what SunOS does...).
 154         ta      0x2             ! ST_DIV0
 155         retl
 156         clr     %o0             ! (delay slot) return value 0
 157
 158 1:
 159         cmp     %o3, %o5                ! if %o1 exceeds %o0, done
 160         blu     got_result              ! (and algorithm fails otherwise)
 161         clr     %o2             ! (delay slot) Q = 0
 162         sethi   %hi(1 << (32 - 4 - 1)), %g1
 163         cmp     %o3, %g1
 164         blu     not_really_big  ! R below the threshold: regular path
 165         clr     %o4             ! (delay slot) iteration count = 0
 166
 167         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
 168         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
 169         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
 170         ! Compute ITER in an unorthodox manner: know we need to shift V into
 171         ! the top decade: so do not even bother to compare to R.
     ! In this path %g2 counts the single-bit divide steps still to be done.
 172         1:
 173                 cmp     %o5, %g1
 174                 bgeu    3f
 175                 mov     1, %g2          ! (delay slot) step count starts at 1
 176                 sll     %o5, 4, %o5     ! V <<= 4
 177                 b       1b
 178                 add     %o4, 1, %o4     ! (delay slot) one more 4-bit iteration
 179
 180         ! Now compute %g2.
 181         2:      addcc   %o5, %o5, %o5   ! V <<= 1; carry = bit shifted out
 182                 bcc     not_too_big
 183                 add     %g2, 1, %g2     ! (delay slot) one more single-bit step
 184
 185                 ! We get here if %o5 (the shifted copy of %o1) overflowed
 186                 ! while shifting.  This means that %o3 has the high-order
 187                 ! bit set.  Restore %o5 and subtract from %o3.
 188                 sll     %g1, 4, %g1     ! high order bit
 189                 srl     %o5, 1, %o5     ! rest of %o5
 190                 add     %o5, %g1, %o5   ! put the high-order bit back
 191                 b       do_single_div
 192                 sub     %g2, 1, %g2     ! (delay slot) undo the count for that shift
 193
 194         not_too_big:
 195         3:      cmp     %o5, %o3
 196                 blu     2b              ! V < R: keep doubling V
 197                 nop
 198                 be      do_single_div   ! V == R
 199                 nop
 200         /* NB: these are commented out in the V8-SPARC manual as well */
 201         /* (I do not understand this) */
 202         ! %o5 > %o3: went too far: back up 1 step
 203         !       srl     %o5, 1, %o5
 204         !       dec     %g2
 205         ! do single-bit divide steps
 206         !
 207         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
 208         ! first divide step without thinking.  BUT, the others are conditional,
 209         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
 210         ! order bit set in the first step, just falling into the regular
 211         ! division loop will mess up the first time around.
 212         ! So we unroll slightly...
 213         do_single_div:
 214                 subcc   %g2, 1, %g2
 215                 bl      end_regular_divide      ! no single-bit steps to do
 216                 nop
 217                 sub     %o3, %o5, %o3   ! first step: R -= V unconditionally
 218                 mov     1, %o2          ! Q = 1
 219                 b       end_single_divloop
 220                 nop
 221         single_divloop:
 222                 sll     %o2, 1, %o2     ! Q <<= 1 (does not touch the cc)
 223                 bl      1f              ! cc come from the tst %o3 below
 224                 srl     %o5, 1, %o5     ! (delay slot) V >>= 1
 225                 ! %o3 >= 0
 226                 sub     %o3, %o5, %o3
 227                 b       2f
 228                 add     %o2, 1, %o2     ! (delay slot) Q += 1
 229         1:      ! %o3 < 0
 230                 add     %o3, %o5, %o3
 231                 sub     %o2, 1, %o2
 232         2:
 233         end_single_divloop:
 234                 subcc   %g2, 1, %g2
 235                 bge     single_divloop
 236                 tst     %o3             ! (delay slot) cc = sign of R
 237                 b,a     end_regular_divide      ! annulled: next instruction is skipped
 238
 239 not_really_big:
     ! Scale V up four bits at a time until it exceeds R, counting the
     ! shifts in %o4; the count is then reduced by one before divloop.
 240 1:
 241         sll     %o5, 4, %o5     ! V <<= 4
 242         cmp     %o5, %o3
 243         bleu    1b              ! repeat while V <= R (unsigned)
 244         addcc   %o4, 1, %o4     ! (delay slot) count the shift; sets cc for be
 245         be      got_result      ! taken only if the count came out zero
 246         sub     %o4, 1, %o4     ! (delay slot) count -= 1
 247
 248         tst     %o3     ! set up for initial iteration
 249 divloop:
     ! One pass develops four quotient bits by non-restoring division.
     ! Each of the four levels halves V, then subtracts it from R when the
     ! remainder is >= 0 or adds it back when the remainder is < 0.
     ! A label Ld.n is the remainder-negative arm at depth d, where n - 16
     ! is the accumulated digit sum noted in the comments.  Each leaf adds
     ! its final odd digit sum (between -15 and +15) to Q, which was
     ! shifted left by 4 on entry, and branches to 9: after the tree.
     ! Every srl below sits in the delay slot of the bl before it.
 250         sll     %o2, 4, %o2
 251         ! depth 1, accumulated bits 0
 252         bl      L1.16
 253         srl     %o5,1,%o5
 254         ! remainder is positive
 255         subcc   %o3,%o5,%o3
 256         ! depth 2, accumulated bits 1
 257         bl      L2.17
 258         srl     %o5,1,%o5
 259         ! remainder is positive
 260         subcc   %o3,%o5,%o3
 261         ! depth 3, accumulated bits 3
 262         bl      L3.19
 263         srl     %o5,1,%o5
 264         ! remainder is positive
 265         subcc   %o3,%o5,%o3
 266         ! depth 4, accumulated bits 7
 267         bl      L4.23
 268         srl     %o5,1,%o5
 269         ! remainder is positive
 270         subcc   %o3,%o5,%o3
 271         b       9f
 272         add     %o2, (7*2+1), %o2
 273
 274 L4.23:
 275         ! remainder is negative
 276         addcc   %o3,%o5,%o3
 277         b       9f
 278         add     %o2, (7*2-1), %o2
 279
 280
 281 L3.19:
 282         ! remainder is negative
 283         addcc   %o3,%o5,%o3
 284         ! depth 4, accumulated bits 5
 285         bl      L4.21
 286         srl     %o5,1,%o5
 287         ! remainder is positive
 288         subcc   %o3,%o5,%o3
 289         b       9f
 290         add     %o2, (5*2+1), %o2
 291
 292 L4.21:
 293         ! remainder is negative
 294         addcc   %o3,%o5,%o3
 295         b       9f
 296         add     %o2, (5*2-1), %o2
 297
 298 L2.17:
 299         ! remainder is negative
 300         addcc   %o3,%o5,%o3
 301         ! depth 3, accumulated bits 1
 302         bl      L3.17
 303         srl     %o5,1,%o5
 304         ! remainder is positive
 305         subcc   %o3,%o5,%o3
 306         ! depth 4, accumulated bits 3
 307         bl      L4.19
 308         srl     %o5,1,%o5
 309         ! remainder is positive
 310         subcc   %o3,%o5,%o3
 311         b       9f
 312         add     %o2, (3*2+1), %o2
 313
 314 L4.19:
 315         ! remainder is negative
 316         addcc   %o3,%o5,%o3
 317         b       9f
 318         add     %o2, (3*2-1), %o2
 319
 320 L3.17:
 321         ! remainder is negative
 322         addcc   %o3,%o5,%o3
 323         ! depth 4, accumulated bits 1
 324         bl      L4.17
 325         srl     %o5,1,%o5
 326         ! remainder is positive
 327         subcc   %o3,%o5,%o3
 328         b       9f
 329         add     %o2, (1*2+1), %o2
 330
 331 L4.17:
 332         ! remainder is negative
 333         addcc   %o3,%o5,%o3
 334         b       9f
 335         add     %o2, (1*2-1), %o2
 336
 337 L1.16:
 338         ! remainder is negative
 339         addcc   %o3,%o5,%o3
 340         ! depth 2, accumulated bits -1
 341         bl      L2.15
 342         srl     %o5,1,%o5
 343         ! remainder is positive
 344         subcc   %o3,%o5,%o3
 345         ! depth 3, accumulated bits -1
 346         bl      L3.15
 347         srl     %o5,1,%o5
 348         ! remainder is positive
 349         subcc   %o3,%o5,%o3
 350         ! depth 4, accumulated bits -1
 351         bl      L4.15
 352         srl     %o5,1,%o5
 353         ! remainder is positive
 354         subcc   %o3,%o5,%o3
 355         b       9f
 356         add     %o2, (-1*2+1), %o2
 357
 358 L4.15:
 359         ! remainder is negative
 360         addcc   %o3,%o5,%o3
 361         b       9f
 362         add     %o2, (-1*2-1), %o2
 363
 364 L3.15:
 365         ! remainder is negative
 366         addcc   %o3,%o5,%o3
 367         ! depth 4, accumulated bits -3
 368         bl      L4.13
 369         srl     %o5,1,%o5
 370         ! remainder is positive
 371         subcc   %o3,%o5,%o3
 372         b       9f
 373         add     %o2, (-3*2+1), %o2
 374
 375 L4.13:
 376         ! remainder is negative
 377         addcc   %o3,%o5,%o3
 378         b       9f
 379         add     %o2, (-3*2-1), %o2
 380
 381 L2.15:
 382         ! remainder is negative
 383         addcc   %o3,%o5,%o3
 384         ! depth 3, accumulated bits -3
 385         bl      L3.13
 386         srl     %o5,1,%o5
 387         ! remainder is positive
 388         subcc   %o3,%o5,%o3
 389         ! depth 4, accumulated bits -5
 390         bl      L4.11
 391         srl     %o5,1,%o5
 392         ! remainder is positive
 393         subcc   %o3,%o5,%o3
 394         b       9f
 395         add     %o2, (-5*2+1), %o2
 396
 397 L4.11:
 398         ! remainder is negative
 399         addcc   %o3,%o5,%o3
 400         b       9f
 401         add     %o2, (-5*2-1), %o2
 402
 403 L3.13:
 404         ! remainder is negative
 405         addcc   %o3,%o5,%o3
 406         ! depth 4, accumulated bits -7
 407         bl      L4.9
 408         srl     %o5,1,%o5
 409         ! remainder is positive
 410         subcc   %o3,%o5,%o3
 411         b       9f
 412         add     %o2, (-7*2+1), %o2
 413
 414 L4.9:
 415         ! remainder is negative
 416         addcc   %o3,%o5,%o3
 417         b       9f
 418         add     %o2, (-7*2-1), %o2
 419
 420         9:
 421 end_regular_divide:
     ! Loop control for divloop, followed by the final correction of Q when
     ! the remainder ended up negative.
 422         subcc   %o4, 1, %o4     ! one 4-bit iteration accounted for
 423         bge     divloop         ! again while the count is still >= 0
 424         tst     %o3             ! (delay slot) cc = sign of R
 425         bl,a    got_result      ! R < 0: the annulled slot below runs only then
 426         ! non-restoring fixup here (one instruction only!)
 427         sub     %o2, 1, %o2     ! Q -= 1
 428
 429
 430 got_result:
     ! Common exit: return Q in %o0, negated when the sign word %g3 is
     ! negative.
 431         ! check to see if answer should be < 0
 432         tst     %g3
 433         bl,a    1f
 434         sub %g0, %o2, %o2       ! (annulled slot) Q = -Q, only when %g3 < 0
 435 1:
 436         retl
 437         mov %o2, %o0            ! (delay slot) return value
 438 #endif
 439
 440 #ifdef L_modsi3
 441 /* This implementation was taken from glibc:
 442  *
 443  * Input: dividend and divisor in %o0 and %o1 respectively.
 444  *
 445  * Algorithm parameters:
 446  *  N           how many bits per iteration we try to get (4)
 447  *  WORDSIZE    total number of bits (32)
 448  *
 449  * Derived constants:
 450  *  TOPBITS     number of bits in the top decade of a number
 451  *
 452  * Important variables:
 453  *  Q           the partial quotient under development (initially 0)
 454  *  R           the remainder so far, initially the dividend
 455  *  ITER        number of main division loop iterations required;
 456  *              equal to ceil(log2(quotient) / N).  Note that this
 457  *              is the log base (2^N) of the quotient.
 458  *  V           the current comparand, initially divisor*2^(ITER*N-1)
 459  *
 460  * Cost:
 461  *  Current estimate for non-large dividend is
 462  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
 463  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 464  *  different path, as the upper bits of the quotient must be developed
 465  *  one bit at a time.
 466  */
 467 .text
 468         .align 4
 469         .global .urem
 470         .proc 4
     ! .urem -- unsigned remainder entry point (%o0 % %o1).  It zeroes the
     ! sign word %g3 and joins the code shared with .rem at divide, so
     ! got_result never negates the remainder.
 471 .urem:
 472         b       divide
 473         mov     0, %g3          ! (delay slot) result always positive
 474
 475         .align 4
 476         .global .rem
 477         .proc 4
     ! .rem -- signed remainder entry point (%o0 % %o1).
     ! Copies the dividend into the sign word %g3, negates whichever
     ! operands are negative, then falls into divide.
 478 .rem:
 479         ! compute sign of result; if neither is negative, no problem
 480         orcc    %o1, %o0, %g0   ! either negative?
 481         bge     2f                      ! no, go do the divide
 482         mov     %o0, %g3                ! sign of remainder matches %o0
 483         tst     %o1             ! is the divisor negative?
 484         bge     1f              ! no: then %o0 is the negative one
 485         tst     %o0             ! (delay slot) cc for the bge below
 486         ! %o1 is definitely negative; %o0 might also be negative
 487         bge     2f                      ! if %o0 not negative...
 488         sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
 489 1:      ! %o0 is negative, %o1 is nonnegative
 490         sub     %g0, %o0, %o0   ! make %o0 nonnegative
 491 2:
 492
 493         ! Ready to divide.  Compute size of quotient; scale comparand.
     ! Shared unsigned core.  Register roles from here on:
     !   %o3 = R (remainder so far)      %o5 = V (current comparand)
     !   %o2 = Q (partial quotient)      %o4 = count of 4-bit iterations
     !   %g1 = 1 << (32 - 4 - 1), threshold for the large-dividend path
     !   %g3 = sign word tested at got_result
 494 divide:
 495         orcc    %o1, %g0, %o5   ! V = divisor; Z is set if it is zero
 496         bne     1f
 497         mov     %o0, %o3        ! (delay slot) R = dividend
 498
 499                 ! Divide by zero trap.  If it returns, return 0 (about as
 500                 ! wrong as possible, but that is what SunOS does...).
 501                 ta      0x2   !ST_DIV0
 502                 retl
 503                 clr     %o0     ! (delay slot) return value 0
 504
 505 1:
 506         cmp     %o3, %o5                ! if %o1 exceeds %o0, done
 507         blu     got_result              ! (and algorithm fails otherwise)
 508         clr     %o2             ! (delay slot) Q = 0
 509         sethi   %hi(1 << (32 - 4 - 1)), %g1
 510         cmp     %o3, %g1
 511         blu     not_really_big  ! R below the threshold: regular path
 512         clr     %o4             ! (delay slot) iteration count = 0
 513
 514         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
 515         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
 516         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
 517         ! Compute ITER in an unorthodox manner: know we need to shift V into
 518         ! the top decade: so do not even bother to compare to R.
     ! In this path %g2 counts the single-bit divide steps still to be done.
 519         1:
 520                 cmp     %o5, %g1
 521                 bgeu    3f
 522                 mov     1, %g2          ! (delay slot) step count starts at 1
 523                 sll     %o5, 4, %o5     ! V <<= 4
 524                 b       1b
 525                 add     %o4, 1, %o4     ! (delay slot) one more 4-bit iteration
 526
 527         ! Now compute %g2.
 528         2:      addcc   %o5, %o5, %o5   ! V <<= 1; carry = bit shifted out
 529                 bcc     not_too_big
 530                 add     %g2, 1, %g2     ! (delay slot) one more single-bit step
 531
 532                 ! We get here if %o5 (the shifted copy of %o1) overflowed
 533                 ! while shifting.  This means that %o3 has the high-order
 534                 ! bit set.  Restore %o5 and subtract from %o3.
 535                 sll     %g1, 4, %g1     ! high order bit
 536                 srl     %o5, 1, %o5             ! rest of %o5
 537                 add     %o5, %g1, %o5   ! put the high-order bit back
 538                 b       do_single_div
 539                 sub     %g2, 1, %g2     ! (delay slot) undo the count for that shift
 540
 541         not_too_big:
 542         3:      cmp     %o5, %o3
 543                 blu     2b              ! V < R: keep doubling V
 544                 nop
 545                 be      do_single_div   ! V == R
 546                 nop
 547         /* NB: these are commented out in the V8-SPARC manual as well */
 548         /* (I do not understand this) */
 549         ! %o5 > %o3: went too far: back up 1 step
 550         !       srl     %o5, 1, %o5
 551         !       dec     %g2
 552         ! do single-bit divide steps
 553         !
 554         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
 555         ! first divide step without thinking.  BUT, the others are conditional,
 556         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
 557         ! order bit set in the first step, just falling into the regular
 558         ! division loop will mess up the first time around.
 559         ! So we unroll slightly...
 560         do_single_div:
 561                 subcc   %g2, 1, %g2
 562                 bl      end_regular_divide      ! no single-bit steps to do
 563                 nop
 564                 sub     %o3, %o5, %o3   ! first step: R -= V unconditionally
 565                 mov     1, %o2          ! Q = 1
 566                 b       end_single_divloop
 567                 nop
 568         single_divloop:
 569                 sll     %o2, 1, %o2     ! Q <<= 1 (does not touch the cc)
 570                 bl      1f              ! cc come from the tst %o3 below
 571                 srl     %o5, 1, %o5     ! (delay slot) V >>= 1
 572                 ! %o3 >= 0
 573                 sub     %o3, %o5, %o3
 574                 b       2f
 575                 add     %o2, 1, %o2     ! (delay slot) Q += 1
 576         1:      ! %o3 < 0
 577                 add     %o3, %o5, %o3
 578                 sub     %o2, 1, %o2
 579         2:
 580         end_single_divloop:
 581                 subcc   %g2, 1, %g2
 582                 bge     single_divloop
 583                 tst     %o3             ! (delay slot) cc = sign of R
 584                 b,a     end_regular_divide      ! annulled: next instruction is skipped
 585
 586 not_really_big:
     ! Scale V up four bits at a time until it exceeds R, counting the
     ! shifts in %o4; the count is then reduced by one before divloop.
 587 1:
 588         sll     %o5, 4, %o5     ! V <<= 4
 589         cmp     %o5, %o3
 590         bleu    1b              ! repeat while V <= R (unsigned)
 591         addcc   %o4, 1, %o4     ! (delay slot) count the shift; sets cc for be
 592         be      got_result      ! taken only if the count came out zero
 593         sub     %o4, 1, %o4     ! (delay slot) count -= 1
 594
 595         tst     %o3     ! set up for initial iteration
 596 divloop:
     ! One pass performs four non-restoring division steps.
     ! Each of the four levels halves V, then subtracts it from R when the
     ! remainder is >= 0 or adds it back when the remainder is < 0.
     ! A label Ld.n is the remainder-negative arm at depth d, where n - 16
     ! is the accumulated digit sum noted in the comments.  Each leaf adds
     ! its final odd digit sum (between -15 and +15) to Q, which was
     ! shifted left by 4 on entry, and branches to 9: after the tree.
     ! Every srl below sits in the delay slot of the bl before it.
 597         sll     %o2, 4, %o2
 598                 ! depth 1, accumulated bits 0
 599         bl      L1.16
 600         srl     %o5,1,%o5
 601         ! remainder is positive
 602         subcc   %o3,%o5,%o3
 603         ! depth 2, accumulated bits 1
 604         bl      L2.17
 605         srl     %o5,1,%o5
 606         ! remainder is positive
 607         subcc   %o3,%o5,%o3
 608         ! depth 3, accumulated bits 3
 609         bl      L3.19
 610         srl     %o5,1,%o5
 611         ! remainder is positive
 612         subcc   %o3,%o5,%o3
 613         ! depth 4, accumulated bits 7
 614         bl      L4.23
 615         srl     %o5,1,%o5
 616         ! remainder is positive
 617         subcc   %o3,%o5,%o3
 618         b       9f
 619         add     %o2, (7*2+1), %o2
 620 L4.23:
 621         ! remainder is negative
 622         addcc   %o3,%o5,%o3
 623         b       9f
 624         add     %o2, (7*2-1), %o2
 625
 626 L3.19:
 627         ! remainder is negative
 628         addcc   %o3,%o5,%o3
 629         ! depth 4, accumulated bits 5
 630         bl      L4.21
 631         srl     %o5,1,%o5
 632         ! remainder is positive
 633         subcc   %o3,%o5,%o3
 634         b       9f
 635         add     %o2, (5*2+1), %o2
 636
 637 L4.21:
 638         ! remainder is negative
 639         addcc   %o3,%o5,%o3
 640         b       9f
 641         add     %o2, (5*2-1), %o2
 642
 643 L2.17:
 644         ! remainder is negative
 645         addcc   %o3,%o5,%o3
 646         ! depth 3, accumulated bits 1
 647         bl      L3.17
 648         srl     %o5,1,%o5
 649         ! remainder is positive
 650         subcc   %o3,%o5,%o3
 651         ! depth 4, accumulated bits 3
 652         bl      L4.19
 653         srl     %o5,1,%o5
 654         ! remainder is positive
 655         subcc   %o3,%o5,%o3
 656         b       9f
 657         add     %o2, (3*2+1), %o2
 658
 659 L4.19:
 660         ! remainder is negative
 661         addcc   %o3,%o5,%o3
 662         b       9f
 663         add     %o2, (3*2-1), %o2
 664
 665 L3.17:
 666         ! remainder is negative
 667         addcc   %o3,%o5,%o3
 668         ! depth 4, accumulated bits 1
 669         bl      L4.17
 670         srl     %o5,1,%o5
 671         ! remainder is positive
 672         subcc   %o3,%o5,%o3
 673         b       9f
 674         add     %o2, (1*2+1), %o2
 675
 676 L4.17:
 677         ! remainder is negative
 678         addcc   %o3,%o5,%o3
 679         b       9f
 680         add     %o2, (1*2-1), %o2
 681
 682 L1.16:
 683         ! remainder is negative
 684         addcc   %o3,%o5,%o3
 685         ! depth 2, accumulated bits -1
 686         bl      L2.15
 687         srl     %o5,1,%o5
 688         ! remainder is positive
 689         subcc   %o3,%o5,%o3
 690         ! depth 3, accumulated bits -1
 691         bl      L3.15
 692         srl     %o5,1,%o5
 693         ! remainder is positive
 694         subcc   %o3,%o5,%o3
 695         ! depth 4, accumulated bits -1
 696         bl      L4.15
 697         srl     %o5,1,%o5
 698         ! remainder is positive
 699         subcc   %o3,%o5,%o3
 700         b       9f
 701         add     %o2, (-1*2+1), %o2
 702
 703 L4.15:
 704         ! remainder is negative
 705         addcc   %o3,%o5,%o3
 706         b       9f
 707         add     %o2, (-1*2-1), %o2
 708
 709 L3.15:
 710         ! remainder is negative
 711         addcc   %o3,%o5,%o3
 712         ! depth 4, accumulated bits -3
 713         bl      L4.13
 714         srl     %o5,1,%o5
 715         ! remainder is positive
 716         subcc   %o3,%o5,%o3
 717         b       9f
 718         add     %o2, (-3*2+1), %o2
 719
 720 L4.13:
 721         ! remainder is negative
 722         addcc   %o3,%o5,%o3
 723         b       9f
 724         add     %o2, (-3*2-1), %o2
 725
 726 L2.15:
 727         ! remainder is negative
 728         addcc   %o3,%o5,%o3
 729         ! depth 3, accumulated bits -3
 730         bl      L3.13
 731         srl     %o5,1,%o5
 732         ! remainder is positive
 733         subcc   %o3,%o5,%o3
 734         ! depth 4, accumulated bits -5
 735         bl      L4.11
 736         srl     %o5,1,%o5
 737         ! remainder is positive
 738         subcc   %o3,%o5,%o3
 739         b       9f
 740         add     %o2, (-5*2+1), %o2
 741
 742 L4.11:
 743         ! remainder is negative
 744         addcc   %o3,%o5,%o3
 745         b       9f
 746         add     %o2, (-5*2-1), %o2
 747
 748 L3.13:
 749         ! remainder is negative
 750         addcc   %o3,%o5,%o3
 751         ! depth 4, accumulated bits -7
 752         bl      L4.9
 753         srl     %o5,1,%o5
 754         ! remainder is positive
 755         subcc   %o3,%o5,%o3
 756         b       9f
 757         add     %o2, (-7*2+1), %o2
 758
 759 L4.9:
 760         ! remainder is negative
 761         addcc   %o3,%o5,%o3
 762         b       9f
 763         add     %o2, (-7*2-1), %o2
 764
 765         9:
 766 end_regular_divide:
     ! Loop control for divloop, followed by the final correction of R when
     ! the remainder ended up negative.
 767         subcc   %o4, 1, %o4     ! one 4-bit iteration accounted for
 768         bge     divloop         ! again while the count is still >= 0
 769         tst     %o3             ! (delay slot) cc = sign of R
 770         bl,a    got_result      ! R < 0: the annulled slot below runs only then
 771         ! non-restoring fixup here (one instruction only!)
 772         add     %o3, %o1, %o3   ! R += %o1 (the divisor as used for the division)
 773
 774 got_result:
 773
 774 got_result:
 775         ! check to see if answer should be < 0
     ! Common exit: return R in %o0, negated when the sign word %g3 is
     ! negative.
 776         tst     %g3
 777         bl,a    1f
 778         sub %g0, %o3, %o3       ! (annulled slot) R = -R, only when %g3 < 0
 779 1:
 780         retl
 781         mov %o3, %o0            ! (delay slot) return value
 782
 783 #endif
 784