sysdeps/sparc/sparc32/urem.S

   1    /* This file is generated from divrem.m4; DO NOT EDIT! */
   2 /*
   3  * Division and remainder, from Appendix E of the Sparc Version 8
   4  * Architecture Manual, with fixes from Gordon Irlam.
   5  */
   6
   7 /*
   8  * Input: dividend and divisor in %o0 and %o1 respectively.
   9  *
   10  * m4 parameters (shown with the values m4 substituted for this file):
   11  *  NAME = .urem    name of function to generate
   12  *  OP = rem        OP=div => %o0 / %o1; OP=rem => %o0 % %o1
   13  *  S = false       S=true => signed; S=false => unsigned
  14  *
  15  * Algorithm parameters:
  16  *  N           how many bits per iteration we try to get (4)
  17  *  WORDSIZE    total number of bits (32)
  18  *
  19  * Derived constants:
  20  *  TOPBITS     number of bits in the top decade of a number
  21  *
  22  * Important variables:
  23  *  Q           the partial quotient under development (initially 0)
  24  *  R           the remainder so far, initially the dividend
  25  *  ITER        number of main division loop iterations required;
  26  *              equal to ceil(log2(quotient) / N).  Note that this
  27  *              is the log base (2^N) of the quotient.
  28  *  V           the current comparand, initially divisor*2^(ITER*N-1)
  29  *
  30  * Cost:
  31  *  Current estimate for non-large dividend is
  32  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
  33  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
  34  *  different path, as the upper bits of the quotient must be developed
  35  *  one bit at a time.
  36  */
  37
  38
  39
  40 #include <sysdep.h>
  41 #include <sys/trap.h>
  42
   43 ENTRY(.urem)
   44         ! Unsigned 32-bit remainder: returns %o0 mod %o1 in %o0.  Leaf routine (retl).
   45         ! Ready to divide.  Compute size of quotient; scale comparand.
   46         orcc    %o1, %g0, %o5           ! %o5 = V = divisor; Z is set when it is zero
   47         bne     1f                      ! nonzero divisor: go divide
   48         mov     %o0, %o3                ! (delay slot) %o3 = R = dividend
   49
   50                 ! Divide by zero trap.  If it returns, return 0 (about as
   51                 ! wrong as possible, but that is what SunOS does...).
   52                 ta      ST_DIV0
   53                 retl
   54                 clr     %o0             ! (delay slot) result = 0
   55
   56 1:
   57         cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
   58         blu     LOC(got_result)         ! (and algorithm fails otherwise)
   59         clr     %o2                     ! (delay slot) %o2 = Q = 0
   60         sethi   %hi(1 << (32 - 4 - 1)), %g1     ! %g1 = 2**27, the big-dividend threshold
   61         cmp     %o3, %g1
   62         blu     LOC(not_really_big)     ! R < 2**27: take the N-bits-per-pass path directly
   63         clr     %o4                     ! (delay slot) %o4 = ITER = 0
   64
   65         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
   66         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
   67         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
   68         ! Compute ITER in an unorthodox manner: know we need to shift V into
   69         ! the top decade: so do not even bother to compare to R.
   70         1:
   71                 cmp     %o5, %g1
   72                 bgeu    3f              ! V >= 2**27: stop scaling by 2**N
   73                 mov     1, %g2          ! (delay slot) %g2 = 1, count of single-bit steps
   74                 sll     %o5, 4, %o5     ! V *= 2**N
   75                 b       1b
   76                 add     %o4, 1, %o4     ! (delay slot) ITER += 1
   77
   78         ! Now compute %g2.
   79         2:      addcc   %o5, %o5, %o5   ! V *= 2; carry is set if a bit fell off the top
   80                 bcc     LOC(not_too_big)        ! no carry: compare the doubled V with R again
   81                 add     %g2, 1, %g2     ! (delay slot) one more single-bit step
   82
   83                 ! We get here if %o5 (the shifted copy of %o1) overflowed while
   84                 ! doubling.  As %o5 < %o3 held just before, %o3 has the high-order
   85                 ! bit set too.  Restore %o5 and subtract from %o3.
   86                 sll     %g1, 4, %g1     ! high order bit
   87                 srl     %o5, 1, %o5             ! rest of %o5
   88                 add     %o5, %g1, %o5   ! V is back to its value before the doubling
   89                 b       LOC(do_single_div)
   90                 sub     %g2, 1, %g2     ! (delay slot) take back the count added at 2: above
   91
   92         LOC(not_too_big):
   93         3:      cmp     %o5, %o3
   94                 blu     2b              ! V < R (unsigned): keep doubling
   95                 nop
   96                 be      LOC(do_single_div)      ! V == R
   97                 nop
   98         /* NB: these are commented out in the V8-Sparc manual as well */
   99         /* (I do not understand this) */
  100         ! %o5 > %o3: went too far: back up 1 step
  101         !       srl     %o5, 1, %o5
  102         !       dec     %g2
  103         ! do single-bit divide steps
  104         !
  105         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
  106         ! first divide step without thinking.  BUT, the others are conditional,
  107         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
  108         ! order bit set in the first step, just falling into the regular
  109         ! division loop will mess up the first time around.
  110         ! So we unroll slightly...
  111         LOC(do_single_div):
  112                 subcc   %g2, 1, %g2
  113                 bl      LOC(end_regular_divide) ! count went negative: no single-bit steps to do
  114                 nop
  115                 sub     %o3, %o5, %o3   ! unrolled first step: R -= V, unconditionally
  116                 mov     1, %o2          ! Q = 1
  117                 b       LOC(end_single_divloop)
  118                 nop
  119         LOC(single_divloop):
  120                 sll     %o2, 1, %o2     ! Q *= 2 (cc untouched: still the sign of R from tst below)
  121                 bl      1f              ! R < 0: add V back instead of subtracting
  122                 srl     %o5, 1, %o5     ! (delay slot) V /= 2, done on both paths
  123                 ! %o3 >= 0
  124                 sub     %o3, %o5, %o3
  125                 b       2f
  126                 add     %o2, 1, %o2     ! (delay slot) Q += 1
  127         1:      ! %o3 < 0
  128                 add     %o3, %o5, %o3
  129                 sub     %o2, 1, %o2     ! Q -= 1
  130         2:
  131         LOC(end_single_divloop):
  132                 subcc   %g2, 1, %g2
  133                 bge     LOC(single_divloop)     ! more single-bit steps remain
  134                 tst     %o3             ! (delay slot) cc = sign of R for the next step
  135                 b,a     LOC(end_regular_divide) ! annulled: no delay-slot instruction runs
  136
  137 LOC(not_really_big):
  138 1:
  139         sll     %o5, 4, %o5             ! V *= 2**N
  140         cmp     %o5, %o3
  141         bleu    1b                      ! repeat while V <= R (unsigned)
  142         addcc   %o4, 1, %o4             ! (delay slot) ITER += 1, executed on every pass
  143         be      LOC(got_result)         ! taken only if that addcc produced zero
  144         sub     %o4, 1, %o4             ! (delay slot) ITER -= 1; entered from here, divloop runs ITER+1 times
  145
  146         tst     %o3     ! set up for initial iteration
  147 LOC(divloop):                           ! one pass develops N=4 quotient bits; cc = sign of R on entry
  148         sll     %o2, 4, %o2             ! make room in Q for the N new bits
  149                 ! depth 1, accumulated bits 0  (labels are LOC(depth.16+accumulated bits))
  150         bl      LOC(1.16)               ! R < 0: the next step adds V instead of subtracting
  151         srl     %o5,1,%o5               ! (delay slot) V /= 2, done on both paths
  152         ! remainder is positive
  153         subcc   %o3,%o5,%o3
  154                         ! depth 2, accumulated bits 1
  155         bl      LOC(2.17)
  156         srl     %o5,1,%o5
  157         ! remainder is positive
  158         subcc   %o3,%o5,%o3
  159                         ! depth 3, accumulated bits 3
  160         bl      LOC(3.19)
  161         srl     %o5,1,%o5
  162         ! remainder is positive
  163         subcc   %o3,%o5,%o3
  164                         ! depth 4, accumulated bits 7
  165         bl      LOC(4.23)
  166         srl     %o5,1,%o5
  167         ! remainder is positive
  168         subcc   %o3,%o5,%o3
  169                 b       9f
  170                 add     %o2, (7*2+1), %o2       ! (delay slot) Q += 2*bits+1 after a final subtract step
  171
  172 LOC(4.23):
  173         ! remainder is negative
  174         addcc   %o3,%o5,%o3
  175                 b       9f
  176                 add     %o2, (7*2-1), %o2       ! (delay slot) Q += 2*bits-1 after a final add step
  177
  178
  179 LOC(3.19):
  180         ! remainder is negative
  181         addcc   %o3,%o5,%o3
  182                         ! depth 4, accumulated bits 5
  183         bl      LOC(4.21)
  184         srl     %o5,1,%o5
  185         ! remainder is positive
  186         subcc   %o3,%o5,%o3
  187                 b       9f
  188                 add     %o2, (5*2+1), %o2
  189
  190 LOC(4.21):
  191         ! remainder is negative
  192         addcc   %o3,%o5,%o3
  193                 b       9f
  194                 add     %o2, (5*2-1), %o2
  195
  196
  197
  198 LOC(2.17):
  199         ! remainder is negative
  200         addcc   %o3,%o5,%o3
  201                         ! depth 3, accumulated bits 1
  202         bl      LOC(3.17)
  203         srl     %o5,1,%o5
  204         ! remainder is positive
  205         subcc   %o3,%o5,%o3
  206                         ! depth 4, accumulated bits 3
  207         bl      LOC(4.19)
  208         srl     %o5,1,%o5
  209         ! remainder is positive
  210         subcc   %o3,%o5,%o3
  211                 b       9f
  212                 add     %o2, (3*2+1), %o2
  213
  214 LOC(4.19):
  215         ! remainder is negative
  216         addcc   %o3,%o5,%o3
  217                 b       9f
  218                 add     %o2, (3*2-1), %o2
  219
  220
  221 LOC(3.17):
  222         ! remainder is negative
  223         addcc   %o3,%o5,%o3
  224                         ! depth 4, accumulated bits 1
  225         bl      LOC(4.17)
  226         srl     %o5,1,%o5
  227         ! remainder is positive
  228         subcc   %o3,%o5,%o3
  229                 b       9f
  230                 add     %o2, (1*2+1), %o2
  231
  232 LOC(4.17):
  233         ! remainder is negative
  234         addcc   %o3,%o5,%o3
  235                 b       9f
  236                 add     %o2, (1*2-1), %o2
  237
  238
  239
  240
  241 LOC(1.16):
  242         ! remainder is negative
  243         addcc   %o3,%o5,%o3
  244                         ! depth 2, accumulated bits -1
  245         bl      LOC(2.15)
  246         srl     %o5,1,%o5
  247         ! remainder is positive
  248         subcc   %o3,%o5,%o3
  249                         ! depth 3, accumulated bits -1
  250         bl      LOC(3.15)
  251         srl     %o5,1,%o5
  252         ! remainder is positive
  253         subcc   %o3,%o5,%o3
  254                         ! depth 4, accumulated bits -1
  255         bl      LOC(4.15)
  256         srl     %o5,1,%o5
  257         ! remainder is positive
  258         subcc   %o3,%o5,%o3
  259                 b       9f
  260                 add     %o2, (-1*2+1), %o2
  261
  262 LOC(4.15):
  263         ! remainder is negative
  264         addcc   %o3,%o5,%o3
  265                 b       9f
  266                 add     %o2, (-1*2-1), %o2
  267
  268
  269 LOC(3.15):
  270         ! remainder is negative
  271         addcc   %o3,%o5,%o3
  272                         ! depth 4, accumulated bits -3
  273         bl      LOC(4.13)
  274         srl     %o5,1,%o5
  275         ! remainder is positive
  276         subcc   %o3,%o5,%o3
  277                 b       9f
  278                 add     %o2, (-3*2+1), %o2
  279
  280 LOC(4.13):
  281         ! remainder is negative
  282         addcc   %o3,%o5,%o3
  283                 b       9f
  284                 add     %o2, (-3*2-1), %o2
  285
  286
  287
  288 LOC(2.15):
  289         ! remainder is negative
  290         addcc   %o3,%o5,%o3
  291                         ! depth 3, accumulated bits -3
  292         bl      LOC(3.13)
  293         srl     %o5,1,%o5
  294         ! remainder is positive
  295         subcc   %o3,%o5,%o3
  296                         ! depth 4, accumulated bits -5
  297         bl      LOC(4.11)
  298         srl     %o5,1,%o5
  299         ! remainder is positive
  300         subcc   %o3,%o5,%o3
  301                 b       9f
  302                 add     %o2, (-5*2+1), %o2
  303
  304 LOC(4.11):
  305         ! remainder is negative
  306         addcc   %o3,%o5,%o3
  307                 b       9f
  308                 add     %o2, (-5*2-1), %o2
  309
  310
  311 LOC(3.13):
  312         ! remainder is negative
  313         addcc   %o3,%o5,%o3
  314                         ! depth 4, accumulated bits -7
  315         bl      LOC(4.9)
  316         srl     %o5,1,%o5
  317         ! remainder is positive
  318         subcc   %o3,%o5,%o3
  319                 b       9f
  320                 add     %o2, (-7*2+1), %o2
  321
  322 LOC(4.9):
  323         ! remainder is negative
  324         addcc   %o3,%o5,%o3
  325                 b       9f
  326                 add     %o2, (-7*2-1), %o2
  327
  328
  329
  330
  331         9:
  332 LOC(end_regular_divide):
  333         subcc   %o4, 1, %o4             ! ITER -= 1
  334         bge     LOC(divloop)            ! still >= 0: run another N-bit pass
  335         tst     %o3                     ! (delay slot) cc = sign of R
  336         bl,a    LOC(got_result)         ! annulled branch: the add below runs only when R < 0
  337         ! non-restoring fixup here (one instruction only!)
  338         add     %o3, %o1, %o3           ! R += divisor, making the remainder non-negative
  339
  340
  341 LOC(got_result):
  342
  343         retl
  344         mov %o3, %o0                    ! (delay slot) return value = R
  345
  346 END(.urem)