ports/sysdeps/alpha/divqu.S

   1 /* Copyright (C) 2004-2014 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library.  If not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include "div_libc.h"
  19
  20
  21 /* 64-bit unsigned long divide.  These are not normal C functions.  Argument
  22    registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
  23    clobbered.
  24
  25    Theory of operation here is that we can use the FPU divider for virtually
  26    all operands that we see: all dividend values between 0 and 2**53-1
  27    can be computed directly.  Note that divisor values need not be checked
  28    against that range because the rounded fp value will be close enough such
  29    that the quotient is < 1, which will properly be truncated to zero when we
  30    convert back to integer.
  31
  32    When the dividend is outside the range for which we can compute exact
  33    results, we use the fp quotient as an estimate from which we begin refining
  34    an exact integral value.  This reduces the number of iterations in the
  35    shift-and-subtract loop significantly.
  36
  37    The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
  38    for cvttq/c even without /sui being set.  It will not, however, properly
  39    raise the exception, so we don't have to worry about FPCR_INED being clear
  40    and so dying by SIGFPE.  */
  41
  42         .text
  43         .align  4
  44         .globl  __divqu
  45         .type   __divqu, @funcnoplt
  46         .usepv  __divqu, no
  47
  48         cfi_startproc
  49         cfi_return_column (RA)
  50 __divqu:        /* Unsigned 64-bit divide: RV = X / Y.  X, Y, RV, RA, AT and FRAME are not defined in this file; presumably they come from div_libc.h.  */
  51         lda     sp, -FRAME(sp)          /* Allocate FRAME bytes of stack for register saves and scratch.  */
  52         cfi_def_cfa_offset (FRAME)
  53         CALL_MCOUNT                     /* Macro defined outside this file; presumably a profiling hook -- TODO confirm.  */
  54
  55         /* Get the fp divide insn issued as quickly as possible.  After
  56            that's done, we have at least 22 cycles until its results are
  57            ready -- all the time in the world to figure out how we're
  58            going to use the results.  */
  59         stt     $f0, 0(sp)              /* Save $f0; it carries the dividend and then the quotient.  */
  60         excb                            /* NOTE(review): presumably drains pending fp traps before FPCR is read below -- confirm.  */
  61         beq     Y, DIVBYZERO            /* Y == 0: leave via DIVBYZERO (not defined here; presumably emitted by DO_DIVBYZERO at the end).  */
  62
  63         stt     $f1, 8(sp)              /* Save $f1; it carries the divisor.  */
  64         stt     $f3, 48(sp)             /* Save $f3; it holds the FPCR value for the rest of the routine.  */
  65         cfi_rel_offset ($f0, 0)
  66         cfi_rel_offset ($f1, 8)
  67         cfi_rel_offset ($f3, 48)
  68         mf_fpcr $f3                     /* Read FPCR into $f3 so it can be written back before returning.  */
  69
  70         _ITOFT2 X, $f0, 16, Y, $f1, 24  /* Macro defined outside this file; presumably copies X into $f0 and Y into $f1 using stack offsets 16 and 24 as scratch -- TODO confirm.  */
  71         cvtqt   $f0, $f0                /* X as a double; the conversion treats X as signed.  */
  72         cvtqt   $f1, $f1                /* Y as a double; likewise treated as signed.  */
  73         blt     X, $x_is_neg            /* Bit 63 of X set: the converted dividend needs fixing first.  */
  74         divt/c  $f0, $f1, $f0           /* Issue the fp divide: $f0 = X / Y.  */
  75
  76         /* Check to see if Y was mis-converted as signed value.  */
  77         ldt     $f1, 8(sp)              /* The divide has been issued; reload the saved $f1.  */
  78         blt     Y, $y_is_neg
  79
  80         /* Check to see if X fit in the double as an exact value.  */
  81         srl     X, 53, AT               /* AT = X >> 53; nonzero means X >= 2**53.  */
  82         bne     AT, $x_big
  83
  84         /* If we get here, we're expecting exact results from the division.
  85            Do nothing else besides convert and clean up.  */
  86         cvttq/c $f0, $f0                /* Convert the fp quotient to a 64-bit integer, still in $f0.  */
  87         excb
  88         mt_fpcr $f3                     /* Write back the FPCR value read on entry.  */
  89         _FTOIT  $f0, RV, 16             /* Macro defined outside this file; presumably moves the integer in $f0 to RV using stack offset 16 as scratch -- TODO confirm.  */
  90
  91         ldt     $f0, 0(sp)              /* Reload the saved $f0.  */
  92         ldt     $f3, 48(sp)             /* Reload the saved $f3.  */
  93         cfi_remember_state              /* Saved for the cfi_restore_state in front of $x_is_neg.  */
  94         cfi_restore ($f0)
  95         cfi_restore ($f1)
  96         cfi_restore ($f3)
  97         cfi_def_cfa_offset (0)          /* NOTE(review): declared before the lda below pops the frame; the other two exits declare it after their lda -- confirm this is intended.  */
  98         lda     sp, FRAME(sp)           /* Release the frame.  */
  99         ret     $31, (RA), 1
 100
 101         .align  4
 102         cfi_restore_state
 103 $x_is_neg:
 104         /* If we get here, X is so big that bit 63 is set, which made the
 105            conversion come out negative.  Fix it up lest we not even get
 106            a good estimate.  */
 107         ldah    AT, 0x5f80              /* AT = 0x5f800000: 2**64 as float.  */
 108         stt     $f2, 24(sp)             /* Save $f2; it is about to hold the 2**64 constant.  */
 109         cfi_rel_offset ($f2, 24)
 110         _ITOFS  AT, $f2, 16             /* Macro defined outside this file; presumably moves the bit pattern in AT into $f2 as a single-precision value, using stack offset 16 as scratch -- TODO confirm.  */
 111
 112         .align  4
 113         addt    $f0, $f2, $f0           /* Add 2**64 to the dividend that was converted as negative.  */
 114         unop
 115         divt/c  $f0, $f1, $f0           /* Issue the fp divide with the corrected dividend.  */
 116         unop
 117
 118         /* Ok, we've now the divide issued.  Continue with other checks.  */
 119         ldt     $f1, 8(sp)              /* Reload the saved $f1.  */
 120         unop
 121         ldt     $f2, 24(sp)             /* Reload the saved $f2.  */
 122         blt     Y, $y_is_neg
 123         cfi_restore ($f1)
 124         cfi_restore ($f2)
 125         cfi_remember_state      /* for y_is_neg */
 126
 127         .align  4
 128 $x_big:
 129         /* If we get here, X is large enough that we don't expect exact
 130            results, and neither X nor Y got mis-translated for the fp
 131            division.  Our task is to take the fp result, figure out how
 132            far it's off from the correct result and compute a fixup.  */
 133         stq     t0, 16(sp)              /* Save t0-t3; they become R, SY, S and QY (see the defines below).  */
 134         stq     t1, 24(sp)
 135         stq     t2, 32(sp)
 136         stq     t3, 40(sp)
 137         cfi_rel_offset (t0, 16)
 138         cfi_rel_offset (t1, 24)
 139         cfi_rel_offset (t2, 32)
 140         cfi_rel_offset (t3, 40)
 141
 142 #define Q       RV              /* quotient */
 143 #define R       t0              /* remainder */
 144 #define SY      t1              /* scaled Y */
 145 #define S       t2              /* scalar */
 146 #define QY      t3              /* Q*Y */
 147
 148         cvttq/c $f0, $f0                /* Convert the fp quotient estimate to a 64-bit integer.  */
 149         _FTOIT  $f0, Q, 8               /* Macro defined outside this file; presumably moves the estimate into Q using stack offset 8 as scratch -- TODO confirm.  */
 150         mulq    Q, Y, QY                /* QY = Q * Y, to be compared against X.  */
 151
 152         .align  4
 153         stq     t4, 8(sp)               /* Save t4; it is a temporary in the loop at 2: below.  The $f1 saved at this offset was already reloaded.  */
 154         excb
 155         ldt     $f0, 0(sp)              /* The fp work is finished; reload the saved $f0.  */
 156         mt_fpcr $f3                     /* Write back the FPCR value read on entry.  */
 157         cfi_rel_offset (t4, 8)
 158         cfi_restore ($f0)
 159
 160         subq    QY, X, R                /* R = Q*Y - X.  */
 161         mov     Y, SY                   /* Start the scaling at SY = Y * 1.  */
 162         mov     1, S
 163         bgt     R, $q_high              /* R > 0: the estimate Q is too large.  */
 164
 165 $q_high_ret:
 166         subq    X, QY, R                /* R = X - Q*Y, the remainder left over for the current Q.  */
 167         mov     Y, SY                   /* Restart the scaling at SY = Y * 1.  */
 168         mov     1, S
 169         bgt     R, $q_low               /* R > 0: fold R / Y into Q.  */
 170
 171 $q_low_ret:
 172         ldq     t4, 8(sp)               /* Q (RV) is final; reload every saved register.  */
 173         ldq     t0, 16(sp)
 174         ldq     t1, 24(sp)
 175         ldq     t2, 32(sp)
 176
 177         ldq     t3, 40(sp)
 178         ldt     $f3, 48(sp)
 179         lda     sp, FRAME(sp)           /* Release the frame.  */
 180         cfi_remember_state              /* Saved for the cfi_restore_state in front of the $q_high code.  */
 181         cfi_restore (t0)
 182         cfi_restore (t1)
 183         cfi_restore (t2)
 184         cfi_restore (t3)
 185         cfi_restore (t4)
 186         cfi_restore ($f3)
 187         cfi_def_cfa_offset (0)
 188         ret     $31, (RA), 1
 189
 190         .align  4
 191         cfi_restore_state
 192         /* The quotient that we computed was too large.  We need to reduce
 193            it by S such that Y*S >= R.  Obviously the closer we get to the
 194            correct value the better, but overshooting high is ok, as we'll
 195            fix that up later.  */
 196 0:
 197         addq    SY, SY, SY              /* Double SY and S together so that SY == Y*S stays true.  */
 198         addq    S, S, S
 199 $q_high:
 200         cmpult  SY, R, AT               /* AT = (SY < R), unsigned: keep doubling while SY is below R.  */
 201         bne     AT, 0b
 202
 203         subq    Q, S, Q                 /* Lower the quotient by S ...  */
 204         unop
 205         subq    QY, SY, QY              /* ... and lower QY by Y*S so that it still equals Q*Y.  */
 206         br      $q_high_ret             /* Recompute R there; any overshoot is handled by $q_low.  */
 207
 208         .align  4
 209         /* The quotient that we computed was too small.  Divide the current
 210            remainder (R) by Y and add that to the existing quotient (Q).
 211            The expectation, of course, is that R is much smaller than X.  */
 212         /* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
 213            already have a copy of Y in SY and the value 1 in S.  */
 214 0:
 215         addq    SY, SY, SY              /* Double SY and S together so that SY == Y*S stays true.  */
 216         addq    S, S, S
 217 $q_low:
 218         cmpult  SY, R, AT               /* AT = (SY < R), unsigned: keep doubling while SY is below R.  */
 219         bne     AT, 0b
 220
 221         /* Shift-down and subtract loop.  Each iteration compares our scaled
 222            Y (SY) with the remainder (R); if SY <= R then SY is subtracted
 223            from R and its scalar (S) is added to the quotient (Q).  */
 224 2:      addq    Q, S, t3                /* t3 = candidate quotient Q + S; t3 is not read as QY again on this path.  */
 225         srl     S, 1, S                 /* Halve S for the next pass; the loop ends once it reaches 0.  */
 226         cmpule  SY, R, AT               /* AT = (SY <= R), unsigned.  */
 227         subq    R, SY, t4               /* t4 = candidate remainder R - SY.  */
 228
 229         cmovne  AT, t3, Q               /* Commit both candidates only when SY fit into R.  */
 230         cmovne  AT, t4, R
 231         srl     SY, 1, SY               /* Halve SY in step with S.  */
 232         bne     S, 2b
 233
 234         br      $q_low_ret
 235
 236         .align  4
 237         cfi_restore_state
 238 $y_is_neg:
 239         /* If we get here, Y is so big that bit 63 is set.  The results
 240            from the divide will be completely wrong.  Fortunately, the
 241            quotient must be either 0 or 1, so just compute it directly.  */
 242         cmpule  Y, X, RV                /* RV = (Y <= X), unsigned: 1 when Y fits into X, otherwise 0.  */
 243         excb
 244         mt_fpcr $f3                     /* Write back the FPCR value read on entry.  */
 245         ldt     $f0, 0(sp)              /* Reload the saved $f0 and $f3; $f1 was reloaded before branching here.  */
 246         ldt     $f3, 48(sp)
 247         lda     sp, FRAME(sp)           /* Release the frame.  */
 248         cfi_restore ($f0)
 249         cfi_restore ($f3)
 250         cfi_def_cfa_offset (0)
 251         ret     $31, (RA), 1
 252
 253         cfi_endproc
 254         .size   __divqu, .-__divqu
 255
 256         DO_DIVBYZERO                    /* Macro defined outside this file; presumably expands to the DIVBYZERO handler branched to above -- TODO confirm.  */