/* Extracted from the LuaTeX source tree:
   source/libs/gmp/gmp-src/mpn/sparc64/mod_1.c  */
/* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.

Copyright 1991, 1993, 1994, 1999-2001, 2003, 2010 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"
/*                 64-bit divisor   32-bit divisor
                    cycles/limb      cycles/limb
                     (approx)         (approx)
   Ultrasparc 2i:      160              120
*/
/* 32-bit divisors are treated in special case code.  This requires 4 mulx
   per limb instead of 8 in the general case.

   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
   addressing, to get the two halves of each limb read in the correct order.
   This is kept in an adj variable.  Doing that measures about 6 c/l faster
   than just writing HALF_ENDIAN_ADJ(i) in the loop.  The latter shouldn't
   be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc
   3.2.1 at least).

   A simple udivx/umulx loop for the 32-bit case was attempted for small
   sizes, but at size==2 it was only about the same speed and at size==3 was
   slower.  */
60 static mp_limb_t
61 mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
63 int norm, norm_rshift;
64 mp_limb_t src_high_limb;
65 mp_size_t i;
67 ASSERT (size_limbs >= 0);
68 ASSERT (d_limb != 0);
70 if (UNLIKELY (size_limbs == 0))
71 return 0;
73 src_high_limb = src_limbptr[size_limbs-1];
75 /* udivx is good for size==1, and no need to bother checking limb<divisor,
76 since if that's likely the caller should check */
77 if (UNLIKELY (size_limbs == 1))
78 return src_high_limb % d_limb;
80 if (d_limb <= CNST_LIMB(0xFFFFFFFF))
82 unsigned *src, n1, n0, r, dummy_q, nshift, norm_rmask;
83 mp_size_t size, adj;
84 mp_limb_t dinv_limb;
86 size = 2 * size_limbs; /* halfwords */
87 src = (unsigned *) src_limbptr;
89 /* prospective initial remainder, if < d */
90 r = src_high_limb >> 32;
92 /* If the length of the source is uniformly distributed, then there's
93 a 50% chance of the high 32-bits being zero, which we can skip. */
94 if (r == 0)
96 r = (unsigned) src_high_limb;
97 size--;
98 ASSERT (size > 0); /* because always even */
101 /* Skip a division if high < divisor. Having the test here before
102 normalizing will still skip as often as possible. */
103 if (r < d_limb)
105 size--;
106 ASSERT (size > 0); /* because size==1 handled above */
108 else
109 r = 0;
111 count_leading_zeros_32 (norm, d_limb);
112 norm -= 32;
113 d_limb <<= norm;
115 norm_rshift = 32 - norm;
116 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
117 i = size-1;
118 adj = HALF_ENDIAN_ADJ (i);
119 n1 = src [i + adj];
120 r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
122 invert_half_limb (dinv_limb, d_limb);
123 adj = -adj;
125 for (i--; i >= 0; i--)
127 n0 = src [i + adj];
128 adj = -adj;
129 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
130 udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
131 n1 = n0;
134 /* same as loop, but without n0 */
135 nshift = n1 << norm;
136 udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
138 ASSERT ((r & ((1 << norm) - 1)) == 0);
139 return r >> norm;
141 else
143 mp_srcptr src;
144 mp_size_t size;
145 mp_limb_t n1, n0, r, dinv, dummy_q, nshift, norm_rmask;
147 src = src_limbptr;
148 size = size_limbs;
149 r = src_high_limb; /* initial remainder */
151 /* Skip a division if high < divisor. Having the test here before
152 normalizing will still skip as often as possible. */
153 if (r < d_limb)
155 size--;
156 ASSERT (size > 0); /* because size==1 handled above */
158 else
159 r = 0;
161 count_leading_zeros (norm, d_limb);
162 d_limb <<= norm;
164 norm_rshift = GMP_LIMB_BITS - norm;
165 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
167 src += size;
168 n1 = *--src;
169 r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
171 invert_limb (dinv, d_limb);
173 for (i = size-2; i >= 0; i--)
175 n0 = *--src;
176 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
177 udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
178 n1 = n0;
181 /* same as loop, but without n0 */
182 nshift = n1 << norm;
183 udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
185 ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);
186 return r >> norm;
190 mp_limb_t
191 mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
193 ASSERT (n >= 0);
194 ASSERT (b != 0);
196 /* Should this be handled at all? Rely on callers? Note un==0 is currently
197 required by mpz/fdiv_r_ui.c and possibly other places. */
198 if (n == 0)
199 return 0;
201 if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
203 if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
205 return mpn_mod_1_anynorm (ap, n, b);
207 else
209 mp_limb_t pre[4];
210 mpn_mod_1_1p_cps (pre, b);
211 return mpn_mod_1_1p (ap, n, b, pre);
214 else
216 if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
218 return mpn_mod_1_anynorm (ap, n, b);
220 else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
222 mp_limb_t pre[4];
223 mpn_mod_1_1p_cps (pre, b);
224 return mpn_mod_1_1p (ap, n, b << pre[1], pre);
226 else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
228 mp_limb_t pre[5];
229 mpn_mod_1s_2p_cps (pre, b);
230 return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
232 else
234 mp_limb_t pre[7];
235 mpn_mod_1s_4p_cps (pre, b);
236 return mpn_mod_1s_4p (ap, n, b << pre[1], pre);