bignum.c

   1 /**********************************************************************
   2
   3   bignum.c -
   4
   5   $Author$
   6   created at: Fri Jun 10 00:48:55 JST 1994
   7
   8   Copyright (C) 1993-2007 Yukihiro Matsumoto
   9
  10 **********************************************************************/
  11
  12 #include "ruby/internal/config.h"
  13
  14 #include <ctype.h>
  15 #include <float.h>
  16 #include <math.h>
  17
  18 #ifdef HAVE_STRINGS_H
  19 # include <strings.h>
  20 #endif
  21
  22 #ifdef HAVE_IEEEFP_H
  23 # include <ieeefp.h>
  24 #endif
  25
  26 #if !defined(USE_GMP)
  27 #if defined(HAVE_LIBGMP) && defined(HAVE_GMP_H)
  28 # define USE_GMP 1
  29 #else
  30 # define USE_GMP 0
  31 #endif
  32 #endif
  33
  34 #include "id.h"
  35 #include "internal.h"
  36 #include "internal/bignum.h"
  37 #include "internal/complex.h"
  38 #include "internal/gc.h"
  39 #include "internal/numeric.h"
  40 #include "internal/object.h"
  41 #include "internal/sanitizers.h"
  42 #include "internal/variable.h"
  43 #include "internal/warnings.h"
  44 #include "ruby/thread.h"
  45 #include "ruby/util.h"
  46 #include "ruby_assert.h"
  47
  48 #if USE_GMP
  49 RBIMPL_WARNING_PUSH()
  50 # ifdef _MSC_VER
  51 RBIMPL_WARNING_IGNORED(4146) /* for mpn_neg() */
  52 # endif
  53 # include <gmp.h>
  54 RBIMPL_WARNING_POP()
  55 #endif
  56
  57 static const bool debug_integer_pack = (
  58 #ifdef DEBUG_INTEGER_PACK
  59     DEBUG_INTEGER_PACK+0
  60 #else
  61     RUBY_DEBUG
  62 #endif
  63     ) != 0;
  64
  65 const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  66
  67 #ifndef SIZEOF_BDIGIT_DBL
  68 # if SIZEOF_INT*2 <= SIZEOF_LONG_LONG
  69 #  define SIZEOF_BDIGIT_DBL SIZEOF_LONG_LONG
  70 # else
  71 #  define SIZEOF_BDIGIT_DBL SIZEOF_LONG
  72 # endif
  73 #endif
  74
  75 STATIC_ASSERT(sizeof_bdigit_dbl, sizeof(BDIGIT_DBL) == SIZEOF_BDIGIT_DBL);
  76 STATIC_ASSERT(sizeof_bdigit_dbl_signed, sizeof(BDIGIT_DBL_SIGNED) == SIZEOF_BDIGIT_DBL);
  77 STATIC_ASSERT(sizeof_bdigit, SIZEOF_BDIGIT <= sizeof(BDIGIT));
  78 STATIC_ASSERT(sizeof_bdigit_and_dbl, SIZEOF_BDIGIT*2 <= SIZEOF_BDIGIT_DBL);
  79 STATIC_ASSERT(bdigit_signedness, 0 < (BDIGIT)-1);
  80 STATIC_ASSERT(bdigit_dbl_signedness, 0 < (BDIGIT_DBL)-1);
  81 STATIC_ASSERT(bdigit_dbl_signed_signedness, 0 > (BDIGIT_DBL_SIGNED)-1);
  82 STATIC_ASSERT(rbignum_embed_len_max, BIGNUM_EMBED_LEN_MAX <= (BIGNUM_EMBED_LEN_MASK >> BIGNUM_EMBED_LEN_SHIFT));
  83
  84 #if SIZEOF_BDIGIT < SIZEOF_LONG
  85 STATIC_ASSERT(sizeof_long_and_sizeof_bdigit, SIZEOF_LONG % SIZEOF_BDIGIT == 0);
  86 #else
  87 STATIC_ASSERT(sizeof_long_and_sizeof_bdigit, SIZEOF_BDIGIT % SIZEOF_LONG == 0);
  88 #endif
  89
  90 #ifdef WORDS_BIGENDIAN
  91 #   define HOST_BIGENDIAN_P 1
  92 #else
  93 #   define HOST_BIGENDIAN_P 0
  94 #endif
  95 /* (!LSHIFTABLE(d, n) ? 0 : (n)) is the same as n but suppress a warning, C4293, by Visual Studio.  */
  96 #define LSHIFTABLE(d, n) ((n) < sizeof(d) * CHAR_BIT)
  97 #define LSHIFTX(d, n) (!LSHIFTABLE(d, n) ? 0 : ((d) << (!LSHIFTABLE(d, n) ? 0 : (n))))
  98 #define CLEAR_LOWBITS(d, numbits) ((d) & LSHIFTX(~((d)*0), (numbits)))
  99 #define FILL_LOWBITS(d, numbits) ((d) | (LSHIFTX(((d)*0+1), (numbits))-1))
 100 #define POW2_P(x) (((x)&((x)-1))==0)
 101
 102 #define BDIGITS(x) (BIGNUM_DIGITS(x))
 103 #define BITSPERDIG (SIZEOF_BDIGIT*CHAR_BIT)
 104 #define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG)
 105 #define BIGRAD_HALF ((BDIGIT)(BIGRAD >> 1))
 106 #define BDIGIT_MSB(d) (((d) & BIGRAD_HALF) != 0)
 107 #define BIGUP(x) LSHIFTX(((x) + (BDIGIT_DBL)0), BITSPERDIG)
 108 #define BIGDN(x) RSHIFT((x),BITSPERDIG)
 109 #define BIGLO(x) ((BDIGIT)((x) & BDIGMAX))
 110 #define BDIGMAX ((BDIGIT)(BIGRAD-1))
 111 #define BDIGIT_DBL_MAX (~(BDIGIT_DBL)0)
 112
 113 #if SIZEOF_BDIGIT == 2
 114 #   define swap_bdigit(x) swap16(x)
 115 #elif SIZEOF_BDIGIT == 4
 116 #   define swap_bdigit(x) swap32(x)
 117 #elif SIZEOF_BDIGIT == 8
 118 #   define swap_bdigit(x) swap64(x)
 119 #endif
 120
 121 #define BIGZEROP(x) (BIGNUM_LEN(x) == 0 || \
 122                      (BDIGITS(x)[0] == 0 && \
 123                       (BIGNUM_LEN(x) == 1 || bigzero_p(x))))
 124 #define BIGSIZE(x) (BIGNUM_LEN(x) == 0 ? (size_t)0 : \
 125     BDIGITS(x)[BIGNUM_LEN(x)-1] ? \
 126         (size_t)(BIGNUM_LEN(x)*SIZEOF_BDIGIT - nlz(BDIGITS(x)[BIGNUM_LEN(x)-1])/CHAR_BIT) : \
 127     rb_absint_size(x, NULL))
 128
 129 #define BIGDIVREM_EXTRA_WORDS 1
 130 #define bdigit_roomof(n) roomof(n, SIZEOF_BDIGIT)
 131 #define BARY_ARGS(ary) ary, numberof(ary)
 132
 133 #define BARY_ADD(z, x, y) bary_add(BARY_ARGS(z), BARY_ARGS(x), BARY_ARGS(y))
 134 #define BARY_SUB(z, x, y) bary_sub(BARY_ARGS(z), BARY_ARGS(x), BARY_ARGS(y))
 135 #define BARY_SHORT_MUL(z, x, y) bary_short_mul(BARY_ARGS(z), BARY_ARGS(x), BARY_ARGS(y))
 136 #define BARY_DIVMOD(q, r, x, y) bary_divmod(BARY_ARGS(q), BARY_ARGS(r), BARY_ARGS(x), BARY_ARGS(y))
 137 #define BARY_ZERO_P(x) bary_zero_p(BARY_ARGS(x))
 138
 139 #define BIGNUM_SET_NEGATIVE_SIGN(b) BIGNUM_SET_SIGN(b, 0)
 140 #define BIGNUM_SET_POSITIVE_SIGN(b) BIGNUM_SET_SIGN(b, 1)
 141
 142 #define bignew(len,sign) bignew_1(rb_cInteger,(len),(sign))
 143
 144 #define BDIGITS_ZERO(ptr, n) do { \
 145   BDIGIT *bdigitz_zero_ptr = (ptr); \
 146   size_t bdigitz_zero_n = (n); \
 147   while (bdigitz_zero_n) { \
 148     *bdigitz_zero_ptr++ = 0; \
 149     bdigitz_zero_n--; \
 150   } \
 151 } while (0)
 152
 153 #define BARY_TRUNC(ds, n) do { \
 154         while (0 < (n) && (ds)[(n)-1] == 0) \
 155             (n)--; \
 156     } while (0)
 157
 158 #define KARATSUBA_BALANCED(xn, yn) ((yn)/2 < (xn))
 159 #define TOOM3_BALANCED(xn, yn) (((yn)+2)/3 * 2 < (xn))
 160
 161 #define GMP_MUL_DIGITS 20
 162 #define KARATSUBA_MUL_DIGITS 70
 163 #define TOOM3_MUL_DIGITS 150
 164
 165 #define GMP_DIV_DIGITS 20
 166 #define GMP_BIG2STR_DIGITS 20
 167 #define GMP_STR2BIG_DIGITS 20
 168 #if USE_GMP
 169 # define NAIVE_MUL_DIGITS GMP_MUL_DIGITS
 170 #else
 171 # define NAIVE_MUL_DIGITS KARATSUBA_MUL_DIGITS
 172 #endif
 173
 174 typedef void (mulfunc_t)(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn);
 175
 176 static mulfunc_t bary_mul_toom3_start;
 177 static mulfunc_t bary_mul_karatsuba_start;
 178 static BDIGIT bigdivrem_single(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT y);
 179
 180 static VALUE bignew_1(VALUE klass, size_t len, int sign);
 181 static inline VALUE bigtrunc(VALUE x);
 182
 183 static VALUE bigsq(VALUE x);
 184 static inline VALUE power_cache_get_power(int base, int power_level, size_t *numdigits_ret);
 185
 186 #if SIZEOF_BDIGIT <= SIZEOF_INT
 187 static int nlz(BDIGIT x) { return nlz_int((unsigned int)x) - (SIZEOF_INT-SIZEOF_BDIGIT) * CHAR_BIT; }
 188 #elif SIZEOF_BDIGIT <= SIZEOF_LONG
 189 static int nlz(BDIGIT x) { return nlz_long((unsigned long)x) - (SIZEOF_LONG-SIZEOF_BDIGIT) * CHAR_BIT; }
 190 #elif SIZEOF_BDIGIT <= SIZEOF_LONG_LONG
 191 static int nlz(BDIGIT x) { return nlz_long_long((unsigned LONG_LONG)x) - (SIZEOF_LONG_LONG-SIZEOF_BDIGIT) * CHAR_BIT; }
 192 #elif SIZEOF_BDIGIT <= SIZEOF_INT128_T
 193 static int nlz(BDIGIT x) { return nlz_int128((uint128_t)x) - (SIZEOF_INT128_T-SIZEOF_BDIGIT) * CHAR_BIT; }
 194 #endif
 195
 196 #define U16(a) ((uint16_t)(a))
 197 #define U32(a) ((uint32_t)(a))
 198 #ifdef HAVE_UINT64_T
 199 #define U64(a,b) (((uint64_t)(a) << 32) | (b))
 200 #endif
 201 #ifdef HAVE_UINT128_T
 202 #define U128(a,b,c,d) (((uint128_t)U64(a,b) << 64) | U64(c,d))
 203 #endif
 204
 205 /* The following script, maxpow.rb, generates the tables follows.
 206
 207 def big(n, bits)
 208   ns = []
 209   ((bits+31)/32).times {
 210     ns << sprintf("0x%08x", n & 0xffff_ffff)
 211     n >>= 32
 212   }
 213   "U#{bits}(" + ns.reverse.join(",") + ")"
 214 end
 215 def values(ary, width, indent)
 216   lines = [""]
 217   ary.each {|e|
 218     lines << "" if !ary.last.empty? && width < (lines.last + e + ", ").length
 219     lines.last << e + ", "
 220   }
 221   lines.map {|line| " " * indent + line.chomp(" ") + "\n" }.join
 222 end
 223 [16,32,64,128].each {|bits|
 224   max = 2**bits-1
 225   exps = []
 226   nums = []
 227   2.upto(36) {|base|
 228     exp = 0
 229     n = 1
 230     while n * base <= max
 231       exp += 1
 232       n *= base
 233     end
 234     exps << exp.to_s
 235     nums << big(n, bits)
 236   }
 237   puts "#ifdef HAVE_UINT#{bits}_T"
 238   puts "static const int maxpow#{bits}_exp[35] = {"
 239   print values(exps, 70, 4)
 240   puts "};"
 241   puts "static const uint#{bits}_t maxpow#{bits}_num[35] = {"
 242   print values(nums, 70, 4)
 243   puts "};"
 244   puts "#endif"
 245 }
 246
 247  */
 248
 249 #if SIZEOF_BDIGIT_DBL == 2
 250 static const int maxpow16_exp[35] = {
 251     15, 10, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
 252     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 253 };
 254 static const uint16_t maxpow16_num[35] = {
 255     U16(0x00008000), U16(0x0000e6a9), U16(0x00004000), U16(0x00003d09),
 256     U16(0x0000b640), U16(0x000041a7), U16(0x00008000), U16(0x0000e6a9),
 257     U16(0x00002710), U16(0x00003931), U16(0x00005100), U16(0x00006f91),
 258     U16(0x00009610), U16(0x0000c5c1), U16(0x00001000), U16(0x00001331),
 259     U16(0x000016c8), U16(0x00001acb), U16(0x00001f40), U16(0x0000242d),
 260     U16(0x00002998), U16(0x00002f87), U16(0x00003600), U16(0x00003d09),
 261     U16(0x000044a8), U16(0x00004ce3), U16(0x000055c0), U16(0x00005f45),
 262     U16(0x00006978), U16(0x0000745f), U16(0x00008000), U16(0x00008c61),
 263     U16(0x00009988), U16(0x0000a77b), U16(0x0000b640),
 264 };
 265 #elif SIZEOF_BDIGIT_DBL == 4
 266 static const int maxpow32_exp[35] = {
 267     31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7,
 268     7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 269 };
 270 static const uint32_t maxpow32_num[35] = {
 271     U32(0x80000000), U32(0xcfd41b91), U32(0x40000000), U32(0x48c27395),
 272     U32(0x81bf1000), U32(0x75db9c97), U32(0x40000000), U32(0xcfd41b91),
 273     U32(0x3b9aca00), U32(0x8c8b6d2b), U32(0x19a10000), U32(0x309f1021),
 274     U32(0x57f6c100), U32(0x98c29b81), U32(0x10000000), U32(0x18754571),
 275     U32(0x247dbc80), U32(0x3547667b), U32(0x4c4b4000), U32(0x6b5a6e1d),
 276     U32(0x94ace180), U32(0xcaf18367), U32(0x0b640000), U32(0x0e8d4a51),
 277     U32(0x1269ae40), U32(0x17179149), U32(0x1cb91000), U32(0x23744899),
 278     U32(0x2b73a840), U32(0x34e63b41), U32(0x40000000), U32(0x4cfa3cc1),
 279     U32(0x5c13d840), U32(0x6d91b519), U32(0x81bf1000),
 280 };
 281 #elif SIZEOF_BDIGIT_DBL == 8 && defined HAVE_UINT64_T
 282 static const int maxpow64_exp[35] = {
 283     63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15, 15, 15,
 284     15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12,
 285     12,
 286 };
 287 static const uint64_t maxpow64_num[35] = {
 288     U64(0x80000000,0x00000000), U64(0xa8b8b452,0x291fe821),
 289     U64(0x40000000,0x00000000), U64(0x6765c793,0xfa10079d),
 290     U64(0x41c21cb8,0xe1000000), U64(0x36427987,0x50226111),
 291     U64(0x80000000,0x00000000), U64(0xa8b8b452,0x291fe821),
 292     U64(0x8ac72304,0x89e80000), U64(0x4d28cb56,0xc33fa539),
 293     U64(0x1eca170c,0x00000000), U64(0x780c7372,0x621bd74d),
 294     U64(0x1e39a505,0x7d810000), U64(0x5b27ac99,0x3df97701),
 295     U64(0x10000000,0x00000000), U64(0x27b95e99,0x7e21d9f1),
 296     U64(0x5da0e1e5,0x3c5c8000), U64(0xd2ae3299,0xc1c4aedb),
 297     U64(0x16bcc41e,0x90000000), U64(0x2d04b7fd,0xd9c0ef49),
 298     U64(0x5658597b,0xcaa24000), U64(0xa0e20737,0x37609371),
 299     U64(0x0c29e980,0x00000000), U64(0x14adf4b7,0x320334b9),
 300     U64(0x226ed364,0x78bfa000), U64(0x383d9170,0xb85ff80b),
 301     U64(0x5a3c23e3,0x9c000000), U64(0x8e651373,0x88122bcd),
 302     U64(0xdd41bb36,0xd259e000), U64(0x0aee5720,0xee830681),
 303     U64(0x10000000,0x00000000), U64(0x172588ad,0x4f5f0981),
 304     U64(0x211e44f7,0xd02c1000), U64(0x2ee56725,0xf06e5c71),
 305     U64(0x41c21cb8,0xe1000000),
 306 };
 307 #elif SIZEOF_BDIGIT_DBL == 16 && defined HAVE_UINT128_T
 308 static const int maxpow128_exp[35] = {
 309     127, 80, 63, 55, 49, 45, 42, 40, 38, 37, 35, 34, 33, 32, 31, 31, 30,
 310     30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 24,
 311     24,
 312 };
 313 static const uint128_t maxpow128_num[35] = {
 314     U128(0x80000000,0x00000000,0x00000000,0x00000000),
 315     U128(0x6f32f1ef,0x8b18a2bc,0x3cea5978,0x9c79d441),
 316     U128(0x40000000,0x00000000,0x00000000,0x00000000),
 317     U128(0xd0cf4b50,0xcfe20765,0xfff4b4e3,0xf741cf6d),
 318     U128(0x6558e2a0,0x921fe069,0x42860000,0x00000000),
 319     U128(0x5080c7b7,0xd0e31ba7,0x5911a67d,0xdd3d35e7),
 320     U128(0x40000000,0x00000000,0x00000000,0x00000000),
 321     U128(0x6f32f1ef,0x8b18a2bc,0x3cea5978,0x9c79d441),
 322     U128(0x4b3b4ca8,0x5a86c47a,0x098a2240,0x00000000),
 323     U128(0xffd1390a,0x0adc2fb8,0xdabbb817,0x4d95c99b),
 324     U128(0x2c6fdb36,0x4c25e6c0,0x00000000,0x00000000),
 325     U128(0x384bacd6,0x42c343b4,0xe90c4272,0x13506d29),
 326     U128(0x31f5db32,0xa34aced6,0x0bf13a0e,0x00000000),
 327     U128(0x20753ada,0xfd1e839f,0x53686d01,0x3143ee01),
 328     U128(0x10000000,0x00000000,0x00000000,0x00000000),
 329     U128(0x68ca11d6,0xb4f6d1d1,0xfaa82667,0x8073c2f1),
 330     U128(0x223e493b,0xb3bb69ff,0xa4b87d6c,0x40000000),
 331     U128(0xad62418d,0x14ea8247,0x01c4b488,0x6cc66f59),
 332     U128(0x2863c1f5,0xcdae42f9,0x54000000,0x00000000),
 333     U128(0xa63fd833,0xb9386b07,0x36039e82,0xbe651b25),
 334     U128(0x1d1f7a9c,0xd087a14d,0x28cdf3d5,0x10000000),
 335     U128(0x651b5095,0xc2ea8fc1,0xb30e2c57,0x77aaf7e1),
 336     U128(0x0ddef20e,0xff760000,0x00000000,0x00000000),
 337     U128(0x29c30f10,0x29939b14,0x6664242d,0x97d9f649),
 338     U128(0x786a435a,0xe9558b0e,0x6aaf6d63,0xa8000000),
 339     U128(0x0c5afe6f,0xf302bcbf,0x94fd9829,0xd87f5079),
 340     U128(0x1fce575c,0xe1692706,0x07100000,0x00000000),
 341     U128(0x4f34497c,0x8597e144,0x36e91802,0x00528229),
 342     U128(0xbf3a8e1d,0x41ef2170,0x7802130d,0x84000000),
 343     U128(0x0e7819e1,0x7f1eb0fb,0x6ee4fb89,0x01d9531f),
 344     U128(0x20000000,0x00000000,0x00000000,0x00000000),
 345     U128(0x4510460d,0xd9e879c0,0x14a82375,0x2f22b321),
 346     U128(0x91abce3c,0x4b4117ad,0xe76d35db,0x22000000),
 347     U128(0x08973ea3,0x55d75bc2,0x2e42c391,0x727d69e1),
 348     U128(0x10e425c5,0x6daffabc,0x35c10000,0x00000000),
 349 };
 350 #endif
 351
 352 static BDIGIT_DBL
 353 maxpow_in_bdigit_dbl(int base, int *exp_ret)
 354 {
 355     BDIGIT_DBL maxpow;
 356     int exponent;
 357
 358     RUBY_ASSERT(2 <= base && base <= 36);
 359
 360     {
 361 #if SIZEOF_BDIGIT_DBL == 2
 362         maxpow = maxpow16_num[base-2];
 363         exponent = maxpow16_exp[base-2];
 364 #elif SIZEOF_BDIGIT_DBL == 4
 365         maxpow = maxpow32_num[base-2];
 366         exponent = maxpow32_exp[base-2];
 367 #elif SIZEOF_BDIGIT_DBL == 8 && defined HAVE_UINT64_T
 368         maxpow = maxpow64_num[base-2];
 369         exponent = maxpow64_exp[base-2];
 370 #elif SIZEOF_BDIGIT_DBL == 16 && defined HAVE_UINT128_T
 371         maxpow = maxpow128_num[base-2];
 372         exponent = maxpow128_exp[base-2];
 373 #else
 374         maxpow = base;
 375         exponent = 1;
 376         while (maxpow <= BDIGIT_DBL_MAX / base) {
 377             maxpow *= base;
 378             exponent++;
 379         }
 380 #endif
 381     }
 382
 383     *exp_ret = exponent;
 384     return maxpow;
 385 }
 386
 387 static inline BDIGIT_DBL
 388 bary2bdigitdbl(const BDIGIT *ds, size_t n)
 389 {
 390     RUBY_ASSERT(n <= 2);
 391
 392     if (n == 2)
 393         return ds[0] | BIGUP(ds[1]);
 394     if (n == 1)
 395         return ds[0];
 396     return 0;
 397 }
 398
 399 static inline void
 400 bdigitdbl2bary(BDIGIT *ds, size_t n, BDIGIT_DBL num)
 401 {
 402     RUBY_ASSERT(n == 2);
 403
 404     ds[0] = BIGLO(num);
 405     ds[1] = (BDIGIT)BIGDN(num);
 406 }
 407
 408 static int
 409 bary_cmp(const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
 410 {
 411     size_t i;
 412     BARY_TRUNC(xds, xn);
 413     BARY_TRUNC(yds, yn);
 414
 415     if (xn < yn)
 416         return -1;
 417     if (xn > yn)
 418         return 1;
 419
 420     for (i = 0; i < xn; i++)
 421         if (xds[xn - i - 1] != yds[yn - i - 1])
 422             break;
 423     if (i == xn)
 424         return 0;
 425     return xds[xn - i - 1] < yds[yn - i - 1] ? -1 : 1;
 426 }
 427
 428 static BDIGIT
 429 bary_small_lshift(BDIGIT *zds, const BDIGIT *xds, size_t n, int shift)
 430 {
 431     size_t i;
 432     BDIGIT_DBL num = 0;
 433     RUBY_ASSERT(0 <= shift && shift < BITSPERDIG);
 434
 435     for (i=0; i<n; i++) {
 436         num = num | (BDIGIT_DBL)*xds++ << shift;
 437         *zds++ = BIGLO(num);
 438         num = BIGDN(num);
 439     }
 440     return BIGLO(num);
 441 }
 442
 443 static void
 444 bary_small_rshift(BDIGIT *zds, const BDIGIT *xds, size_t n, int shift, BDIGIT higher_bdigit)
 445 {
 446     size_t i;
 447     BDIGIT_DBL num = 0;
 448
 449     RUBY_ASSERT(0 <= shift && shift < BITSPERDIG);
 450
 451     num = BIGUP(higher_bdigit);
 452     for (i = 0; i < n; i++) {
 453         BDIGIT x = xds[n - i - 1];
 454         num = (num | x) >> shift;
 455         zds[n - i - 1] = BIGLO(num);
 456         num = BIGUP(x);
 457     }
 458 }
 459
 460 static int
 461 bary_zero_p(const BDIGIT *xds, size_t xn)
 462 {
 463     if (xn == 0)
 464         return 1;
 465     do {
 466         if (xds[--xn]) return 0;
 467     } while (xn);
 468     return 1;
 469 }
 470
 471 static void
 472 bary_neg(BDIGIT *ds, size_t n)
 473 {
 474     size_t i;
 475     for (i = 0; i < n; i++)
 476         ds[n - i - 1] = BIGLO(~ds[n - i - 1]);
 477 }
 478
 479 static int
 480 bary_2comp(BDIGIT *ds, size_t n)
 481 {
 482     size_t i;
 483     for (i = 0; i < n; i++) {
 484         if (ds[i] != 0) {
 485             goto non_zero;
 486         }
 487     }
 488     return 1;
 489
 490   non_zero:
 491     ds[i] = BIGLO(~ds[i] + 1);
 492     i++;
 493     for (; i < n; i++) {
 494         ds[i] = BIGLO(~ds[i]);
 495     }
 496     return 0;
 497 }
 498
 499 static void
 500 bary_swap(BDIGIT *ds, size_t num_bdigits)
 501 {
 502     BDIGIT *p1 = ds;
 503     BDIGIT *p2 = ds + num_bdigits - 1;
 504     for (; p1 < p2; p1++, p2--) {
 505         BDIGIT tmp = *p1;
 506         *p1 = *p2;
 507         *p2 = tmp;
 508     }
 509 }
 510
 511 #define INTEGER_PACK_WORDORDER_MASK \
 512     (INTEGER_PACK_MSWORD_FIRST | \
 513      INTEGER_PACK_LSWORD_FIRST)
 514 #define INTEGER_PACK_BYTEORDER_MASK \
 515     (INTEGER_PACK_MSBYTE_FIRST | \
 516      INTEGER_PACK_LSBYTE_FIRST | \
 517      INTEGER_PACK_NATIVE_BYTE_ORDER)
 518
 519 static void
 520 validate_integer_pack_format(size_t numwords, size_t wordsize, size_t nails, int flags, int supported_flags)
 521 {
 522     int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
 523     int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
 524
 525     if (flags & ~supported_flags) {
 526         rb_raise(rb_eArgError, "unsupported flags specified");
 527     }
 528     if (wordorder_bits == 0) {
 529         if (1 < numwords)
 530             rb_raise(rb_eArgError, "word order not specified");
 531     }
 532     else if (wordorder_bits != INTEGER_PACK_MSWORD_FIRST &&
 533         wordorder_bits != INTEGER_PACK_LSWORD_FIRST)
 534         rb_raise(rb_eArgError, "unexpected word order");
 535     if (byteorder_bits == 0) {
 536         rb_raise(rb_eArgError, "byte order not specified");
 537     }
 538     else if (byteorder_bits != INTEGER_PACK_MSBYTE_FIRST &&
 539         byteorder_bits != INTEGER_PACK_LSBYTE_FIRST &&
 540         byteorder_bits != INTEGER_PACK_NATIVE_BYTE_ORDER)
 541         rb_raise(rb_eArgError, "unexpected byte order");
 542     if (wordsize == 0)
 543         rb_raise(rb_eArgError, "invalid wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
 544     if (SSIZE_MAX < wordsize)
 545         rb_raise(rb_eArgError, "too big wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
 546     if (wordsize <= nails / CHAR_BIT)
 547         rb_raise(rb_eArgError, "too big nails: %"PRI_SIZE_PREFIX"u", nails);
 548     if (SIZE_MAX / wordsize < numwords)
 549         rb_raise(rb_eArgError, "too big numwords * wordsize: %"PRI_SIZE_PREFIX"u * %"PRI_SIZE_PREFIX"u", numwords, wordsize);
 550 }
 551
 552 static void
 553 integer_pack_loop_setup(
 554     size_t numwords, size_t wordsize, size_t nails, int flags,
 555     size_t *word_num_fullbytes_ret,
 556     int *word_num_partialbits_ret,
 557     size_t *word_start_ret,
 558     ssize_t *word_step_ret,
 559     size_t *word_last_ret,
 560     size_t *byte_start_ret,
 561     int *byte_step_ret)
 562 {
 563     int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
 564     int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
 565     size_t word_num_fullbytes;
 566     int word_num_partialbits;
 567     size_t word_start;
 568     ssize_t word_step;
 569     size_t word_last;
 570     size_t byte_start;
 571     int byte_step;
 572
 573     word_num_partialbits = CHAR_BIT - (int)(nails % CHAR_BIT);
 574     if (word_num_partialbits == CHAR_BIT)
 575         word_num_partialbits = 0;
 576     word_num_fullbytes = wordsize - (nails / CHAR_BIT);
 577     if (word_num_partialbits != 0) {
 578         word_num_fullbytes--;
 579     }
 580
 581     if (wordorder_bits == INTEGER_PACK_MSWORD_FIRST) {
 582         word_start = wordsize*(numwords-1);
 583         word_step = -(ssize_t)wordsize;
 584         word_last = 0;
 585     }
 586     else {
 587         word_start = 0;
 588         word_step = wordsize;
 589         word_last = wordsize*(numwords-1);
 590     }
 591
 592     if (byteorder_bits == INTEGER_PACK_NATIVE_BYTE_ORDER) {
 593 #ifdef WORDS_BIGENDIAN
 594         byteorder_bits = INTEGER_PACK_MSBYTE_FIRST;
 595 #else
 596         byteorder_bits = INTEGER_PACK_LSBYTE_FIRST;
 597 #endif
 598     }
 599     if (byteorder_bits == INTEGER_PACK_MSBYTE_FIRST) {
 600         byte_start = wordsize-1;
 601         byte_step = -1;
 602     }
 603     else {
 604         byte_start = 0;
 605         byte_step = 1;
 606     }
 607
 608     *word_num_partialbits_ret = word_num_partialbits;
 609     *word_num_fullbytes_ret = word_num_fullbytes;
 610     *word_start_ret = word_start;
 611     *word_step_ret = word_step;
 612     *word_last_ret = word_last;
 613     *byte_start_ret = byte_start;
 614     *byte_step_ret = byte_step;
 615 }
 616
 617 static inline void
 618 integer_pack_fill_dd(BDIGIT **dpp, BDIGIT **dep, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
 619 {
 620     if (*dpp < *dep && BITSPERDIG <= (int)sizeof(*ddp) * CHAR_BIT - *numbits_in_dd_p) {
 621         *ddp |= (BDIGIT_DBL)(*(*dpp)++) << *numbits_in_dd_p;
 622         *numbits_in_dd_p += BITSPERDIG;
 623     }
 624     else if (*dpp == *dep) {
 625         /* higher bits are infinity zeros */
 626         *numbits_in_dd_p = (int)sizeof(*ddp) * CHAR_BIT;
 627     }
 628 }
 629
 630 static inline BDIGIT_DBL
 631 integer_pack_take_lowbits(int n, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
 632 {
 633     BDIGIT_DBL ret;
 634     ret = (*ddp) & (((BDIGIT_DBL)1 << n) - 1);
 635     *ddp >>= n;
 636     *numbits_in_dd_p -= n;
 637     return ret;
 638 }
 639
 640 #if !defined(WORDS_BIGENDIAN)
 641 static int
 642 bytes_2comp(unsigned char *buf, size_t len)
 643 {
 644     size_t i;
 645     for (i = 0; i < len; i++) {
 646         signed char c = buf[i];
 647         signed int d = ~c;
 648         unsigned int e = d & 0xFF;
 649         buf[i] = e;
 650     }
 651     for (i = 0; i < len; i++) {
 652         buf[i]++;
 653         if (buf[i] != 0)
 654             return 0;
 655     }
 656     return 1;
 657 }
 658 #endif
 659
 660 static int
 661 bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
 662 {
 663     BDIGIT *dp, *de;
 664     unsigned char *buf, *bufend;
 665
 666     dp = ds;
 667     de = ds + num_bdigits;
 668
 669     validate_integer_pack_format(numwords, wordsize, nails, flags,
 670             INTEGER_PACK_MSWORD_FIRST|
 671             INTEGER_PACK_LSWORD_FIRST|
 672             INTEGER_PACK_MSBYTE_FIRST|
 673             INTEGER_PACK_LSBYTE_FIRST|
 674             INTEGER_PACK_NATIVE_BYTE_ORDER|
 675             INTEGER_PACK_2COMP|
 676             INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
 677
 678     while (dp < de && de[-1] == 0)
 679         de--;
 680     if (dp == de) {
 681         sign = 0;
 682     }
 683
 684     if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
 685         if (sign == 0) {
 686             MEMZERO(words, unsigned char, numwords * wordsize);
 687             return 0;
 688         }
 689         if (nails == 0 && numwords == 1) {
 690             int need_swap = wordsize != 1 &&
 691                 (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
 692                 ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
 693             if (0 < sign || !(flags & INTEGER_PACK_2COMP)) {
 694                 BDIGIT d;
 695                 if (wordsize == 1) {
 696                     *((unsigned char *)words) = (unsigned char)(d = dp[0]);
 697                     return ((1 < de - dp || CLEAR_LOWBITS(d, 8) != 0) ? 2 : 1) * sign;
 698                 }
 699 #if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGIT
 700                 if (wordsize == 2 && (uintptr_t)words % RUBY_ALIGNOF(uint16_t) == 0) {
 701                     uint16_t u = (uint16_t)(d = dp[0]);
 702                     if (need_swap) u = swap16(u);
 703                     *((uint16_t *)words) = u;
 704                     return ((1 < de - dp || CLEAR_LOWBITS(d, 16) != 0) ? 2 : 1) * sign;
 705                 }
 706 #endif
 707 #if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGIT
 708                 if (wordsize == 4 && (uintptr_t)words % RUBY_ALIGNOF(uint32_t) == 0) {
 709                     uint32_t u = (uint32_t)(d = dp[0]);
 710                     if (need_swap) u = swap32(u);
 711                     *((uint32_t *)words) = u;
 712                     return ((1 < de - dp || CLEAR_LOWBITS(d, 32) != 0) ? 2 : 1) * sign;
 713                 }
 714 #endif
 715 #if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGIT
 716                 if (wordsize == 8 && (uintptr_t)words % RUBY_ALIGNOF(uint64_t) == 0) {
 717                     uint64_t u = (uint64_t)(d = dp[0]);
 718                     if (need_swap) u = swap64(u);
 719                     *((uint64_t *)words) = u;
 720                     return ((1 < de - dp || CLEAR_LOWBITS(d, 64) != 0) ? 2 : 1) * sign;
 721                 }
 722 #endif
 723             }
 724             else { /* sign < 0 && (flags & INTEGER_PACK_2COMP) */
 725                 BDIGIT_DBL_SIGNED d;
 726                 if (wordsize == 1) {
 727                     *((unsigned char *)words) = (unsigned char)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 728                     return (1 < de - dp || FILL_LOWBITS(d, 8) != -1) ? -2 : -1;
 729                 }
 730 #if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGIT
 731                 if (wordsize == 2 && (uintptr_t)words % RUBY_ALIGNOF(uint16_t) == 0) {
 732                     uint16_t u = (uint16_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 733                     if (need_swap) u = swap16(u);
 734                     *((uint16_t *)words) = u;
 735                     return (wordsize == SIZEOF_BDIGIT && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
 736                         (1 < de - dp || FILL_LOWBITS(d, 16) != -1) ? -2 : -1;
 737                 }
 738 #endif
 739 #if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGIT
 740                 if (wordsize == 4 && (uintptr_t)words % RUBY_ALIGNOF(uint32_t) == 0) {
 741                     uint32_t u = (uint32_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 742                     if (need_swap) u = swap32(u);
 743                     *((uint32_t *)words) = u;
 744                     return (wordsize == SIZEOF_BDIGIT && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
 745                         (1 < de - dp || FILL_LOWBITS(d, 32) != -1) ? -2 : -1;
 746                 }
 747 #endif
 748 #if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGIT
 749                 if (wordsize == 8 && (uintptr_t)words % RUBY_ALIGNOF(uint64_t) == 0) {
 750                     uint64_t u = (uint64_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 751                     if (need_swap) u = swap64(u);
 752                     *((uint64_t *)words) = u;
 753                     return (wordsize == SIZEOF_BDIGIT && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
 754                         (1 < de - dp || FILL_LOWBITS(d, 64) != -1) ? -2 : -1;
 755                 }
 756 #endif
 757             }
 758         }
 759 #if !defined(WORDS_BIGENDIAN)
 760         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
 761             (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
 762             (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
 763             size_t src_size = (de - dp) * SIZEOF_BDIGIT;
 764             size_t dst_size = numwords * wordsize;
 765             int overflow = 0;
 766             while (0 < src_size && ((unsigned char *)ds)[src_size-1] == 0)
 767                 src_size--;
 768             if (src_size <= dst_size) {
 769                 MEMCPY(words, dp, char, src_size);
 770                 MEMZERO((char*)words + src_size, char, dst_size - src_size);
 771             }
 772             else {
 773                 MEMCPY(words, dp, char, dst_size);
 774                 overflow = 1;
 775             }
 776             if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
 777                 int zero_p = bytes_2comp(words, dst_size);
 778                 if (zero_p && overflow) {
 779                     unsigned char *p = (unsigned char *)dp;
 780                     if (dst_size == src_size-1 &&
 781                         p[dst_size] == 1) {
 782                         overflow = 0;
 783                     }
 784                 }
 785             }
 786             if (overflow)
 787                 sign *= 2;
 788             return sign;
 789         }
 790 #endif
 791         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
 792             wordsize % SIZEOF_BDIGIT == 0 && (uintptr_t)words % RUBY_ALIGNOF(BDIGIT) == 0) {
 793             size_t bdigits_per_word = wordsize / SIZEOF_BDIGIT;
 794             size_t src_num_bdigits = de - dp;
 795             size_t dst_num_bdigits = numwords * bdigits_per_word;
 796             int overflow = 0;
 797             int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
 798             int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
 799                 (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
 800             if (src_num_bdigits <= dst_num_bdigits) {
 801                 MEMCPY(words, dp, BDIGIT, src_num_bdigits);
 802                 BDIGITS_ZERO((BDIGIT*)words + src_num_bdigits, dst_num_bdigits - src_num_bdigits);
 803             }
 804             else {
 805                 MEMCPY(words, dp, BDIGIT, dst_num_bdigits);
 806                 overflow = 1;
 807             }
 808             if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
 809                 int zero_p = bary_2comp(words, dst_num_bdigits);
 810                 if (zero_p && overflow &&
 811                     dst_num_bdigits == src_num_bdigits-1 &&
 812                     dp[dst_num_bdigits] == 1)
 813                     overflow = 0;
 814             }
 815             if (msbytefirst_p != HOST_BIGENDIAN_P) {
 816                 size_t i;
 817                 for (i = 0; i < dst_num_bdigits; i++) {
 818                     BDIGIT d = ((BDIGIT*)words)[i];
 819                     ((BDIGIT*)words)[i] = swap_bdigit(d);
 820                 }
 821             }
 822             if (mswordfirst_p ? !msbytefirst_p : msbytefirst_p) {
 823                 size_t i;
 824                 BDIGIT *p = words;
 825                 for (i = 0; i < numwords; i++) {
 826                     bary_swap(p, bdigits_per_word);
 827                     p += bdigits_per_word;
 828                 }
 829             }
 830             if (mswordfirst_p) {
 831                 bary_swap(words, dst_num_bdigits);
 832             }
 833             if (overflow)
 834                 sign *= 2;
 835             return sign;
 836         }
 837     }
 838
 839     buf = words;
 840     bufend = buf + numwords * wordsize;
 841
 842     if (buf == bufend) {
 843         /* overflow if non-zero*/
 844         if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
 845             sign *= 2;
 846         else {
 847             if (de - dp == 1 && dp[0] == 1)
 848                 sign = -1; /* val == -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 849             else
 850                 sign = -2; /* val < -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 851         }
 852     }
 853     else if (dp == de) {
 854         memset(buf, '\0', bufend - buf);
 855     }
 856     else if (dp < de && buf < bufend) {
 857         int word_num_partialbits;
 858         size_t word_num_fullbytes;
 859
 860         ssize_t word_step;
 861         size_t byte_start;
 862         int byte_step;
 863
 864         size_t word_start, word_last;
 865         unsigned char *wordp, *last_wordp;
 866         BDIGIT_DBL dd;
 867         int numbits_in_dd;
 868
 869         integer_pack_loop_setup(numwords, wordsize, nails, flags,
 870             &word_num_fullbytes, &word_num_partialbits,
 871             &word_start, &word_step, &word_last, &byte_start, &byte_step);
 872
 873         wordp = buf + word_start;
 874         last_wordp = buf + word_last;
 875
 876         dd = 0;
 877         numbits_in_dd = 0;
 878
 879 #define FILL_DD \
 880     integer_pack_fill_dd(&dp, &de, &dd, &numbits_in_dd)
 881 #define TAKE_LOWBITS(n) \
 882     integer_pack_take_lowbits(n, &dd, &numbits_in_dd)
 883
 884         while (1) {
 885             size_t index_in_word = 0;
 886             unsigned char *bytep = wordp + byte_start;
 887             while (index_in_word < word_num_fullbytes) {
 888                 FILL_DD;
 889                 *bytep = TAKE_LOWBITS(CHAR_BIT);
 890                 bytep += byte_step;
 891                 index_in_word++;
 892             }
 893             if (word_num_partialbits) {
 894                 FILL_DD;
 895                 *bytep = TAKE_LOWBITS(word_num_partialbits);
 896                 bytep += byte_step;
 897                 index_in_word++;
 898             }
 899             while (index_in_word < wordsize) {
 900                 *bytep = 0;
 901                 bytep += byte_step;
 902                 index_in_word++;
 903             }
 904
 905             if (wordp == last_wordp)
 906                 break;
 907
 908             wordp += word_step;
 909         }
 910         FILL_DD;
 911         /* overflow tests */
 912         if (dp != de || 1 < dd) {
 913             /* 2**(numwords*(wordsize*CHAR_BIT-nails)+1) <= abs(val) */
 914             sign *= 2;
 915         }
 916         else if (dd == 1) {
 917             /* 2**(numwords*(wordsize*CHAR_BIT-nails)) <= abs(val) < 2**(numwords*(wordsize*CHAR_BIT-nails)+1) */
 918             if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
 919                 sign *= 2;
 920             else { /* overflow_2comp && sign == -1 */
 921                 /* test lower bits are all zero. */
 922                 dp = ds;
 923                 while (dp < de && *dp == 0)
 924                     dp++;
 925                 if (de - dp == 1 && /* only one non-zero word. */
 926                     POW2_P(*dp)) /* *dp contains only one bit set. */
 927                     sign = -1; /* val == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 928                 else
 929                     sign = -2; /* val < -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 930             }
 931         }
 932     }
 933
 934     if ((flags & INTEGER_PACK_2COMP) && (sign < 0 && numwords != 0)) {
 935         int word_num_partialbits;
 936         size_t word_num_fullbytes;
 937
 938         ssize_t word_step;
 939         size_t byte_start;
 940         int byte_step;
 941
 942         size_t word_start, word_last;
 943         unsigned char *wordp, *last_wordp;
 944
 945         unsigned int partialbits_mask;
 946         int carry;
 947
 948         integer_pack_loop_setup(numwords, wordsize, nails, flags,
 949             &word_num_fullbytes, &word_num_partialbits,
 950             &word_start, &word_step, &word_last, &byte_start, &byte_step);
 951
 952         partialbits_mask = (1 << word_num_partialbits) - 1;
 953
 954         buf = words;
 955         wordp = buf + word_start;
 956         last_wordp = buf + word_last;
 957
 958         carry = 1;
 959         while (1) {
 960             size_t index_in_word = 0;
 961             unsigned char *bytep = wordp + byte_start;
 962             while (index_in_word < word_num_fullbytes) {
 963                 carry += (unsigned char)~*bytep;
 964                 *bytep = (unsigned char)carry;
 965                 carry >>= CHAR_BIT;
 966                 bytep += byte_step;
 967                 index_in_word++;
 968             }
 969             if (word_num_partialbits) {
 970                 carry += (*bytep & partialbits_mask) ^ partialbits_mask;
 971                 *bytep = carry & partialbits_mask;
 972                 carry >>= word_num_partialbits;
 973                 bytep += byte_step;
 974                 index_in_word++;
 975             }
 976
 977             if (wordp == last_wordp)
 978                 break;
 979
 980             wordp += word_step;
 981         }
 982     }
 983
 984     return sign;
 985 #undef FILL_DD
 986 #undef TAKE_LOWBITS
 987 }
 988
 989 static size_t
 990 integer_unpack_num_bdigits_small(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
 991 {
 992     /* nlp_bits stands for number of leading padding bits */
 993     size_t num_bits = (wordsize * CHAR_BIT - nails) * numwords;
 994     size_t num_bdigits = roomof(num_bits, BITSPERDIG);
 995     *nlp_bits_ret = (int)(num_bdigits * BITSPERDIG - num_bits);
 996     return num_bdigits;
 997 }
 998
 999 static size_t
1000 integer_unpack_num_bdigits_generic(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
1001 {
1002     /* BITSPERDIG = SIZEOF_BDIGIT * CHAR_BIT */
1003     /* num_bits = (wordsize * CHAR_BIT - nails) * numwords */
1004     /* num_bdigits = roomof(num_bits, BITSPERDIG) */
1005
1006     /* num_bits = CHAR_BIT * (wordsize * numwords) - nails * numwords = CHAR_BIT * num_bytes1 - nails * numwords */
1007     size_t num_bytes1 = wordsize * numwords;
1008
1009     /* q1 * CHAR_BIT + r1 = numwords */
1010     size_t q1 = numwords / CHAR_BIT;
1011     size_t r1 = numwords % CHAR_BIT;
1012
1013     /* num_bits = CHAR_BIT * num_bytes1 - nails * (q1 * CHAR_BIT + r1) = CHAR_BIT * num_bytes2 - nails * r1 */
1014     size_t num_bytes2 = num_bytes1 - nails * q1;
1015
1016     /* q2 * CHAR_BIT + r2 = nails */
1017     size_t q2 = nails / CHAR_BIT;
1018     size_t r2 = nails % CHAR_BIT;
1019
1020     /* num_bits = CHAR_BIT * num_bytes2 - (q2 * CHAR_BIT + r2) * r1 = CHAR_BIT * num_bytes3 - r1 * r2 */
1021     size_t num_bytes3 = num_bytes2 - q2 * r1;
1022
1023     /* q3 * BITSPERDIG + r3 = num_bytes3 */
1024     size_t q3 = num_bytes3 / BITSPERDIG;
1025     size_t r3 = num_bytes3 % BITSPERDIG;
1026
1027     /* num_bits = CHAR_BIT * (q3 * BITSPERDIG + r3) - r1 * r2 = BITSPERDIG * num_digits1 + CHAR_BIT * r3 - r1 * r2 */
1028     size_t num_digits1 = CHAR_BIT * q3;
1029
1030     /*
1031      * if CHAR_BIT * r3 >= r1 * r2
1032      *   CHAR_BIT * r3 - r1 * r2 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2))
1033      *   q4 * BITSPERDIG + r4 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2)
1034      *   num_bits = BITSPERDIG * num_digits1 + CHAR_BIT * BITSPERDIG - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
1035      * else
1036      *   q4 * BITSPERDIG + r4 = -(CHAR_BIT * r3 - r1 * r2)
1037      *   num_bits = BITSPERDIG * num_digits1 - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
1038      * end
1039      */
1040
1041     if (CHAR_BIT * r3 >= r1 * r2) {
1042         size_t tmp1 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2);
1043         size_t q4 = tmp1 / BITSPERDIG;
1044         int r4 = (int)(tmp1 % BITSPERDIG);
1045         size_t num_digits2 = num_digits1 + CHAR_BIT - q4;
1046         *nlp_bits_ret = r4;
1047         return num_digits2;
1048     }
1049     else {
1050         size_t tmp1 = r1 * r2 - CHAR_BIT * r3;
1051         size_t q4 = tmp1 / BITSPERDIG;
1052         int r4 = (int)(tmp1 % BITSPERDIG);
1053         size_t num_digits2 = num_digits1 - q4;
1054         *nlp_bits_ret = r4;
1055         return num_digits2;
1056     }
1057 }
1058
1059 static size_t
1060 integer_unpack_num_bdigits(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
1061 {
1062     size_t num_bdigits;
1063
1064     if (numwords <= (SIZE_MAX - (BITSPERDIG-1)) / CHAR_BIT / wordsize) {
1065         num_bdigits = integer_unpack_num_bdigits_small(numwords, wordsize, nails, nlp_bits_ret);
1066         if (debug_integer_pack) {
1067             int nlp_bits1;
1068             size_t num_bdigits1 = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, &nlp_bits1);
1069             RUBY_ASSERT(num_bdigits == num_bdigits1);
1070             RUBY_ASSERT(*nlp_bits_ret == nlp_bits1);
1071             (void)num_bdigits1;
1072         }
1073     }
1074     else {
1075         num_bdigits = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, nlp_bits_ret);
1076     }
1077     return num_bdigits;
1078 }
1079
1080 static inline void
1081 integer_unpack_push_bits(int data, int numbits, BDIGIT_DBL *ddp, int *numbits_in_dd_p, BDIGIT **dpp)
1082 {
1083     (*ddp) |= ((BDIGIT_DBL)data) << (*numbits_in_dd_p);
1084     *numbits_in_dd_p += numbits;
1085     while (BITSPERDIG <= *numbits_in_dd_p) {
1086         *(*dpp)++ = BIGLO(*ddp);
1087         *ddp = BIGDN(*ddp);
1088         *numbits_in_dd_p -= BITSPERDIG;
1089     }
1090 }
1091
1092 static int
1093 integer_unpack_single_bdigit(BDIGIT u, size_t size, int flags, BDIGIT *dp)
1094 {
1095     int sign;
1096     if (flags & INTEGER_PACK_2COMP) {
1097         sign = (flags & INTEGER_PACK_NEGATIVE) ?
1098             ((size == SIZEOF_BDIGIT && u == 0) ? -2 : -1) :
1099             ((u >> (size * CHAR_BIT - 1)) ? -1 : 1);
1100         if (sign < 0) {
1101             u |= LSHIFTX(BDIGMAX, size * CHAR_BIT);
1102             u = BIGLO(1 + ~u);
1103         }
1104     }
1105     else
1106         sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1107     *dp = u;
1108     return sign;
1109 }
1110
1111 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1112 #define reinterpret_cast(type, value) (type) \
1113     __builtin_assume_aligned((value), sizeof(*(type)NULL));
1114 #else
1115 #define reinterpret_cast(type, value) (type)value
1116 #endif
1117
1118 static int
1119 bary_unpack_internal(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags, int nlp_bits)
1120 {
1121     int sign;
1122     const unsigned char *buf = words;
1123     BDIGIT *dp;
1124     BDIGIT *de;
1125
1126     dp = bdigits;
1127     de = dp + num_bdigits;
1128
1129     if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
1130         if (nails == 0 && numwords == 1) {
1131             int need_swap = wordsize != 1 &&
1132                 (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
1133                 ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
1134             if (wordsize == 1) {
1135                 return integer_unpack_single_bdigit(*(uint8_t *)buf, sizeof(uint8_t), flags, dp);
1136             }
1137 #if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGIT
1138             if (wordsize == 2 && (uintptr_t)words % RUBY_ALIGNOF(uint16_t) == 0) {
1139                 uint16_t u = *reinterpret_cast(const uint16_t *, buf);
1140                 return integer_unpack_single_bdigit(need_swap ? swap16(u) : u, sizeof(uint16_t), flags, dp);
1141             }
1142 #endif
1143 #if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGIT
1144             if (wordsize == 4 && (uintptr_t)words % RUBY_ALIGNOF(uint32_t) == 0) {
1145                 uint32_t u = *reinterpret_cast(const uint32_t *, buf);
1146                 return integer_unpack_single_bdigit(need_swap ? swap32(u) : u, sizeof(uint32_t), flags, dp);
1147             }
1148 #endif
1149 #if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGIT
1150             if (wordsize == 8 && (uintptr_t)words % RUBY_ALIGNOF(uint64_t) == 0) {
1151                 uint64_t u = *reinterpret_cast(const uint64_t *, buf);
1152                 return integer_unpack_single_bdigit(need_swap ? swap64(u) : u, sizeof(uint64_t), flags, dp);
1153             }
1154 #endif
1155 #undef reinterpret_cast
1156         }
1157 #if !defined(WORDS_BIGENDIAN)
1158         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
1159             (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
1160             (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
1161             size_t src_size = numwords * wordsize;
1162             size_t dst_size = num_bdigits * SIZEOF_BDIGIT;
1163             MEMCPY(dp, words, char, src_size);
1164             if (flags & INTEGER_PACK_2COMP) {
1165                 if (flags & INTEGER_PACK_NEGATIVE) {
1166                     int zero_p;
1167                     memset((char*)dp + src_size, 0xff, dst_size - src_size);
1168                     zero_p = bary_2comp(dp, num_bdigits);
1169                     sign = zero_p ? -2 : -1;
1170                 }
1171                 else if (buf[src_size-1] >> (CHAR_BIT-1)) {
1172                     memset((char*)dp + src_size, 0xff, dst_size - src_size);
1173                     bary_2comp(dp, num_bdigits);
1174                     sign = -1;
1175                 }
1176                 else {
1177                     MEMZERO((char*)dp + src_size, char, dst_size - src_size);
1178                     sign = 1;
1179                 }
1180             }
1181             else {
1182                 MEMZERO((char*)dp + src_size, char, dst_size - src_size);
1183                 sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1184             }
1185             return sign;
1186         }
1187 #endif
1188         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
1189             wordsize % SIZEOF_BDIGIT == 0) {
1190             size_t bdigits_per_word = wordsize / SIZEOF_BDIGIT;
1191             int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
1192             int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
1193                 (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
1194             MEMCPY(dp, words, BDIGIT, numwords*bdigits_per_word);
1195             if (mswordfirst_p) {
1196                 bary_swap(dp, num_bdigits);
1197             }
1198             if (mswordfirst_p ? !msbytefirst_p : msbytefirst_p) {
1199                 size_t i;
1200                 BDIGIT *p = dp;
1201                 for (i = 0; i < numwords; i++) {
1202                     bary_swap(p, bdigits_per_word);
1203                     p += bdigits_per_word;
1204                 }
1205             }
1206             if (msbytefirst_p != HOST_BIGENDIAN_P) {
1207                 BDIGIT *p;
1208                 for (p = dp; p < de; p++) {
1209                     BDIGIT d = *p;
1210                     *p = swap_bdigit(d);
1211                 }
1212             }
1213             if (flags & INTEGER_PACK_2COMP) {
1214                 if (flags & INTEGER_PACK_NEGATIVE) {
1215                     int zero_p = bary_2comp(dp, num_bdigits);
1216                     sign = zero_p ? -2 : -1;
1217                 }
1218                 else if (BDIGIT_MSB(de[-1])) {
1219                     bary_2comp(dp, num_bdigits);
1220                     sign = -1;
1221                 }
1222                 else {
1223                     sign = 1;
1224                 }
1225             }
1226             else {
1227                 sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1228             }
1229             return sign;
1230         }
1231     }
1232
1233     if (num_bdigits != 0) {
1234         int word_num_partialbits;
1235         size_t word_num_fullbytes;
1236
1237         ssize_t word_step;
1238         size_t byte_start;
1239         int byte_step;
1240
1241         size_t word_start, word_last;
1242         const unsigned char *wordp, *last_wordp;
1243         BDIGIT_DBL dd;
1244         int numbits_in_dd;
1245
1246         integer_pack_loop_setup(numwords, wordsize, nails, flags,
1247             &word_num_fullbytes, &word_num_partialbits,
1248             &word_start, &word_step, &word_last, &byte_start, &byte_step);
1249
1250         wordp = buf + word_start;
1251         last_wordp = buf + word_last;
1252
1253         dd = 0;
1254         numbits_in_dd = 0;
1255
1256 #define PUSH_BITS(data, numbits) \
1257         integer_unpack_push_bits(data, numbits, &dd, &numbits_in_dd, &dp)
1258
1259         while (1) {
1260             size_t index_in_word = 0;
1261             const unsigned char *bytep = wordp + byte_start;
1262             while (index_in_word < word_num_fullbytes) {
1263                 PUSH_BITS(*bytep, CHAR_BIT);
1264                 bytep += byte_step;
1265                 index_in_word++;
1266             }
1267             if (word_num_partialbits) {
1268                 PUSH_BITS(*bytep & ((1 << word_num_partialbits) - 1), word_num_partialbits);
1269                 bytep += byte_step;
1270                 index_in_word++;
1271             }
1272
1273             if (wordp == last_wordp)
1274                 break;
1275
1276             wordp += word_step;
1277         }
1278         if (dd)
1279             *dp++ = (BDIGIT)dd;
1280         RUBY_ASSERT(dp <= de);
1281         while (dp < de)
1282             *dp++ = 0;
1283 #undef PUSH_BITS
1284     }
1285
1286     if (!(flags & INTEGER_PACK_2COMP)) {
1287         sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1288     }
1289     else {
1290         if (nlp_bits) {
1291             if ((flags & INTEGER_PACK_NEGATIVE) ||
1292                 (bdigits[num_bdigits-1] >> (BITSPERDIG - nlp_bits - 1))) {
1293                 bdigits[num_bdigits-1] |= BIGLO(BDIGMAX << (BITSPERDIG - nlp_bits));
1294                 sign = -1;
1295             }
1296             else {
1297                 sign = 1;
1298             }
1299         }
1300         else {
1301             if (flags & INTEGER_PACK_NEGATIVE) {
1302                 sign = bary_zero_p(bdigits, num_bdigits) ? -2 : -1;
1303             }
1304             else {
1305                 if (num_bdigits != 0 && BDIGIT_MSB(bdigits[num_bdigits-1]))
1306                     sign = -1;
1307                 else
1308                     sign = 1;
1309             }
1310         }
1311         if (sign == -1 && num_bdigits != 0) {
1312             bary_2comp(bdigits, num_bdigits);
1313         }
1314     }
1315
1316     return sign;
1317 }
1318
1319 static void
1320 bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
1321 {
1322     size_t num_bdigits0;
1323     int nlp_bits;
1324     int sign;
1325
1326     validate_integer_pack_format(numwords, wordsize, nails, flags,
1327             INTEGER_PACK_MSWORD_FIRST|
1328             INTEGER_PACK_LSWORD_FIRST|
1329             INTEGER_PACK_MSBYTE_FIRST|
1330             INTEGER_PACK_LSBYTE_FIRST|
1331             INTEGER_PACK_NATIVE_BYTE_ORDER|
1332             INTEGER_PACK_2COMP|
1333             INTEGER_PACK_FORCE_BIGNUM|
1334             INTEGER_PACK_NEGATIVE|
1335             INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
1336
1337     num_bdigits0 = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
1338
1339     RUBY_ASSERT(num_bdigits0 <= num_bdigits);
1340
1341     sign = bary_unpack_internal(bdigits, num_bdigits0, words, numwords, wordsize, nails, flags, nlp_bits);
1342
1343     if (num_bdigits0 < num_bdigits) {
1344         BDIGITS_ZERO(bdigits + num_bdigits0, num_bdigits - num_bdigits0);
1345         if (sign == -2) {
1346             bdigits[num_bdigits0] = 1;
1347         }
1348     }
1349 }
1350
1351 static int
1352 bary_subb(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, int borrow)
1353 {
1354     BDIGIT_DBL_SIGNED num;
1355     size_t i;
1356     size_t sn;
1357
1358     RUBY_ASSERT(xn <= zn);
1359     RUBY_ASSERT(yn <= zn);
1360
1361     sn = xn < yn ? xn : yn;
1362
1363     num = borrow ? -1 : 0;
1364     for (i = 0; i < sn; i++) {
1365         num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
1366         zds[i] = BIGLO(num);
1367         num = BIGDN(num);
1368     }
1369     if (yn <= xn) {
1370         for (; i < xn; i++) {
1371             if (num == 0) goto num_is_zero;
1372             num += xds[i];
1373             zds[i] = BIGLO(num);
1374             num = BIGDN(num);
1375         }
1376     }
1377     else {
1378         for (; i < yn; i++) {
1379             num -= yds[i];
1380             zds[i] = BIGLO(num);
1381             num = BIGDN(num);
1382         }
1383     }
1384     if (num == 0) goto num_is_zero;
1385     for (; i < zn; i++) {
1386         zds[i] = BDIGMAX;
1387     }
1388     return 1;
1389
1390   num_is_zero:
1391     if (xds == zds && xn == zn)
1392         return 0;
1393     for (; i < xn; i++) {
1394         zds[i] = xds[i];
1395     }
1396     for (; i < zn; i++) {
1397         zds[i] = 0;
1398     }
1399     return 0;
1400 }
1401
1402 static int
1403 bary_sub(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
1404 {
1405     return bary_subb(zds, zn, xds, xn, yds, yn, 0);
1406 }
1407
1408 static int
1409 bary_sub_one(BDIGIT *zds, size_t zn)
1410 {
1411     return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
1412 }
1413
1414 static int
1415 bary_addc(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, int carry)
1416 {
1417     BDIGIT_DBL num;
1418     size_t i;
1419
1420     RUBY_ASSERT(xn <= zn);
1421     RUBY_ASSERT(yn <= zn);
1422
1423     if (xn > yn) {
1424         const BDIGIT *tds;
1425         tds = xds; xds = yds; yds = tds;
1426         i = xn; xn = yn; yn = i;
1427     }
1428
1429     num = carry ? 1 : 0;
1430     for (i = 0; i < xn; i++) {
1431         num += (BDIGIT_DBL)xds[i] + yds[i];
1432         zds[i] = BIGLO(num);
1433         num = BIGDN(num);
1434     }
1435     for (; i < yn; i++) {
1436         if (num == 0) goto num_is_zero;
1437         num += yds[i];
1438         zds[i] = BIGLO(num);
1439         num = BIGDN(num);
1440     }
1441     for (; i < zn; i++) {
1442         if (num == 0) goto num_is_zero;
1443         zds[i] = BIGLO(num);
1444         num = BIGDN(num);
1445     }
1446     return num != 0;
1447
1448   num_is_zero:
1449     if (yds == zds && yn == zn)
1450         return 0;
1451     for (; i < yn; i++) {
1452         zds[i] = yds[i];
1453     }
1454     for (; i < zn; i++) {
1455         zds[i] = 0;
1456     }
1457     return 0;
1458 }
1459
1460 static int
1461 bary_add(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
1462 {
1463     return bary_addc(zds, zn, xds, xn, yds, yn, 0);
1464 }
1465
1466 static int
1467 bary_add_one(BDIGIT *ds, size_t n)
1468 {
1469     size_t i;
1470     for (i = 0; i < n; i++) {
1471         BDIGIT_DBL n = ds[i];
1472         n += 1;
1473         ds[i] = BIGLO(n);
1474         if (ds[i] != 0)
1475             return 0;
1476     }
1477     return 1;
1478 }
1479
1480 static void
1481 bary_mul_single(BDIGIT *zds, size_t zn, BDIGIT x, BDIGIT y)
1482 {
1483     BDIGIT_DBL n;
1484
1485     RUBY_ASSERT(2 <= zn);
1486
1487     n = (BDIGIT_DBL)x * y;
1488     bdigitdbl2bary(zds, 2, n);
1489     BDIGITS_ZERO(zds + 2, zn - 2);
1490 }
1491
1492 static int
1493 bary_muladd_1xN(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
1494 {
1495     BDIGIT_DBL n;
1496     BDIGIT_DBL dd;
1497     size_t j;
1498
1499     RUBY_ASSERT(zn > yn);
1500
1501     if (x == 0)
1502         return 0;
1503     dd = x;
1504     n = 0;
1505     for (j = 0; j < yn; j++) {
1506         BDIGIT_DBL ee = n + dd * yds[j];
1507         if (ee) {
1508             n = zds[j] + ee;
1509             zds[j] = BIGLO(n);
1510             n = BIGDN(n);
1511         }
1512         else {
1513             n = 0;
1514         }
1515
1516     }
1517     for (; j < zn; j++) {
1518         if (n == 0)
1519             break;
1520         n += zds[j];
1521         zds[j] = BIGLO(n);
1522         n = BIGDN(n);
1523     }
1524     return n != 0;
1525 }
1526
1527 static BDIGIT_DBL_SIGNED
1528 bigdivrem_mulsub(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
1529 {
1530     size_t i;
1531     BDIGIT_DBL t2;
1532     BDIGIT_DBL_SIGNED num;
1533
1534     RUBY_ASSERT(zn == yn + 1);
1535
1536     num = 0;
1537     t2 = 0;
1538     i = 0;
1539
1540     do {
1541         BDIGIT_DBL_SIGNED ee;
1542         t2 += (BDIGIT_DBL)yds[i] * x;
1543         ee = num - BIGLO(t2);
1544         num = (BDIGIT_DBL_SIGNED)zds[i] + ee;
1545         if (ee) zds[i] = BIGLO(num);
1546         num = BIGDN(num);
1547         t2 = BIGDN(t2);
1548     } while (++i < yn);
1549     num -= (BDIGIT_DBL_SIGNED)t2;
1550     num += (BDIGIT_DBL_SIGNED)zds[yn]; /* borrow from high digit; don't update */
1551     return num;
1552 }
1553
1554 static int
1555 bary_mulsub_1xN(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
1556 {
1557     BDIGIT_DBL_SIGNED num;
1558
1559     RUBY_ASSERT(zn == yn + 1);
1560
1561     num = bigdivrem_mulsub(zds, zn, x, yds, yn);
1562     zds[yn] = BIGLO(num);
1563     if (BIGDN(num))
1564         return 1;
1565     return 0;
1566 }
1567
1568 static void
1569 bary_mul_normal(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
1570 {
1571     size_t i;
1572
1573     RUBY_ASSERT(xn + yn <= zn);
1574
1575     BDIGITS_ZERO(zds, zn);
1576     for (i = 0; i < xn; i++) {
1577         bary_muladd_1xN(zds+i, zn-i, xds[i], yds, yn);
1578     }
1579 }
1580
1581 VALUE
1582 rb_big_mul_normal(VALUE x, VALUE y)
1583 {
1584     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
1585     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
1586     bary_mul_normal(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn);
1587     RB_GC_GUARD(x);
1588     RB_GC_GUARD(y);
1589     return z;
1590 }
1591
1592 /* efficient squaring (2 times faster than normal multiplication)
1593  * ref: Handbook of Applied Cryptography, Algorithm 14.16
1594  *      https://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
1595  */
1596 static void
1597 bary_sq_fast(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn)
1598 {
1599     size_t i, j;
1600     BDIGIT_DBL c, v, w;
1601     BDIGIT vl;
1602     int vh;
1603
1604     RUBY_ASSERT(xn * 2 <= zn);
1605
1606     BDIGITS_ZERO(zds, zn);
1607
1608     if (xn == 0)
1609         return;
1610
1611     for (i = 0; i < xn-1; i++) {
1612         v = (BDIGIT_DBL)xds[i];
1613         if (!v)
1614             continue;
1615         c = (BDIGIT_DBL)zds[i + i] + v * v;
1616         zds[i + i] = BIGLO(c);
1617         c = BIGDN(c);
1618         v *= 2;
1619         vl = BIGLO(v);
1620         vh = (int)BIGDN(v);
1621         for (j = i + 1; j < xn; j++) {
1622             w = (BDIGIT_DBL)xds[j];
1623             c += (BDIGIT_DBL)zds[i + j] + vl * w;
1624             zds[i + j] = BIGLO(c);
1625             c = BIGDN(c);
1626             if (vh)
1627                 c += w;
1628         }
1629         if (c) {
1630             c += (BDIGIT_DBL)zds[i + xn];
1631             zds[i + xn] = BIGLO(c);
1632             c = BIGDN(c);
1633             if (c)
1634                 zds[i + xn + 1] += (BDIGIT)c;
1635         }
1636     }
1637
1638     /* i == xn-1 */
1639     v = (BDIGIT_DBL)xds[i];
1640     if (!v)
1641         return;
1642     c = (BDIGIT_DBL)zds[i + i] + v * v;
1643     zds[i + i] = BIGLO(c);
1644     c = BIGDN(c);
1645     if (c) {
1646         zds[i + xn] += BIGLO(c);
1647     }
1648 }
1649
1650 VALUE
1651 rb_big_sq_fast(VALUE x)
1652 {
1653     size_t xn = BIGNUM_LEN(x), zn = 2 * xn;
1654     VALUE z = bignew(zn, 1);
1655     bary_sq_fast(BDIGITS(z), zn, BDIGITS(x), xn);
1656     RB_GC_GUARD(x);
1657     return z;
1658 }
1659
1660 static inline size_t
1661 max_size(size_t a, size_t b)
1662 {
1663     return (a > b ? a : b);
1664 }
1665
1666 /* balancing multiplication by slicing larger argument */
1667 static void
1668 bary_mul_balance_with_mulfunc(BDIGIT *const zds, const size_t zn,
1669                               const BDIGIT *const xds, const size_t xn,
1670                               const BDIGIT *const yds, const size_t yn,
1671                               BDIGIT *wds, size_t wn, mulfunc_t *const mulfunc)
1672 {
1673     VALUE work = 0;
1674     size_t n;
1675
1676     RUBY_ASSERT(xn + yn <= zn);
1677     RUBY_ASSERT(xn <= yn);
1678     RUBY_ASSERT(!KARATSUBA_BALANCED(xn, yn) || !TOOM3_BALANCED(xn, yn));
1679
1680     BDIGITS_ZERO(zds, xn);
1681
1682     if (wn < xn) {
1683         /* The condition when a new buffer is needed:
1684          * 1. (2(xn+r) > zn-(yn-r)) => (2xn+r > zn-yn), at the last
1685          *    iteration (or r == 0)
1686          * 2. (2(xn+xn) > zn-(yn-r-xn)) => (3xn-r > zn-yn), at the
1687          *    previous iteration.
1688          */
1689         const size_t r = yn % xn;
1690         if (2*xn + yn + max_size(xn-r, r) > zn) {
1691             wn = xn;
1692             wds = ALLOCV_N(BDIGIT, work, wn);
1693         }
1694     }
1695
1696     n = 0;
1697     while (yn > n) {
1698         const size_t r = (xn > (yn - n) ? (yn - n) : xn);
1699         const size_t tn = (xn + r);
1700         if (2 * (xn + r) <= zn - n) {
1701             BDIGIT *const tds = zds + n + xn + r;
1702             mulfunc(tds, tn, xds, xn, yds + n, r, wds, wn);
1703             BDIGITS_ZERO(zds + n + xn, r);
1704             bary_add(zds + n, tn,
1705                      zds + n, tn,
1706                      tds, tn);
1707         }
1708         else {
1709             BDIGIT *const tds = zds + n;
1710             if (wn < xn) {
1711                 /* xn is invariant, only once here */
1712 #if 0
1713                 wn = xn;
1714                 wds = ALLOCV_N(BDIGIT, work, wn);
1715 #else
1716                 rb_bug("wds is not enough: %" PRIdSIZE " for %" PRIdSIZE, wn, xn);
1717 #endif
1718             }
1719             MEMCPY(wds, zds + n, BDIGIT, xn);
1720             mulfunc(tds, tn, xds, xn, yds + n, r, wds+xn, wn-xn);
1721             bary_add(zds + n, tn,
1722                      zds + n, tn,
1723                      wds, xn);
1724         }
1725         n += r;
1726     }
1727     BDIGITS_ZERO(zds+xn+yn, zn - (xn+yn));
1728
1729     if (work)
1730         ALLOCV_END(work);
1731 }
1732
1733 VALUE
1734 rb_big_mul_balance(VALUE x, VALUE y)
1735 {
1736     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
1737     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
1738     bary_mul_balance_with_mulfunc(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, NULL, 0, bary_mul_toom3_start);
1739     RB_GC_GUARD(x);
1740     RB_GC_GUARD(y);
1741     return z;
1742 }
1743
1744 /* multiplication by karatsuba method */
1745 static void
1746 bary_mul_karatsuba(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
1747 {
1748     VALUE work = 0;
1749
1750     size_t n;
1751     int sub_p, borrow, carry1, carry2, carry3;
1752
1753     int odd_y = 0;
1754     int odd_xy = 0;
1755     int sq;
1756
1757     const BDIGIT *xds0, *xds1, *yds0, *yds1;
1758     BDIGIT *zds0, *zds1, *zds2, *zds3;
1759
1760     RUBY_ASSERT(xn + yn <= zn);
1761     RUBY_ASSERT(xn <= yn);
1762     RUBY_ASSERT(yn < 2 * xn);
1763
1764     sq = xds == yds && xn == yn;
1765
1766     if (yn & 1) {
1767         odd_y = 1;
1768         yn--;
1769         if (yn < xn) {
1770             odd_xy = 1;
1771             xn--;
1772         }
1773     }
1774
1775     n = yn / 2;
1776
1777     RUBY_ASSERT(n < xn);
1778
1779     if (wn < n) {
1780         /* This function itself needs only n BDIGITs for work area.
1781          * However this function calls bary_mul_karatsuba and
1782          * bary_mul_balance recursively.
1783          * 2n BDIGITs are enough to avoid allocations in
1784          * the recursively called functions.
1785          */
1786         wn = 2*n;
1787         wds = ALLOCV_N(BDIGIT, work, wn);
1788     }
1789
1790     /* Karatsuba algorithm:
1791      *
1792      * x = x0 + r*x1
1793      * y = y0 + r*y1
1794      * z = x*y
1795      *   = (x0 + r*x1) * (y0 + r*y1)
1796      *   = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
1797      *   = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
1798      *   = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
1799      */
1800
1801     xds0 = xds;
1802     xds1 = xds + n;
1803     yds0 = yds;
1804     yds1 = yds + n;
1805     zds0 = zds;
1806     zds1 = zds + n;
1807     zds2 = zds + 2*n;
1808     zds3 = zds + 3*n;
1809
1810     sub_p = 1;
1811
1812     /* zds0:? zds1:? zds2:? zds3:? wds:? */
1813
1814     if (bary_sub(zds0, n, xds, n, xds+n, xn-n)) {
1815         bary_2comp(zds0, n);
1816         sub_p = !sub_p;
1817     }
1818
1819     /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
1820
1821     if (sq) {
1822         sub_p = 1;
1823         bary_mul_karatsuba_start(zds1, 2*n, zds0, n, zds0, n, wds, wn);
1824     }
1825     else {
1826         if (bary_sub(wds, n, yds, n, yds+n, n)) {
1827             bary_2comp(wds, n);
1828             sub_p = !sub_p;
1829         }
1830
1831         /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
1832
1833         bary_mul_karatsuba_start(zds1, 2*n, zds0, n, wds, n, wds+n, wn-n);
1834     }
1835
1836     /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
1837
1838     borrow = 0;
1839     if (sub_p) {
1840         borrow = !bary_2comp(zds1, 2*n);
1841     }
1842     /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
1843
1844     MEMCPY(wds, zds1, BDIGIT, n);
1845
1846     /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
1847
1848     bary_mul_karatsuba_start(zds0, 2*n, xds0, n, yds0, n, wds+n, wn-n);
1849
1850     /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
1851
1852     carry1 = bary_add(wds, n, wds, n, zds0, n);
1853     carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
1854
1855     /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
1856
1857     carry2 = bary_add(zds1, n, zds1, n, wds, n);
1858
1859     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
1860
1861     MEMCPY(wds, zds2, BDIGIT, n);
1862
1863     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1864
1865     bary_mul_karatsuba_start(zds2, zn-2*n, xds1, xn-n, yds1, n, wds+n, wn-n);
1866
1867     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1868
1869     carry3 = bary_add(zds1, n, zds1, n, zds2, n);
1870
1871     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1872
1873     carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zn ? n : zn-3*n), carry3);
1874
1875     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1876
1877     bary_add(zds2, zn-2*n, zds2, zn-2*n, wds, n);
1878
1879     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
1880
1881     if (carry2)
1882         bary_add_one(zds2, zn-2*n);
1883
1884     if (carry1 + carry3 - borrow < 0)
1885         bary_sub_one(zds3, zn-3*n);
1886     else if (carry1 + carry3 - borrow > 0) {
1887         BDIGIT c = carry1 + carry3 - borrow;
1888         bary_add(zds3, zn-3*n, zds3, zn-3*n, &c, 1);
1889     }
1890
1891     /*
1892     if (SIZEOF_BDIGIT * zn <= 16) {
1893         uint128_t z, x, y;
1894         ssize_t i;
1895         for (x = 0, i = xn-1; 0 <= i; i--) { x <<= SIZEOF_BDIGIT*CHAR_BIT; x |= xds[i]; }
1896         for (y = 0, i = yn-1; 0 <= i; i--) { y <<= SIZEOF_BDIGIT*CHAR_BIT; y |= yds[i]; }
1897         for (z = 0, i = zn-1; 0 <= i; i--) { z <<= SIZEOF_BDIGIT*CHAR_BIT; z |= zds[i]; }
1898         RUBY_ASSERT(z == x * y);
1899     }
1900     */
1901
1902     if (odd_xy) {
1903         bary_muladd_1xN(zds+yn, zn-yn, yds[yn], xds, xn);
1904         bary_muladd_1xN(zds+xn, zn-xn, xds[xn], yds, yn+1);
1905     }
1906     else if (odd_y) {
1907         bary_muladd_1xN(zds+yn, zn-yn, yds[yn], xds, xn);
1908     }
1909
1910     if (work)
1911         ALLOCV_END(work);
1912 }
1913
1914 VALUE
1915 rb_big_mul_karatsuba(VALUE x, VALUE y)
1916 {
1917     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
1918     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
1919     if (!((xn <= yn && yn < 2) || KARATSUBA_BALANCED(xn, yn)))
1920         rb_raise(rb_eArgError, "unexpected bignum length for karatsuba");
1921     bary_mul_karatsuba(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, NULL, 0);
1922     RB_GC_GUARD(x);
1923     RB_GC_GUARD(y);
1924     return z;
1925 }
1926
1927 static void
1928 bary_mul_toom3(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
1929 {
1930     size_t n;
1931     size_t wnc;
1932     VALUE work = 0;
1933
1934     /* "p" stands for "positive".  Actually it means "non-negative", though. */
1935     size_t x0n; const BDIGIT *x0ds;
1936     size_t x1n; const BDIGIT *x1ds;
1937     size_t x2n; const BDIGIT *x2ds;
1938     size_t y0n; const BDIGIT *y0ds;
1939     size_t y1n; const BDIGIT *y1ds;
1940     size_t y2n; const BDIGIT *y2ds;
1941
1942     size_t u1n; BDIGIT *u1ds; int u1p;
1943     size_t u2n; BDIGIT *u2ds; int u2p;
1944     size_t u3n; BDIGIT *u3ds; int u3p;
1945
1946     size_t v1n; BDIGIT *v1ds; int v1p;
1947     size_t v2n; BDIGIT *v2ds; int v2p;
1948     size_t v3n; BDIGIT *v3ds; int v3p;
1949
1950     size_t t0n; BDIGIT *t0ds; int t0p;
1951     size_t t1n; BDIGIT *t1ds; int t1p;
1952     size_t t2n; BDIGIT *t2ds; int t2p;
1953     size_t t3n; BDIGIT *t3ds; int t3p;
1954     size_t t4n; BDIGIT *t4ds; int t4p;
1955
1956     size_t z0n; BDIGIT *z0ds;
1957     size_t z1n; BDIGIT *z1ds; int z1p;
1958     size_t z2n; BDIGIT *z2ds; int z2p;
1959     size_t z3n; BDIGIT *z3ds; int z3p;
1960     size_t z4n; BDIGIT *z4ds;
1961
1962     size_t zzn; BDIGIT *zzds;
1963
1964     int sq = xds == yds && xn == yn;
1965
1966     RUBY_ASSERT(xn <= yn);  /* assume y >= x */
1967     RUBY_ASSERT(xn + yn <= zn);
1968
1969     n = (yn + 2) / 3;
1970     RUBY_ASSERT(2*n < xn);
1971
1972     wnc = 0;
1973
1974     wnc += (u1n = n+1); /* BITSPERDIG*n+2 bits */
1975     wnc += (u2n = n+1); /* BITSPERDIG*n+1 bits */
1976     wnc += (u3n = n+1); /* BITSPERDIG*n+3 bits */
1977     wnc += (v1n = n+1); /* BITSPERDIG*n+2 bits */
1978     wnc += (v2n = n+1); /* BITSPERDIG*n+1 bits */
1979     wnc += (v3n = n+1); /* BITSPERDIG*n+3 bits */
1980
1981     wnc += (t0n = 2*n); /* BITSPERDIG*2*n bits */
1982     wnc += (t1n = 2*n+2); /* BITSPERDIG*2*n+4 bits but bary_mul needs u1n+v1n */
1983     wnc += (t2n = 2*n+2); /* BITSPERDIG*2*n+2 bits but bary_mul needs u2n+v2n */
1984     wnc += (t3n = 2*n+2); /* BITSPERDIG*2*n+6 bits but bary_mul needs u3n+v3n */
1985     wnc += (t4n = 2*n); /* BITSPERDIG*2*n bits */
1986
1987     wnc += (z1n = 2*n+1); /* BITSPERDIG*2*n+5 bits */
1988     wnc += (z2n = 2*n+1); /* BITSPERDIG*2*n+6 bits */
1989     wnc += (z3n = 2*n+1); /* BITSPERDIG*2*n+8 bits */
1990
1991     if (wn < wnc) {
1992         wn = wnc * 3 / 2; /* Allocate working memory for whole recursion at once. */
1993         wds = ALLOCV_N(BDIGIT, work, wn);
1994     }
1995
1996     u1ds = wds; wds += u1n;
1997     u2ds = wds; wds += u2n;
1998     u3ds = wds; wds += u3n;
1999
2000     v1ds = wds; wds += v1n;
2001     v2ds = wds; wds += v2n;
2002     v3ds = wds; wds += v3n;
2003
2004     t0ds = wds; wds += t0n;
2005     t1ds = wds; wds += t1n;
2006     t2ds = wds; wds += t2n;
2007     t3ds = wds; wds += t3n;
2008     t4ds = wds; wds += t4n;
2009
2010     z1ds = wds; wds += z1n;
2011     z2ds = wds; wds += z2n;
2012     z3ds = wds; wds += z3n;
2013
2014     wn -= wnc;
2015
2016     zzds = u1ds;
2017     zzn = 6*n+1;
2018
2019     x0n = n;
2020     x1n = n;
2021     x2n = xn - 2*n;
2022     x0ds = xds;
2023     x1ds = xds + n;
2024     x2ds = xds + 2*n;
2025
2026     if (sq) {
2027         y0n = x0n;
2028         y1n = x1n;
2029         y2n = x2n;
2030         y0ds = x0ds;
2031         y1ds = x1ds;
2032         y2ds = x2ds;
2033     }
2034     else {
2035         y0n = n;
2036         y1n = n;
2037         y2n = yn - 2*n;
2038         y0ds = yds;
2039         y1ds = yds + n;
2040         y2ds = yds + 2*n;
2041     }
2042
2043     /*
2044      * ref. https://en.wikipedia.org/wiki/Toom%E2%80%93Cook_multiplication
2045      *
2046      * x(b) = x0 * b^0 + x1 * b^1 + x2 * b^2
2047      * y(b) = y0 * b^0 + y1 * b^1 + y2 * b^2
2048      *
2049      * z(b) = x(b) * y(b)
2050      * z(b) = z0 * b^0 + z1 * b^1 + z2 * b^2 + z3 * b^3 + z4 * b^4
2051      * where:
2052      *   z0 = x0 * y0
2053      *   z1 = x0 * y1 + x1 * y0
2054      *   z2 = x0 * y2 + x1 * y1 + x2 * y0
2055      *   z3 = x1 * y2 + x2 * y1
2056      *   z4 = x2 * y2
2057      *
2058      * Toom3 method (a.k.a. Toom-Cook method):
2059      * (Step1) calculating 5 points z(b0), z(b1), z(b2), z(b3), z(b4),
2060      * where:
2061      *   b0 = 0, b1 = 1, b2 = -1, b3 = -2, b4 = inf,
2062      *   z(0)   = x(0)   * y(0)   = x0 * y0
2063      *   z(1)   = x(1)   * y(1)   = (x0 + x1 + x2) * (y0 + y1 + y2)
2064      *   z(-1)  = x(-1)  * y(-1)  = (x0 - x1 + x2) * (y0 - y1 + y2)
2065      *   z(-2)  = x(-2)  * y(-2)  = (x0 - 2 * (x1 - 2 * x2)) * (y0 - 2 * (y1 - 2 * y2))
2066      *   z(inf) = x(inf) * y(inf) = x2 * y2
2067      *
2068      * (Step2) interpolating z0, z1, z2, z3 and z4.
2069      *
2070      * (Step3) Substituting base value into b of the polynomial z(b),
2071      */
2072
2073     /*
2074      * [Step1] calculating 5 points z(b0), z(b1), z(b2), z(b3), z(b4)
2075      */
2076
2077     /* u1 <- x0 + x2 */
2078     bary_add(u1ds, u1n, x0ds, x0n, x2ds, x2n);
2079     u1p = 1;
2080
2081     /* x(-1) : u2 <- u1 - x1 = x0 - x1 + x2 */
2082     if (bary_sub(u2ds, u2n, u1ds, u1n, x1ds, x1n)) {
2083         bary_2comp(u2ds, u2n);
2084         u2p = 0;
2085     }
2086     else {
2087         u2p = 1;
2088     }
2089
2090     /* x(1) : u1 <- u1 + x1 = x0 + x1 + x2 */
2091     bary_add(u1ds, u1n, u1ds, u1n, x1ds, x1n);
2092
2093     /* x(-2) : u3 <- 2 * (u2 + x2) - x0 = x0 - 2 * (x1 - 2 * x2) */
2094     u3p = 1;
2095     if (u2p) {
2096         bary_add(u3ds, u3n, u2ds, u2n, x2ds, x2n);
2097     }
2098     else if (bary_sub(u3ds, u3n, x2ds, x2n, u2ds, u2n)) {
2099         bary_2comp(u3ds, u3n);
2100         u3p = 0;
2101     }
2102     bary_small_lshift(u3ds, u3ds, u3n, 1);
2103     if (!u3p) {
2104         bary_add(u3ds, u3n, u3ds, u3n, x0ds, x0n);
2105     }
2106     else if (bary_sub(u3ds, u3n, u3ds, u3n, x0ds, x0n)) {
2107         bary_2comp(u3ds, u3n);
2108         u3p = 0;
2109     }
2110
2111     if (sq) {
2112         v1n = u1n; v1ds = u1ds; v1p = u1p;
2113         v2n = u2n; v2ds = u2ds; v2p = u2p;
2114         v3n = u3n; v3ds = u3ds; v3p = u3p;
2115     }
2116     else {
2117         /* v1 <- y0 + y2 */
2118         bary_add(v1ds, v1n, y0ds, y0n, y2ds, y2n);
2119         v1p = 1;
2120
2121         /* y(-1) : v2 <- v1 - y1 = y0 - y1 + y2 */
2122         v2p = 1;
2123         if (bary_sub(v2ds, v2n, v1ds, v1n, y1ds, y1n)) {
2124             bary_2comp(v2ds, v2n);
2125             v2p = 0;
2126         }
2127
2128         /* y(1) : v1 <- v1 + y1 = y0 + y1 + y2 */
2129         bary_add(v1ds, v1n, v1ds, v1n, y1ds, y1n);
2130
2131         /* y(-2) : v3 <- 2 * (v2 + y2) - y0 = y0 - 2 * (y1 - 2 * y2) */
2132         v3p = 1;
2133         if (v2p) {
2134             bary_add(v3ds, v3n, v2ds, v2n, y2ds, y2n);
2135         }
2136         else if (bary_sub(v3ds, v3n, y2ds, y2n, v2ds, v2n)) {
2137             bary_2comp(v3ds, v3n);
2138             v3p = 0;
2139         }
2140         bary_small_lshift(v3ds, v3ds, v3n, 1);
2141         if (!v3p) {
2142             bary_add(v3ds, v3n, v3ds, v3n, y0ds, y0n);
2143         }
2144         else if (bary_sub(v3ds, v3n, v3ds, v3n, y0ds, y0n)) {
2145             bary_2comp(v3ds, v3n);
2146             v3p = 0;
2147         }
2148     }
2149
2150     /* z(0) : t0 <- x0 * y0 */
2151     bary_mul_toom3_start(t0ds, t0n, x0ds, x0n, y0ds, y0n, wds, wn);
2152     t0p = 1;
2153
2154     /* z(1) : t1 <- u1 * v1 */
2155     bary_mul_toom3_start(t1ds, t1n, u1ds, u1n, v1ds, v1n, wds, wn);
2156     t1p = u1p == v1p;
2157     RUBY_ASSERT(t1ds[t1n-1] == 0);
2158     t1n--;
2159
2160     /* z(-1) : t2 <- u2 * v2 */
2161     bary_mul_toom3_start(t2ds, t2n, u2ds, u2n, v2ds, v2n, wds, wn);
2162     t2p = u2p == v2p;
2163     RUBY_ASSERT(t2ds[t2n-1] == 0);
2164     t2n--;
2165
2166     /* z(-2) : t3 <- u3 * v3 */
2167     bary_mul_toom3_start(t3ds, t3n, u3ds, u3n, v3ds, v3n, wds, wn);
2168     t3p = u3p == v3p;
2169     RUBY_ASSERT(t3ds[t3n-1] == 0);
2170     t3n--;
2171
2172     /* z(inf) : t4 <- x2 * y2 */
2173     bary_mul_toom3_start(t4ds, t4n, x2ds, x2n, y2ds, y2n, wds, wn);
2174     t4p = 1;
2175
2176     /*
2177      * [Step2] interpolating z0, z1, z2, z3 and z4.
2178      */
2179
2180     /* z0 <- z(0) == t0 */
2181     z0n = t0n; z0ds = t0ds;
2182
2183     /* z4 <- z(inf) == t4 */
2184     z4n = t4n; z4ds = t4ds;
2185
2186     /* z3 <- (z(-2) - z(1)) / 3 == (t3 - t1) / 3 */
2187     if (t3p == t1p) {
2188         z3p = t3p;
2189         if (bary_sub(z3ds, z3n, t3ds, t3n, t1ds, t1n)) {
2190             bary_2comp(z3ds, z3n);
2191             z3p = !z3p;
2192         }
2193     }
2194     else {
2195         z3p = t3p;
2196         bary_add(z3ds, z3n, t3ds, t3n, t1ds, t1n);
2197     }
2198     bigdivrem_single(z3ds, z3ds, z3n, 3);
2199
2200     /* z1 <- (z(1) - z(-1)) / 2 == (t1 - t2) / 2 */
2201     if (t1p == t2p) {
2202         z1p = t1p;
2203         if (bary_sub(z1ds, z1n, t1ds, t1n, t2ds, t2n)) {
2204             bary_2comp(z1ds, z1n);
2205             z1p = !z1p;
2206         }
2207     }
2208     else {
2209         z1p = t1p;
2210         bary_add(z1ds, z1n, t1ds, t1n, t2ds, t2n);
2211     }
2212     bary_small_rshift(z1ds, z1ds, z1n, 1, 0);
2213
2214     /* z2 <- z(-1) - z(0) == t2 - t0 */
2215     if (t2p == t0p) {
2216         z2p = t2p;
2217         if (bary_sub(z2ds, z2n, t2ds, t2n, t0ds, t0n)) {
2218             bary_2comp(z2ds, z2n);
2219             z2p = !z2p;
2220         }
2221     }
2222     else {
2223         z2p = t2p;
2224         bary_add(z2ds, z2n, t2ds, t2n, t0ds, t0n);
2225     }
2226
2227     /* z3 <- (z2 - z3) / 2 + 2 * z(inf) == (z2 - z3) / 2 + 2 * t4 */
2228     if (z2p == z3p) {
2229         z3p = z2p;
2230         if (bary_sub(z3ds, z3n, z2ds, z2n, z3ds, z3n)) {
2231             bary_2comp(z3ds, z3n);
2232             z3p = !z3p;
2233         }
2234     }
2235     else {
2236         z3p = z2p;
2237         bary_add(z3ds, z3n, z2ds, z2n, z3ds, z3n);
2238     }
2239     bary_small_rshift(z3ds, z3ds, z3n, 1, 0);
2240     if (z3p == t4p) {
2241         bary_muladd_1xN(z3ds, z3n, 2, t4ds, t4n);
2242     }
2243     else {
2244         if (bary_mulsub_1xN(z3ds, z3n, 2, t4ds, t4n)) {
2245             bary_2comp(z3ds, z3n);
2246             z3p = !z3p;
2247         }
2248     }
2249
2250     /* z2 <- z2 + z1 - z(inf) == z2 + z1 - t4 */
2251     if (z2p == z1p) {
2252         bary_add(z2ds, z2n, z2ds, z2n, z1ds, z1n);
2253     }
2254     else {
2255         if (bary_sub(z2ds, z2n, z2ds, z2n, z1ds, z1n)) {
2256             bary_2comp(z2ds, z2n);
2257             z2p = !z2p;
2258         }
2259     }
2260
2261     if (z2p == t4p) {
2262         if (bary_sub(z2ds, z2n, z2ds, z2n, t4ds, t4n)) {
2263             bary_2comp(z2ds, z2n);
2264             z2p = !z2p;
2265         }
2266     }
2267     else {
2268         bary_add(z2ds, z2n, z2ds, z2n, t4ds, t4n);
2269     }
2270
2271     /* z1 <- z1 - z3 */
2272     if (z1p == z3p) {
2273         if (bary_sub(z1ds, z1n, z1ds, z1n, z3ds, z3n)) {
2274             bary_2comp(z1ds, z1n);
2275             z1p = !z1p;
2276         }
2277     }
2278     else {
2279         bary_add(z1ds, z1n, z1ds, z1n, z3ds, z3n);
2280     }
2281
2282     /*
2283      * [Step3] Substituting base value into b of the polynomial z(b),
2284      */
2285
2286     MEMCPY(zzds, z0ds, BDIGIT, z0n);
2287     BDIGITS_ZERO(zzds + z0n, 4*n - z0n);
2288     MEMCPY(zzds + 4*n, z4ds, BDIGIT, z4n);
2289     BDIGITS_ZERO(zzds + 4*n + z4n, zzn - (4*n + z4n));
2290     if (z1p)
2291         bary_add(zzds +   n, zzn -   n, zzds +   n, zzn -   n, z1ds, z1n);
2292     else
2293         bary_sub(zzds +   n, zzn -   n, zzds +   n, zzn -   n, z1ds, z1n);
2294     if (z2p)
2295         bary_add(zzds + 2*n, zzn - 2*n, zzds + 2*n, zzn - 2*n, z2ds, z2n);
2296     else
2297         bary_sub(zzds + 2*n, zzn - 2*n, zzds + 2*n, zzn - 2*n, z2ds, z2n);
2298     if (z3p)
2299         bary_add(zzds + 3*n, zzn - 3*n, zzds + 3*n, zzn - 3*n, z3ds, z3n);
2300     else
2301         bary_sub(zzds + 3*n, zzn - 3*n, zzds + 3*n, zzn - 3*n, z3ds, z3n);
2302
2303     BARY_TRUNC(zzds, zzn);
2304     MEMCPY(zds, zzds, BDIGIT, zzn);
2305     BDIGITS_ZERO(zds + zzn, zn - zzn);
2306
2307     if (work)
2308         ALLOCV_END(work);
2309 }
2310
2311 VALUE
2312 rb_big_mul_toom3(VALUE x, VALUE y)
2313 {
2314     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
2315     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2316     if (xn > yn || yn < 3 || !TOOM3_BALANCED(xn,yn))
2317         rb_raise(rb_eArgError, "unexpected bignum length for toom3");
2318     bary_mul_toom3(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, NULL, 0);
2319     RB_GC_GUARD(x);
2320     RB_GC_GUARD(y);
2321     return z;
2322 }
2323
2324 #if USE_GMP
2325 static inline void
2326 bdigits_to_mpz(mpz_t mp, const BDIGIT *digits, size_t len)
2327 {
2328     const size_t nails = (sizeof(BDIGIT)-SIZEOF_BDIGIT)*CHAR_BIT;
2329     mpz_import(mp, len, -1, sizeof(BDIGIT), 0, nails, digits);
2330 }
2331
2332 static inline void
2333 bdigits_from_mpz(mpz_t mp, BDIGIT *digits, size_t *len)
2334 {
2335     const size_t nails = (sizeof(BDIGIT)-SIZEOF_BDIGIT)*CHAR_BIT;
2336     mpz_export(digits, len, -1, sizeof(BDIGIT), 0, nails, mp);
2337 }
2338
2339 static void
2340 bary_mul_gmp(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2341 {
2342     mpz_t x, y, z;
2343     size_t count;
2344
2345     RUBY_ASSERT(xn + yn <= zn);
2346
2347     mpz_init(x);
2348     mpz_init(y);
2349     mpz_init(z);
2350     bdigits_to_mpz(x, xds, xn);
2351     if (xds == yds && xn == yn) {
2352         mpz_mul(z, x, x);
2353     }
2354     else {
2355         bdigits_to_mpz(y, yds, yn);
2356         mpz_mul(z, x, y);
2357     }
2358     bdigits_from_mpz(z, zds, &count);
2359     BDIGITS_ZERO(zds+count, zn-count);
2360     mpz_clear(x);
2361     mpz_clear(y);
2362     mpz_clear(z);
2363 }
2364
2365 VALUE
2366 rb_big_mul_gmp(VALUE x, VALUE y)
2367 {
2368     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
2369     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2370     bary_mul_gmp(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn);
2371     RB_GC_GUARD(x);
2372     RB_GC_GUARD(y);
2373     return z;
2374 }
2375 #endif
2376
2377 static void
2378 bary_short_mul(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2379 {
2380     RUBY_ASSERT(xn + yn <= zn);
2381
2382     if (xn == 1 && yn == 1) {
2383         bary_mul_single(zds, zn, xds[0], yds[0]);
2384     }
2385     else {
2386         bary_mul_normal(zds, zn, xds, xn, yds, yn);
2387         rb_thread_check_ints();
2388     }
2389 }
2390
2391 /* determine whether a bignum is sparse or not by random sampling */
2392 static inline int
2393 bary_sparse_p(const BDIGIT *ds, size_t n)
2394 {
2395     long c = 0;
2396
2397     if (          ds[2 * n / 5]) c++;
2398     if (c <= 1 && ds[    n / 2]) c++;
2399     if (c <= 1 && ds[3 * n / 5]) c++;
2400
2401     return (c <= 1) ? 1 : 0;
2402 }
2403
2404 static int
2405 bary_mul_precheck(BDIGIT **zdsp, size_t *znp, const BDIGIT **xdsp, size_t *xnp, const BDIGIT **ydsp, size_t *ynp)
2406 {
2407     size_t nlsz; /* number of least significant zero BDIGITs */
2408
2409     BDIGIT *zds = *zdsp;
2410     size_t zn = *znp;
2411     const BDIGIT *xds = *xdsp;
2412     size_t xn = *xnp;
2413     const BDIGIT *yds = *ydsp;
2414     size_t yn = *ynp;
2415
2416     RUBY_ASSERT(xn + yn <= zn);
2417
2418     nlsz = 0;
2419
2420     while (0 < xn) {
2421         if (xds[xn-1] == 0) {
2422             xn--;
2423         }
2424         else {
2425             do {
2426                 if (xds[0] != 0)
2427                     break;
2428                 xds++;
2429                 xn--;
2430                 nlsz++;
2431             } while (0 < xn);
2432             break;
2433         }
2434     }
2435
2436     while (0 < yn) {
2437         if (yds[yn-1] == 0) {
2438             yn--;
2439         }
2440         else {
2441             do {
2442                 if (yds[0] != 0)
2443                     break;
2444                 yds++;
2445                 yn--;
2446                 nlsz++;
2447             } while (0 < yn);
2448             break;
2449         }
2450     }
2451
2452     if (nlsz) {
2453         BDIGITS_ZERO(zds, nlsz);
2454         zds += nlsz;
2455         zn -= nlsz;
2456     }
2457
2458     /* make sure that y is longer than x */
2459     if (xn > yn) {
2460         const BDIGIT *tds;
2461         size_t tn;
2462         tds = xds; xds = yds; yds = tds;
2463         tn = xn; xn = yn; yn = tn;
2464     }
2465     RUBY_ASSERT(xn <= yn);
2466
2467     if (xn <= 1) {
2468         if (xn == 0) {
2469             BDIGITS_ZERO(zds, zn);
2470             return 1;
2471         }
2472
2473         if (xds[0] == 1) {
2474             MEMCPY(zds, yds, BDIGIT, yn);
2475             BDIGITS_ZERO(zds+yn, zn-yn);
2476             return 1;
2477         }
2478         if (POW2_P(xds[0])) {
2479             zds[yn] = bary_small_lshift(zds, yds, yn, bit_length(xds[0])-1);
2480             BDIGITS_ZERO(zds+yn+1, zn-yn-1);
2481             return 1;
2482         }
2483         if (yn == 1 && yds[0] == 1) {
2484             zds[0] = xds[0];
2485             BDIGITS_ZERO(zds+1, zn-1);
2486             return 1;
2487         }
2488         bary_mul_normal(zds, zn, xds, xn, yds, yn);
2489         return 1;
2490     }
2491
2492     *zdsp = zds;
2493     *znp = zn;
2494     *xdsp = xds;
2495     *xnp = xn;
2496     *ydsp = yds;
2497     *ynp = yn;
2498
2499     return 0;
2500 }
2501
2502 static void
2503 bary_mul_karatsuba_branch(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2504 {
2505     /* normal multiplication when x is small */
2506     if (xn < KARATSUBA_MUL_DIGITS) {
2507         goto normal;
2508     }
2509
2510     /* normal multiplication when x or y is a sparse bignum */
2511     if (bary_sparse_p(xds, xn)) goto normal;
2512     if (bary_sparse_p(yds, yn)) {
2513         bary_short_mul(zds, zn, yds, yn, xds, xn);
2514         return;
2515     }
2516
2517     /* balance multiplication by slicing y when x is much smaller than y */
2518     if (!KARATSUBA_BALANCED(xn, yn)) {
2519         bary_mul_balance_with_mulfunc(zds, zn, xds, xn, yds, yn, wds, wn, bary_mul_karatsuba_start);
2520         return;
2521     }
2522
2523     /* multiplication by karatsuba method */
2524     bary_mul_karatsuba(zds, zn, xds, xn, yds, yn, wds, wn);
2525     return;
2526
2527   normal:
2528     if (xds == yds && xn == yn) {
2529         bary_sq_fast(zds, zn, xds, xn);
2530     }
2531     else {
2532         bary_short_mul(zds, zn, xds, xn, yds, yn);
2533     }
2534 }
2535
2536 static void
2537 bary_mul_karatsuba_start(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2538 {
2539     if (bary_mul_precheck(&zds, &zn, &xds, &xn, &yds, &yn))
2540         return;
2541
2542     bary_mul_karatsuba_branch(zds, zn, xds, xn, yds, yn, wds, wn);
2543 }
2544
2545 static void
2546 bary_mul_toom3_branch(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2547 {
2548     if (xn < TOOM3_MUL_DIGITS) {
2549         bary_mul_karatsuba_branch(zds, zn, xds, xn, yds, yn, wds, wn);
2550         return;
2551     }
2552
2553     if (!TOOM3_BALANCED(xn, yn)) {
2554         bary_mul_balance_with_mulfunc(zds, zn, xds, xn, yds, yn, wds, wn, bary_mul_toom3_start);
2555         return;
2556     }
2557
2558     bary_mul_toom3(zds, zn, xds, xn, yds, yn, wds, wn);
2559 }
2560
2561 static void
2562 bary_mul_toom3_start(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2563 {
2564     if (bary_mul_precheck(&zds, &zn, &xds, &xn, &yds, &yn))
2565         return;
2566
2567     bary_mul_toom3_branch(zds, zn, xds, xn, yds, yn, wds, wn);
2568 }
2569
2570 static void
2571 bary_mul(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2572 {
2573     if (xn <= yn) {
2574         if (xn < NAIVE_MUL_DIGITS) {
2575             if (xds == yds && xn == yn)
2576                 bary_sq_fast(zds, zn, xds, xn);
2577             else
2578                 bary_short_mul(zds, zn, xds, xn, yds, yn);
2579             return;
2580         }
2581     }
2582     else {
2583         if (yn < NAIVE_MUL_DIGITS) {
2584             bary_short_mul(zds, zn, yds, yn, xds, xn);
2585             return;
2586         }
2587     }
2588
2589 #if USE_GMP
2590     bary_mul_gmp(zds, zn, xds, xn, yds, yn);
2591 #else
2592     bary_mul_toom3_start(zds, zn, xds, xn, yds, yn, NULL, 0);
2593 #endif
2594 }
2595
2596 struct big_div_struct {
2597     size_t yn, zn;
2598     BDIGIT *yds, *zds;
2599     volatile VALUE stop;
2600 };
2601
2602 static void *
2603 bigdivrem1(void *ptr)
2604 {
2605     struct big_div_struct *bds = (struct big_div_struct*)ptr;
2606     size_t yn = bds->yn;
2607     size_t zn = bds->zn;
2608     BDIGIT *yds = bds->yds, *zds = bds->zds;
2609     BDIGIT_DBL_SIGNED num;
2610     BDIGIT q;
2611
2612     do {
2613         if (bds->stop) {
2614             bds->zn = zn;
2615             return 0;
2616         }
2617         if (zds[zn-1] == yds[yn-1]) q = BDIGMAX;
2618         else q = (BDIGIT)((BIGUP(zds[zn-1]) + zds[zn-2])/yds[yn-1]);
2619         if (q) {
2620             num = bigdivrem_mulsub(zds+zn-(yn+1), yn+1,
2621                                    q,
2622                                    yds, yn);
2623             while (num) { /* "add back" required */
2624                 q--;
2625                 num = bary_add(zds+zn-(yn+1), yn,
2626                                zds+zn-(yn+1), yn,
2627                                yds, yn);
2628                 num--;
2629             }
2630         }
2631         zn--;
2632         zds[zn] = q;
2633     } while (zn > yn);
2634     return 0;
2635 }
2636
2637 /* async-signal-safe */
2638 static void
2639 rb_big_stop(void *ptr)
2640 {
2641     struct big_div_struct *bds = ptr;
2642     bds->stop = Qtrue;
2643 }
2644
2645 static BDIGIT
2646 bigdivrem_single1(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT x_higher_bdigit, BDIGIT y)
2647 {
2648     RUBY_ASSERT(0 < xn);
2649     RUBY_ASSERT(x_higher_bdigit < y);
2650     if (POW2_P(y)) {
2651         BDIGIT r;
2652         r = xds[0] & (y-1);
2653         bary_small_rshift(qds, xds, xn, bit_length(y)-1, x_higher_bdigit);
2654         return r;
2655     }
2656     else {
2657         size_t i;
2658         BDIGIT_DBL t2;
2659         t2 = x_higher_bdigit;
2660         for (i = 0; i < xn; i++) {
2661             t2 = BIGUP(t2) + xds[xn - i - 1];
2662             qds[xn - i - 1] = (BDIGIT)(t2 / y);
2663             t2 %= y;
2664         }
2665         return (BDIGIT)t2;
2666     }
2667 }
2668
2669 static BDIGIT
2670 bigdivrem_single(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT y)
2671 {
2672     return bigdivrem_single1(qds, xds, xn, 0, y);
2673 }
2674
2675 static void
2676 bigdivrem_restoring(BDIGIT *zds, size_t zn, BDIGIT *yds, size_t yn)
2677 {
2678     struct big_div_struct bds;
2679     size_t ynzero;
2680
2681     RUBY_ASSERT(yn < zn);
2682     RUBY_ASSERT(BDIGIT_MSB(yds[yn-1]));
2683     RUBY_ASSERT(zds[zn-1] < yds[yn-1]);
2684
2685     for (ynzero = 0; !yds[ynzero]; ynzero++);
2686
2687     if (ynzero+1 == yn) {
2688         BDIGIT r;
2689         r = bigdivrem_single1(zds+yn, zds+ynzero, zn-yn, zds[zn-1], yds[ynzero]);
2690         zds[ynzero] = r;
2691         return;
2692     }
2693
2694     bds.yn = yn - ynzero;
2695     bds.zds = zds + ynzero;
2696     bds.yds = yds + ynzero;
2697     bds.stop = Qfalse;
2698     bds.zn = zn - ynzero;
2699     if (bds.zn > 10000 || bds.yn > 10000) {
2700       retry:
2701         bds.stop = Qfalse;
2702         rb_nogvl(bigdivrem1, &bds, rb_big_stop, &bds, RB_NOGVL_UBF_ASYNC_SAFE);
2703
2704         if (bds.stop == Qtrue) {
2705             /* execute trap handler, but exception was not raised. */
2706             goto retry;
2707         }
2708     }
2709     else {
2710         bigdivrem1(&bds);
2711     }
2712 }
2713
2714 static void
2715 bary_divmod_normal(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2716 {
2717     int shift;
2718     BDIGIT *zds, *yyds;
2719     size_t zn;
2720     VALUE tmpyz = 0;
2721
2722     RUBY_ASSERT(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
2723     RUBY_ASSERT(qds ? (xn - yn + 1) <= qn : 1);
2724     RUBY_ASSERT(rds ? yn <= rn : 1);
2725
2726     zn = xn + BIGDIVREM_EXTRA_WORDS;
2727
2728     shift = nlz(yds[yn-1]);
2729     if (shift) {
2730         int alloc_y = !rds;
2731         int alloc_z = !qds || qn < zn;
2732         if (alloc_y && alloc_z) {
2733             yyds = ALLOCV_N(BDIGIT, tmpyz, yn+zn);
2734             zds = yyds + yn;
2735         }
2736         else {
2737             yyds = alloc_y ? ALLOCV_N(BDIGIT, tmpyz, yn) : rds;
2738             zds = alloc_z ? ALLOCV_N(BDIGIT, tmpyz, zn) : qds;
2739         }
2740         zds[xn] = bary_small_lshift(zds, xds, xn, shift);
2741         bary_small_lshift(yyds, yds, yn, shift);
2742     }
2743     else {
2744         if (qds && zn <= qn)
2745             zds = qds;
2746         else
2747             zds = ALLOCV_N(BDIGIT, tmpyz, zn);
2748         MEMCPY(zds, xds, BDIGIT, xn);
2749         zds[xn] = 0;
2750         /* bigdivrem_restoring will not modify y.
2751          * So use yds directly.  */
2752         yyds = (BDIGIT *)yds;
2753     }
2754
2755     bigdivrem_restoring(zds, zn, yyds, yn);
2756
2757     if (rds) {
2758         if (shift)
2759             bary_small_rshift(rds, zds, yn, shift, 0);
2760         else
2761             MEMCPY(rds, zds, BDIGIT, yn);
2762         BDIGITS_ZERO(rds+yn, rn-yn);
2763     }
2764
2765     if (qds) {
2766         size_t j = zn - yn;
2767         MEMMOVE(qds, zds+yn, BDIGIT, j);
2768         BDIGITS_ZERO(qds+j, qn-j);
2769     }
2770
2771     if (tmpyz)
2772         ALLOCV_END(tmpyz);
2773 }
2774
2775 VALUE
2776 rb_big_divrem_normal(VALUE x, VALUE y)
2777 {
2778     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), qn, rn;
2779     BDIGIT *xds = BDIGITS(x), *yds = BDIGITS(y), *qds, *rds;
2780     VALUE q, r;
2781
2782     BARY_TRUNC(yds, yn);
2783     if (yn == 0)
2784         rb_num_zerodiv();
2785     BARY_TRUNC(xds, xn);
2786
2787     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1]))
2788         return rb_assoc_new(LONG2FIX(0), x);
2789
2790     qn = xn + BIGDIVREM_EXTRA_WORDS;
2791     q = bignew(qn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2792     qds = BDIGITS(q);
2793
2794     rn = yn;
2795     r = bignew(rn, BIGNUM_SIGN(x));
2796     rds = BDIGITS(r);
2797
2798     bary_divmod_normal(qds, qn, rds, rn, xds, xn, yds, yn);
2799
2800     bigtrunc(q);
2801     bigtrunc(r);
2802
2803     RB_GC_GUARD(x);
2804     RB_GC_GUARD(y);
2805
2806     return rb_assoc_new(q, r);
2807 }
2808
2809 #if USE_GMP
2810 static void
2811 bary_divmod_gmp(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2812 {
2813     mpz_t x, y, q, r;
2814     size_t count;
2815
2816     RUBY_ASSERT(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
2817     RUBY_ASSERT(qds ? (xn - yn + 1) <= qn : 1);
2818     RUBY_ASSERT(rds ? yn <= rn : 1);
2819     RUBY_ASSERT(qds || rds);
2820
2821     mpz_init(x);
2822     mpz_init(y);
2823     if (qds) mpz_init(q);
2824     if (rds) mpz_init(r);
2825
2826     bdigits_to_mpz(x, xds, xn);
2827     bdigits_to_mpz(y, yds, yn);
2828
2829     if (!rds) {
2830         mpz_fdiv_q(q, x, y);
2831     }
2832     else if (!qds) {
2833         mpz_fdiv_r(r, x, y);
2834     }
2835     else {
2836         mpz_fdiv_qr(q, r, x, y);
2837     }
2838
2839     mpz_clear(x);
2840     mpz_clear(y);
2841
2842     if (qds) {
2843         bdigits_from_mpz(q, qds, &count);
2844         BDIGITS_ZERO(qds+count, qn-count);
2845         mpz_clear(q);
2846     }
2847
2848     if (rds) {
2849         bdigits_from_mpz(r, rds, &count);
2850         BDIGITS_ZERO(rds+count, rn-count);
2851         mpz_clear(r);
2852     }
2853 }
2854
2855 VALUE
2856 rb_big_divrem_gmp(VALUE x, VALUE y)
2857 {
2858     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), qn, rn;
2859     BDIGIT *xds = BDIGITS(x), *yds = BDIGITS(y), *qds, *rds;
2860     VALUE q, r;
2861
2862     BARY_TRUNC(yds, yn);
2863     if (yn == 0)
2864         rb_num_zerodiv();
2865     BARY_TRUNC(xds, xn);
2866
2867     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1]))
2868         return rb_assoc_new(LONG2FIX(0), x);
2869
2870     qn = xn - yn + 1;
2871     q = bignew(qn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2872     qds = BDIGITS(q);
2873
2874     rn = yn;
2875     r = bignew(rn, BIGNUM_SIGN(x));
2876     rds = BDIGITS(r);
2877
2878     bary_divmod_gmp(qds, qn, rds, rn, xds, xn, yds, yn);
2879
2880     bigtrunc(q);
2881     bigtrunc(r);
2882
2883     RB_GC_GUARD(x);
2884     RB_GC_GUARD(y);
2885
2886     return rb_assoc_new(q, r);
2887 }
2888 #endif
2889
2890 static void
2891 bary_divmod_branch(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2892 {
2893 #if USE_GMP
2894     if (GMP_DIV_DIGITS < xn) {
2895         bary_divmod_gmp(qds, qn, rds, rn, xds, xn, yds, yn);
2896         return;
2897     }
2898 #endif
2899     bary_divmod_normal(qds, qn, rds, rn, xds, xn, yds, yn);
2900 }
2901
2902 static void
2903 bary_divmod(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2904 {
2905     RUBY_ASSERT(xn <= qn);
2906     RUBY_ASSERT(yn <= rn);
2907
2908     BARY_TRUNC(yds, yn);
2909     if (yn == 0)
2910         rb_num_zerodiv();
2911
2912     BARY_TRUNC(xds, xn);
2913     if (xn == 0) {
2914         BDIGITS_ZERO(qds, qn);
2915         BDIGITS_ZERO(rds, rn);
2916         return;
2917     }
2918
2919     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1])) {
2920         MEMCPY(rds, xds, BDIGIT, xn);
2921         BDIGITS_ZERO(rds+xn, rn-xn);
2922         BDIGITS_ZERO(qds, qn);
2923     }
2924     else if (yn == 1) {
2925         MEMCPY(qds, xds, BDIGIT, xn);
2926         BDIGITS_ZERO(qds+xn, qn-xn);
2927         rds[0] = bigdivrem_single(qds, xds, xn, yds[0]);
2928         BDIGITS_ZERO(rds+1, rn-1);
2929     }
2930     else if (xn == 2 && yn == 2) {
2931         BDIGIT_DBL x = bary2bdigitdbl(xds, 2);
2932         BDIGIT_DBL y = bary2bdigitdbl(yds, 2);
2933         BDIGIT_DBL q = x / y;
2934         BDIGIT_DBL r = x % y;
2935         qds[0] = BIGLO(q);
2936         qds[1] = BIGLO(BIGDN(q));
2937         BDIGITS_ZERO(qds+2, qn-2);
2938         rds[0] = BIGLO(r);
2939         rds[1] = BIGLO(BIGDN(r));
2940         BDIGITS_ZERO(rds+2, rn-2);
2941     }
2942     else {
2943         bary_divmod_branch(qds, qn, rds, rn, xds, xn, yds, yn);
2944     }
2945 }
2946
2947
2948 #ifndef BIGNUM_DEBUG
2949 # define BIGNUM_DEBUG (0+RUBY_DEBUG)
2950 #endif
2951
2952 static int
2953 bigzero_p(VALUE x)
2954 {
2955     return bary_zero_p(BDIGITS(x), BIGNUM_LEN(x));
2956 }
2957
2958 int
2959 rb_bigzero_p(VALUE x)
2960 {
2961     return BIGZEROP(x);
2962 }
2963
2964 int
2965 rb_cmpint(VALUE val, VALUE a, VALUE b)
2966 {
2967     if (NIL_P(val)) {
2968         rb_cmperr(a, b);
2969     }
2970     if (FIXNUM_P(val)) {
2971         long l = FIX2LONG(val);
2972         if (l > 0) return 1;
2973         if (l < 0) return -1;
2974         return 0;
2975     }
2976     if (RB_BIGNUM_TYPE_P(val)) {
2977         if (BIGZEROP(val)) return 0;
2978         if (BIGNUM_SIGN(val)) return 1;
2979         return -1;
2980     }
2981     if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1;
2982     if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1;
2983     return 0;
2984 }
2985
2986 #define BIGNUM_SET_LEN(b,l) \
2987     (BIGNUM_EMBED_P(b) ? \
2988      (void)(RBASIC(b)->flags = \
2989             (RBASIC(b)->flags & ~BIGNUM_EMBED_LEN_MASK) | \
2990             ((l) << BIGNUM_EMBED_LEN_SHIFT)) : \
2991      (void)(RBIGNUM(b)->as.heap.len = (l)))
2992
2993 static void
2994 rb_big_realloc(VALUE big, size_t len)
2995 {
2996     BDIGIT *ds;
2997     if (BIGNUM_EMBED_P(big)) {
2998         if (BIGNUM_EMBED_LEN_MAX < len) {
2999             ds = ALLOC_N(BDIGIT, len);
3000             MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, BIGNUM_EMBED_LEN_MAX);
3001             RBIGNUM(big)->as.heap.len = BIGNUM_LEN(big);
3002             RBIGNUM(big)->as.heap.digits = ds;
3003             FL_UNSET_RAW(big, BIGNUM_EMBED_FLAG);
3004         }
3005     }
3006     else {
3007         if (len <= BIGNUM_EMBED_LEN_MAX) {
3008             ds = RBIGNUM(big)->as.heap.digits;
3009             FL_SET_RAW(big, BIGNUM_EMBED_FLAG);
3010             BIGNUM_SET_LEN(big, len);
3011             (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)RBIGNUM(big)->as.ary, sizeof(RBIGNUM(big)->as.ary));
3012             if (ds) {
3013                 MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len);
3014                 xfree(ds);
3015             }
3016         }
3017         else {
3018             if (BIGNUM_LEN(big) == 0) {
3019                 RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
3020             }
3021             else {
3022                 REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len);
3023             }
3024         }
3025     }
3026 }
3027
3028 void
3029 rb_big_resize(VALUE big, size_t len)
3030 {
3031     rb_big_realloc(big, len);
3032     BIGNUM_SET_LEN(big, len);
3033 }
3034
3035 static VALUE
3036 bignew_1(VALUE klass, size_t len, int sign)
3037 {
3038     NEWOBJ_OF(big, struct RBignum, klass,
3039             T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0), sizeof(struct RBignum), 0);
3040     VALUE bigv = (VALUE)big;
3041     BIGNUM_SET_SIGN(bigv, sign);
3042     if (len <= BIGNUM_EMBED_LEN_MAX) {
3043         FL_SET_RAW(bigv, BIGNUM_EMBED_FLAG);
3044         BIGNUM_SET_LEN(bigv, len);
3045         (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)big->as.ary, sizeof(big->as.ary));
3046     }
3047     else {
3048         big->as.heap.digits = ALLOC_N(BDIGIT, len);
3049         big->as.heap.len = len;
3050     }
3051     OBJ_FREEZE(bigv);
3052     return bigv;
3053 }
3054
3055 VALUE
3056 rb_big_new(size_t len, int sign)
3057 {
3058     return bignew(len, sign != 0);
3059 }
3060
3061 VALUE
3062 rb_big_clone(VALUE x)
3063 {
3064     size_t len = BIGNUM_LEN(x);
3065     VALUE z = bignew_1(CLASS_OF(x), len, BIGNUM_SIGN(x));
3066
3067     MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, len);
3068     return z;
3069 }
3070
3071 static void
3072 big_extend_carry(VALUE x)
3073 {
3074     rb_big_resize(x, BIGNUM_LEN(x)+1);
3075     BDIGITS(x)[BIGNUM_LEN(x)-1] = 1;
3076 }
3077
3078 /* modify a bignum by 2's complement */
3079 static void
3080 get2comp(VALUE x)
3081 {
3082     long i = BIGNUM_LEN(x);
3083     BDIGIT *ds = BDIGITS(x);
3084
3085     if (bary_2comp(ds, i)) {
3086         big_extend_carry(x);
3087     }
3088 }
3089
3090 void
3091 rb_big_2comp(VALUE x)                   /* get 2's complement */
3092 {
3093     get2comp(x);
3094 }
3095
3096 static BDIGIT
3097 abs2twocomp(VALUE *xp, long *n_ret)
3098 {
3099     VALUE x = *xp;
3100     long n = BIGNUM_LEN(x);
3101     BDIGIT *ds = BDIGITS(x);
3102     BDIGIT hibits = 0;
3103
3104     BARY_TRUNC(ds, n);
3105
3106     if (n != 0 && BIGNUM_NEGATIVE_P(x)) {
3107         VALUE z = bignew_1(CLASS_OF(x), n, 0);
3108         MEMCPY(BDIGITS(z), ds, BDIGIT, n);
3109         bary_2comp(BDIGITS(z), n);
3110         hibits = BDIGMAX;
3111         *xp = z;
3112     }
3113     *n_ret = n;
3114     return hibits;
3115 }
3116
3117 static void
3118 twocomp2abs_bang(VALUE x, int hibits)
3119 {
3120     BIGNUM_SET_SIGN(x, !hibits);
3121     if (hibits) {
3122         get2comp(x);
3123     }
3124 }
3125
3126 static inline VALUE
3127 bigtrunc(VALUE x)
3128 {
3129     size_t len = BIGNUM_LEN(x);
3130     BDIGIT *ds = BDIGITS(x);
3131
3132     if (len == 0) return x;
3133     while (--len && !ds[len]);
3134     if (BIGNUM_LEN(x) > len+1) {
3135         rb_big_resize(x, len+1);
3136     }
3137     return x;
3138 }
3139
3140 static inline VALUE
3141 bigfixize(VALUE x)
3142 {
3143     size_t n = BIGNUM_LEN(x);
3144     BDIGIT *ds = BDIGITS(x);
3145 #if SIZEOF_BDIGIT < SIZEOF_LONG
3146     unsigned long u;
3147 #else
3148     BDIGIT u;
3149 #endif
3150
3151     BARY_TRUNC(ds, n);
3152
3153     if (n == 0) return INT2FIX(0);
3154
3155 #if SIZEOF_BDIGIT < SIZEOF_LONG
3156     if (sizeof(long)/SIZEOF_BDIGIT < n)
3157         goto return_big;
3158     else {
3159         int i = (int)n;
3160         u = 0;
3161         while (i--) {
3162             u = (unsigned long)(BIGUP(u) + ds[i]);
3163         }
3164     }
3165 #else /* SIZEOF_BDIGIT >= SIZEOF_LONG */
3166     if (1 < n)
3167         goto return_big;
3168     else
3169         u = ds[0];
3170 #endif
3171
3172     if (BIGNUM_POSITIVE_P(x)) {
3173         if (POSFIXABLE(u)) return LONG2FIX((long)u);
3174     }
3175     else {
3176         if (u <= -FIXNUM_MIN) return LONG2FIX(-(long)u);
3177     }
3178
3179   return_big:
3180     rb_big_resize(x, n);
3181     return x;
3182 }
3183
3184 static VALUE
3185 bignorm(VALUE x)
3186 {
3187     if (RB_BIGNUM_TYPE_P(x)) {
3188         x = bigfixize(x);
3189     }
3190     return x;
3191 }
3192
3193 VALUE
3194 rb_big_norm(VALUE x)
3195 {
3196     return bignorm(x);
3197 }
3198
3199 VALUE
3200 rb_uint2big(uintptr_t n)
3201 {
3202     long i;
3203     VALUE big = bignew(bdigit_roomof(SIZEOF_VALUE), 1);
3204     BDIGIT *digits = BDIGITS(big);
3205
3206 #if SIZEOF_BDIGIT >= SIZEOF_VALUE
3207     digits[0] = n;
3208 #else
3209     for (i = 0; i < bdigit_roomof(SIZEOF_VALUE); i++) {
3210         digits[i] = BIGLO(n);
3211         n = BIGDN(n);
3212     }
3213 #endif
3214
3215     i = bdigit_roomof(SIZEOF_VALUE);
3216     while (--i && !digits[i]) ;
3217     BIGNUM_SET_LEN(big, i+1);
3218     return big;
3219 }
3220
3221 VALUE
3222 rb_int2big(intptr_t n)
3223 {
3224     long neg = 0;
3225     VALUE u;
3226     VALUE big;
3227
3228     if (n < 0) {
3229         u = 1 + (VALUE)(-(n + 1)); /* u = -n avoiding overflow */
3230         neg = 1;
3231     }
3232     else {
3233         u = n;
3234     }
3235     big = rb_uint2big(u);
3236     if (neg) {
3237         BIGNUM_SET_NEGATIVE_SIGN(big);
3238     }
3239     return big;
3240 }
3241
3242 VALUE
3243 rb_uint2inum(uintptr_t n)
3244 {
3245     if (POSFIXABLE(n)) return LONG2FIX(n);
3246     return rb_uint2big(n);
3247 }
3248
3249 VALUE
3250 rb_int2inum(intptr_t n)
3251 {
3252     if (FIXABLE(n)) return LONG2FIX(n);
3253     return rb_int2big(n);
3254 }
3255
3256 void
3257 rb_big_pack(VALUE val, unsigned long *buf, long num_longs)
3258 {
3259     rb_integer_pack(val, buf, num_longs, sizeof(long), 0,
3260             INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
3261             INTEGER_PACK_2COMP);
3262 }
3263
3264 VALUE
3265 rb_big_unpack(unsigned long *buf, long num_longs)
3266 {
3267     return rb_integer_unpack(buf, num_longs, sizeof(long), 0,
3268             INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
3269             INTEGER_PACK_2COMP);
3270 }
3271
3272 /*
3273  * Calculate the number of bytes to be required to represent
3274  * the absolute value of the integer given as _val_.
3275  *
3276  * [val] an integer.
3277  * [nlz_bits_ret] number of leading zero bits in the most significant byte is returned if not NULL.
3278  *
3279  * This function returns ((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT)
3280  * where val_numbits is the number of bits of abs(val).
3281  * This function should not overflow.
3282  *
3283  * If nlz_bits_ret is not NULL,
3284  * (return_value * CHAR_BIT - val_numbits) is stored in *nlz_bits_ret.
3285  * In this case, 0 <= *nlz_bits_ret < CHAR_BIT.
3286  *
3287  */
3288 size_t
3289 rb_absint_size(VALUE val, int *nlz_bits_ret)
3290 {
3291     BDIGIT *dp;
3292     BDIGIT *de;
3293     BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
3294
3295     int num_leading_zeros;
3296
3297     val = rb_to_int(val);
3298
3299     if (FIXNUM_P(val)) {
3300         long v = FIX2LONG(val);
3301         if (v < 0) {
3302             v = -v;
3303         }
3304 #if SIZEOF_BDIGIT >= SIZEOF_LONG
3305         fixbuf[0] = v;
3306 #else
3307         {
3308             int i;
3309             for (i = 0; i < numberof(fixbuf); i++) {
3310                 fixbuf[i] = BIGLO(v);
3311                 v = BIGDN(v);
3312             }
3313         }
3314 #endif
3315         dp = fixbuf;
3316         de = fixbuf + numberof(fixbuf);
3317     }
3318     else {
3319         dp = BDIGITS(val);
3320         de = dp + BIGNUM_LEN(val);
3321     }
3322     while (dp < de && de[-1] == 0)
3323         de--;
3324     if (dp == de) {
3325         if (nlz_bits_ret)
3326             *nlz_bits_ret = 0;
3327         return 0;
3328     }
3329     num_leading_zeros = nlz(de[-1]);
3330     if (nlz_bits_ret)
3331         *nlz_bits_ret = num_leading_zeros % CHAR_BIT;
3332     return (de - dp) * SIZEOF_BDIGIT - num_leading_zeros / CHAR_BIT;
3333 }
3334
3335 static size_t
3336 absint_numwords_small(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
3337 {
3338     size_t val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte;
3339     size_t div = val_numbits / word_numbits;
3340     size_t mod = val_numbits % word_numbits;
3341     size_t numwords;
3342     size_t nlz_bits;
3343     numwords = mod == 0 ? div : div + 1;
3344     nlz_bits = mod == 0 ? 0 : word_numbits - mod;
3345     *nlz_bits_ret = nlz_bits;
3346     return numwords;
3347 }
3348
3349 static size_t
3350 absint_numwords_generic(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
3351 {
3352     static const BDIGIT char_bit[1] = { CHAR_BIT };
3353     BDIGIT numbytes_bary[bdigit_roomof(sizeof(numbytes))];
3354     BDIGIT val_numbits_bary[bdigit_roomof(sizeof(numbytes) + 1)];
3355     BDIGIT nlz_bits_in_msbyte_bary[1];
3356     BDIGIT word_numbits_bary[bdigit_roomof(sizeof(word_numbits))];
3357     BDIGIT div_bary[numberof(val_numbits_bary) + BIGDIVREM_EXTRA_WORDS];
3358     BDIGIT mod_bary[numberof(word_numbits_bary)];
3359     BDIGIT one[1] = { 1 };
3360     size_t nlz_bits;
3361     size_t mod;
3362     int sign;
3363     size_t numwords;
3364
3365     nlz_bits_in_msbyte_bary[0] = nlz_bits_in_msbyte;
3366
3367     /*
3368      * val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte
3369      * div, mod = val_numbits.divmod(word_numbits)
3370      * numwords = mod == 0 ? div : div + 1
3371      * nlz_bits = mod == 0 ? 0 : word_numbits - mod
3372      */
3373
3374     bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0,
3375         INTEGER_PACK_NATIVE_BYTE_ORDER);
3376     BARY_SHORT_MUL(val_numbits_bary, numbytes_bary, char_bit);
3377     if (nlz_bits_in_msbyte)
3378         BARY_SUB(val_numbits_bary, val_numbits_bary, nlz_bits_in_msbyte_bary);
3379     bary_unpack(BARY_ARGS(word_numbits_bary), &word_numbits, 1, sizeof(word_numbits), 0,
3380         INTEGER_PACK_NATIVE_BYTE_ORDER);
3381     BARY_DIVMOD(div_bary, mod_bary, val_numbits_bary, word_numbits_bary);
3382     if (BARY_ZERO_P(mod_bary)) {
3383         nlz_bits = 0;
3384     }
3385     else {
3386         BARY_ADD(div_bary, div_bary, one);
3387         bary_pack(+1, BARY_ARGS(mod_bary), &mod, 1, sizeof(mod), 0,
3388             INTEGER_PACK_NATIVE_BYTE_ORDER);
3389         nlz_bits = word_numbits - mod;
3390     }
3391     sign = bary_pack(+1, BARY_ARGS(div_bary), &numwords, 1, sizeof(numwords), 0,
3392         INTEGER_PACK_NATIVE_BYTE_ORDER);
3393
3394     if (sign == 2) {
3395 #if defined __GNUC__ && (__GNUC__ == 4 && __GNUC_MINOR__ == 4)
3396         *nlz_bits_ret = 0;
3397 #endif
3398         return (size_t)-1;
3399     }
3400     *nlz_bits_ret = nlz_bits;
3401     return numwords;
3402 }
3403
3404 /*
3405  * Calculate the number of words to be required to represent
3406  * the absolute value of the integer given as _val_.
3407  *
3408  * [val] an integer.
3409  * [word_numbits] number of bits in a word.
3410  * [nlz_bits_ret] number of leading zero bits in the most significant word is returned if not NULL.
3411  *
3412  * This function returns ((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)
3413  * where val_numbits is the number of bits of abs(val).
3414  *
3415  * This function can overflow.
3416  * When overflow occur, (size_t)-1 is returned.
3417  *
3418  * If nlz_bits_ret is not NULL and overflow is not occur,
3419  * (return_value * word_numbits - val_numbits) is stored in *nlz_bits_ret.
3420  * In this case, 0 <= *nlz_bits_ret < word_numbits.
3421  *
3422  */
3423 size_t
3424 rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
3425 {
3426     size_t numbytes;
3427     int nlz_bits_in_msbyte;
3428     size_t numwords;
3429     size_t nlz_bits = 0;
3430
3431     if (word_numbits == 0)
3432         return (size_t)-1;
3433
3434     numbytes = rb_absint_size(val, &nlz_bits_in_msbyte);
3435
3436     if (numbytes <= SIZE_MAX / CHAR_BIT) {
3437         numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
3438         if (debug_integer_pack) {
3439             size_t numwords0, nlz_bits0;
3440             numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0);
3441             RUBY_ASSERT(numwords0 == numwords);
3442             RUBY_ASSERT(nlz_bits0 == nlz_bits);
3443             (void)numwords0;
3444         }
3445     }
3446     else {
3447         numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
3448     }
3449     if (numwords == (size_t)-1)
3450         return numwords;
3451
3452     if (nlz_bits_ret)
3453         *nlz_bits_ret = nlz_bits;
3454
3455     return numwords;
3456 }
3457
3458 /* Test abs(val) consists only a bit or not.
3459  *
3460  * Returns 1 if abs(val) == 1 << n for some n >= 0.
3461  * Returns 0 otherwise.
3462  *
3463  * rb_absint_singlebit_p can be used to determine required buffer size
3464  * for rb_integer_pack used with INTEGER_PACK_2COMP (two's complement).
3465  *
3466  * Following example calculates number of bits required to
3467  * represent val in two's complement number, without sign bit.
3468  *
3469  *   size_t size;
3470  *   int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val);
3471  *   size = rb_absint_numwords(val, 1, NULL)
3472  *   if (size == (size_t)-1) ...overflow...
3473  *   if (neg && rb_absint_singlebit_p(val))
3474  *     size--;
3475  *
3476  * Following example calculates number of bytes required to
3477  * represent val in two's complement number, with sign bit.
3478  *
3479  *   size_t size;
3480  *   int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val);
3481  *   int nlz_bits;
3482  *   size = rb_absint_size(val, &nlz_bits);
3483  *   if (nlz_bits == 0 && !(neg && rb_absint_singlebit_p(val)))
3484  *     size++;
3485  */
3486 int
3487 rb_absint_singlebit_p(VALUE val)
3488 {
3489     BDIGIT *dp;
3490     BDIGIT *de;
3491     BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
3492     BDIGIT d;
3493
3494     val = rb_to_int(val);
3495
3496     if (FIXNUM_P(val)) {
3497         long v = FIX2LONG(val);
3498         if (v < 0) {
3499             v = -v;
3500         }
3501 #if SIZEOF_BDIGIT >= SIZEOF_LONG
3502         fixbuf[0] = v;
3503 #else
3504         {
3505             int i;
3506             for (i = 0; i < numberof(fixbuf); i++) {
3507                 fixbuf[i] = BIGLO(v);
3508                 v = BIGDN(v);
3509             }
3510         }
3511 #endif
3512         dp = fixbuf;
3513         de = fixbuf + numberof(fixbuf);
3514     }
3515     else {
3516         dp = BDIGITS(val);
3517         de = dp + BIGNUM_LEN(val);
3518     }
3519     while (dp < de && de[-1] == 0)
3520         de--;
3521     while (dp < de && dp[0] == 0)
3522         dp++;
3523     if (dp == de) /* no bit set. */
3524         return 0;
3525     if (dp != de-1) /* two non-zero words. two bits set, at least. */
3526         return 0;
3527     d = *dp;
3528     return POW2_P(d);
3529 }
3530
3531
3532 /*
3533  * Export an integer into a buffer.
3534  *
3535  * This function fills the buffer specified by _words_ and _numwords_ as
3536  * val in the format specified by _wordsize_, _nails_ and _flags_.
3537  *
3538  * [val] Fixnum, Bignum or another integer like object which has to_int method.
3539  * [words] buffer to export abs(val).
3540  * [numwords] the size of given buffer as number of words.
3541  * [wordsize] the size of word as number of bytes.
3542  * [nails] number of padding bits in a word.
3543  *   Most significant nails bits of each word are filled by zero.
3544  * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
3545  *
3546  * flags:
3547  * [INTEGER_PACK_MSWORD_FIRST] Store the most significant word as the first word.
3548  * [INTEGER_PACK_LSWORD_FIRST] Store the least significant word as the first word.
3549  * [INTEGER_PACK_MSBYTE_FIRST] Store the most significant byte in a word as the first byte in the word.
3550  * [INTEGER_PACK_LSBYTE_FIRST] Store the least significant byte in a word as the first byte in the word.
3551  * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
3552  * [INTEGER_PACK_2COMP] Use 2's complement representation.
3553  * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
3554  * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
3555  * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
3556  *
3557  * This function fills the buffer specified by _words_
3558  * as abs(val) if INTEGER_PACK_2COMP is not specified in _flags_.
3559  * If INTEGER_PACK_2COMP is specified, 2's complement representation of val is
3560  * filled in the buffer.
3561  *
3562  * This function returns the signedness and overflow condition.
3563  * The overflow condition depends on INTEGER_PACK_2COMP.
3564  *
3565  * INTEGER_PACK_2COMP is not specified:
3566  *   -2 : negative overflow.  val <= -2**(numwords*(wordsize*CHAR_BIT-nails))
3567  *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0
3568  *   0 : zero.  val == 0
3569  *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
3570  *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
3571  *
3572  * INTEGER_PACK_2COMP is specified:
3573  *   -2 : negative overflow.  val < -2**(numwords*(wordsize*CHAR_BIT-nails))
3574  *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0
3575  *   0 : zero.  val == 0
3576  *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
3577  *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
3578  *
3579  * The value, -2**(numwords*(wordsize*CHAR_BIT-nails)), is representable
3580  * in 2's complement representation but not representable in absolute value.
3581  * So -1 is returned for the value if INTEGER_PACK_2COMP is specified
3582  * but returns -2 if INTEGER_PACK_2COMP is not specified.
3583  *
3584  * The least significant words are filled in the buffer when overflow occur.
3585  */
3586
3587 int
3588 rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
3589 {
3590     int sign;
3591     BDIGIT *ds;
3592     size_t num_bdigits;
3593     BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
3594
3595     RB_GC_GUARD(val) = rb_to_int(val);
3596
3597     if (FIXNUM_P(val)) {
3598         long v = FIX2LONG(val);
3599         if (v < 0) {
3600             sign = -1;
3601             v = -v;
3602         }
3603         else {
3604             sign = 1;
3605         }
3606 #if SIZEOF_BDIGIT >= SIZEOF_LONG
3607         fixbuf[0] = v;
3608 #else
3609         {
3610             int i;
3611             for (i = 0; i < numberof(fixbuf); i++) {
3612                 fixbuf[i] = BIGLO(v);
3613                 v = BIGDN(v);
3614             }
3615         }
3616 #endif
3617         ds = fixbuf;
3618         num_bdigits = numberof(fixbuf);
3619     }
3620     else {
3621         sign = BIGNUM_POSITIVE_P(val) ? 1 : -1;
3622         ds = BDIGITS(val);
3623         num_bdigits = BIGNUM_LEN(val);
3624     }
3625
3626     return bary_pack(sign, ds, num_bdigits, words, numwords, wordsize, nails, flags);
3627 }
3628
3629 /*
3630  * Import an integer from a buffer.
3631  *
3632  * [words] buffer to import.
3633  * [numwords] the size of given buffer as number of words.
3634  * [wordsize] the size of word as number of bytes.
3635  * [nails] number of padding bits in a word.
3636  *   Most significant nails bits of each word are ignored.
3637  * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
3638  *
3639  * flags:
3640  * [INTEGER_PACK_MSWORD_FIRST] Interpret the first word as the most significant word.
3641  * [INTEGER_PACK_LSWORD_FIRST] Interpret the first word as the least significant word.
3642  * [INTEGER_PACK_MSBYTE_FIRST] Interpret the first byte in a word as the most significant byte in the word.
3643  * [INTEGER_PACK_LSBYTE_FIRST] Interpret the first byte in a word as the least significant byte in the word.
3644  * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
3645  * [INTEGER_PACK_2COMP] Use 2's complement representation.
3646  * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
3647  * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
3648  * [INTEGER_PACK_FORCE_BIGNUM] the result will be a Bignum
3649  *   even if it is representable as a Fixnum.
3650  * [INTEGER_PACK_NEGATIVE] Returns non-positive value.
3651  *   (Returns non-negative value if not specified.)
3652  * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
3653  *
3654  * This function returns the imported integer as Fixnum or Bignum.
3655  *
3656  * The range of the result value depends on INTEGER_PACK_2COMP and INTEGER_PACK_NEGATIVE.
3657  *
3658  * INTEGER_PACK_2COMP is not set:
3659  *   0 <= val < 2**(numwords*(wordsize*CHAR_BIT-nails)) if !INTEGER_PACK_NEGATIVE
3660  *   -2**(numwords*(wordsize*CHAR_BIT-nails)) < val <= 0 if INTEGER_PACK_NEGATIVE
3661  *
3662  * INTEGER_PACK_2COMP is set:
3663  *   -2**(numwords*(wordsize*CHAR_BIT-nails)-1) <= val <= 2**(numwords*(wordsize*CHAR_BIT-nails)-1)-1 if !INTEGER_PACK_NEGATIVE
3664  *   -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val <= -1 if INTEGER_PACK_NEGATIVE
3665  *
3666  * INTEGER_PACK_2COMP without INTEGER_PACK_NEGATIVE means sign extension.
3667  * INTEGER_PACK_2COMP with INTEGER_PACK_NEGATIVE mean assuming the higher bits are 1.
3668  *
3669  * Note that this function returns 0 when numwords is zero and
3670  * INTEGER_PACK_2COMP is set but INTEGER_PACK_NEGATIVE is not set.
3671  */
3672
3673 VALUE
3674 rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
3675 {
3676     VALUE val;
3677     size_t num_bdigits;
3678     int sign;
3679     int nlp_bits;
3680     BDIGIT *ds;
3681     BDIGIT fixbuf[2] = { 0, 0 };
3682
3683     validate_integer_pack_format(numwords, wordsize, nails, flags,
3684             INTEGER_PACK_MSWORD_FIRST|
3685             INTEGER_PACK_LSWORD_FIRST|
3686             INTEGER_PACK_MSBYTE_FIRST|
3687             INTEGER_PACK_LSBYTE_FIRST|
3688             INTEGER_PACK_NATIVE_BYTE_ORDER|
3689             INTEGER_PACK_2COMP|
3690             INTEGER_PACK_FORCE_BIGNUM|
3691             INTEGER_PACK_NEGATIVE|
3692             INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
3693
3694     num_bdigits = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
3695
3696     if (LONG_MAX-1 < num_bdigits)
3697         rb_raise(rb_eArgError, "too big to unpack as an integer");
3698     if (num_bdigits <= numberof(fixbuf) && !(flags & INTEGER_PACK_FORCE_BIGNUM)) {
3699         val = Qfalse;
3700         ds = fixbuf;
3701     }
3702     else {
3703         val = bignew((long)num_bdigits, 0);
3704         ds = BDIGITS(val);
3705     }
3706     sign = bary_unpack_internal(ds, num_bdigits, words, numwords, wordsize, nails, flags, nlp_bits);
3707
3708     if (sign == -2) {
3709         if (val) {
3710             big_extend_carry(val);
3711         }
3712         else if (num_bdigits == numberof(fixbuf)) {
3713             val = bignew((long)num_bdigits+1, 0);
3714             MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
3715             BDIGITS(val)[num_bdigits++] = 1;
3716         }
3717         else {
3718             ds[num_bdigits++] = 1;
3719         }
3720     }
3721
3722     if (!val) {
3723         BDIGIT_DBL u = fixbuf[0] + BIGUP(fixbuf[1]);
3724         if (u == 0)
3725             return LONG2FIX(0);
3726         if (0 < sign && POSFIXABLE(u))
3727             return LONG2FIX((long)u);
3728         if (sign < 0 && BDIGIT_MSB(fixbuf[1]) == 0 &&
3729                 NEGFIXABLE(-(BDIGIT_DBL_SIGNED)u))
3730             return LONG2FIX((long)-(BDIGIT_DBL_SIGNED)u);
3731         val = bignew((long)num_bdigits, 0 <= sign);
3732         MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
3733     }
3734
3735     if ((flags & INTEGER_PACK_FORCE_BIGNUM) && sign != 0 &&
3736         bary_zero_p(BDIGITS(val), BIGNUM_LEN(val)))
3737         sign = 0;
3738     BIGNUM_SET_SIGN(val, 0 <= sign);
3739
3740     if (flags & INTEGER_PACK_FORCE_BIGNUM)
3741         return bigtrunc(val);
3742     return bignorm(val);
3743 }
3744
3745 #define conv_digit(c) (ruby_digit36_to_number_table[(unsigned char)(c)])
3746
3747 NORETURN(static inline void invalid_radix(int base));
3748 NORETURN(static inline void invalid_integer(VALUE s));
3749
3750 static inline int
3751 valid_radix_p(int base)
3752 {
3753     return (1 < base && base <= 36);
3754 }
3755
3756 static inline void
3757 invalid_radix(int base)
3758 {
3759     rb_raise(rb_eArgError, "invalid radix %d", base);
3760 }
3761
3762 static inline void
3763 invalid_integer(VALUE s)
3764 {
3765     rb_raise(rb_eArgError, "invalid value for Integer(): %+"PRIsVALUE, s);
3766 }
3767
3768 static int
3769 str2big_scan_digits(const char *s, const char *str, int base, int badcheck, size_t *num_digits_p, ssize_t *len_p)
3770 {
3771     char nondigit = 0;
3772     size_t num_digits = 0;
3773     const char *digits_start = str;
3774     const char *digits_end = str;
3775     ssize_t len = *len_p;
3776
3777     int c;
3778
3779     if (!len) {
3780         *num_digits_p = 0;
3781         *len_p = 0;
3782         return TRUE;
3783     }
3784
3785     if (badcheck && *str == '_') return FALSE;
3786
3787     while ((c = *str++) != 0) {
3788         if (c == '_') {
3789             if (nondigit) {
3790                 if (badcheck) return FALSE;
3791                 break;
3792             }
3793             nondigit = (char) c;
3794         }
3795         else if ((c = conv_digit(c)) < 0 || c >= base) {
3796             break;
3797         }
3798         else {
3799             nondigit = 0;
3800             num_digits++;
3801             digits_end = str;
3802         }
3803         if (len > 0 && !--len) break;
3804     }
3805     if (badcheck && nondigit) return FALSE;
3806     if (badcheck && len) {
3807         str--;
3808         while (*str && ISSPACE(*str)) {
3809             str++;
3810             if (len > 0 && !--len) break;
3811         }
3812         if (len && *str) {
3813             return FALSE;
3814         }
3815     }
3816     *num_digits_p = num_digits;
3817     *len_p = digits_end - digits_start;
3818     return TRUE;
3819 }
3820
3821 static VALUE
3822 str2big_poweroftwo(
3823     int sign,
3824     const char *digits_start,
3825     const char *digits_end,
3826     size_t num_digits,
3827     int bits_per_digit)
3828 {
3829     BDIGIT *dp;
3830     BDIGIT_DBL dd;
3831     int numbits;
3832
3833     size_t num_bdigits;
3834     const char *p;
3835     int c;
3836     VALUE z;
3837
3838     num_bdigits = (num_digits / BITSPERDIG) * bits_per_digit + roomof((num_digits % BITSPERDIG) * bits_per_digit, BITSPERDIG);
3839     z = bignew(num_bdigits, sign);
3840     dp = BDIGITS(z);
3841     dd = 0;
3842     numbits = 0;
3843     for (p = digits_end; digits_start < p; p--) {
3844         if ((c = conv_digit(p[-1])) < 0)
3845             continue;
3846         dd |= (BDIGIT_DBL)c << numbits;
3847         numbits += bits_per_digit;
3848         if (BITSPERDIG <= numbits) {
3849             *dp++ = BIGLO(dd);
3850             dd = BIGDN(dd);
3851             numbits -= BITSPERDIG;
3852         }
3853     }
3854     if (numbits) {
3855         *dp++ = BIGLO(dd);
3856     }
3857     RUBY_ASSERT((size_t)(dp - BDIGITS(z)) == num_bdigits);
3858
3859     return z;
3860 }
3861
3862 static VALUE
3863 str2big_normal(
3864     int sign,
3865     const char *digits_start,
3866     const char *digits_end,
3867     size_t num_bdigits,
3868     int base)
3869 {
3870     size_t blen = 1;
3871     BDIGIT *zds;
3872     BDIGIT_DBL num;
3873
3874     size_t i;
3875     const char *p;
3876     int c;
3877     VALUE z;
3878
3879     z = bignew(num_bdigits, sign);
3880     zds = BDIGITS(z);
3881     BDIGITS_ZERO(zds, num_bdigits);
3882
3883     for (p = digits_start; p < digits_end; p++) {
3884         if ((c = conv_digit(*p)) < 0)
3885             continue;
3886         num = c;
3887         i = 0;
3888         for (;;) {
3889             while (i<blen) {
3890                 num += (BDIGIT_DBL)zds[i]*base;
3891                 zds[i++] = BIGLO(num);
3892                 num = BIGDN(num);
3893             }
3894             if (num) {
3895                 blen++;
3896                 continue;
3897             }
3898             break;
3899         }
3900         RUBY_ASSERT(blen <= num_bdigits);
3901     }
3902
3903     return z;
3904 }
3905
3906 static VALUE
3907 str2big_karatsuba(
3908     int sign,
3909     const char *digits_start,
3910     const char *digits_end,
3911     size_t num_digits,
3912     size_t num_bdigits,
3913     int digits_per_bdigits_dbl,
3914     int base)
3915 {
3916     VALUE powerv;
3917     size_t unit;
3918     VALUE tmpuv = 0;
3919     BDIGIT *uds, *vds, *tds;
3920     BDIGIT_DBL dd;
3921     BDIGIT_DBL current_base;
3922     int m;
3923     int power_level = 0;
3924
3925     size_t i;
3926     const char *p;
3927     int c;
3928     VALUE z;
3929
3930     uds = ALLOCV_N(BDIGIT, tmpuv, 2*num_bdigits);
3931     vds = uds + num_bdigits;
3932
3933     powerv = power_cache_get_power(base, power_level, NULL);
3934
3935     i = 0;
3936     dd = 0;
3937     current_base = 1;
3938     m = digits_per_bdigits_dbl;
3939     if (num_digits < (size_t)m)
3940         m = (int)num_digits;
3941     for (p = digits_end; digits_start < p; p--) {
3942         if ((c = conv_digit(p[-1])) < 0)
3943             continue;
3944         dd = dd + c * current_base;
3945         current_base *= base;
3946         num_digits--;
3947         m--;
3948         if (m == 0) {
3949             uds[i++] = BIGLO(dd);
3950             uds[i++] = (BDIGIT)BIGDN(dd);
3951             dd = 0;
3952             m = digits_per_bdigits_dbl;
3953             if (num_digits < (size_t)m)
3954                 m = (int)num_digits;
3955             current_base = 1;
3956         }
3957     }
3958     RUBY_ASSERT(i == num_bdigits);
3959     for (unit = 2; unit < num_bdigits; unit *= 2) {
3960         for (i = 0; i < num_bdigits; i += unit*2) {
3961             if (2*unit <= num_bdigits - i) {
3962                 bary_mul(vds+i, unit*2, BDIGITS(powerv), BIGNUM_LEN(powerv), uds+i+unit, unit);
3963                 bary_add(vds+i, unit*2, vds+i, unit*2, uds+i, unit);
3964             }
3965             else if (unit <= num_bdigits - i) {
3966                 bary_mul(vds+i, num_bdigits-i, BDIGITS(powerv), BIGNUM_LEN(powerv), uds+i+unit, num_bdigits-(i+unit));
3967                 bary_add(vds+i, num_bdigits-i, vds+i, num_bdigits-i, uds+i, unit);
3968             }
3969             else {
3970                 MEMCPY(vds+i, uds+i, BDIGIT, num_bdigits-i);
3971             }
3972         }
3973         power_level++;
3974         powerv = power_cache_get_power(base, power_level, NULL);
3975         tds = vds;
3976         vds = uds;
3977         uds = tds;
3978     }
3979     BARY_TRUNC(uds, num_bdigits);
3980     z = bignew(num_bdigits, sign);
3981     MEMCPY(BDIGITS(z), uds, BDIGIT, num_bdigits);
3982
3983     if (tmpuv)
3984         ALLOCV_END(tmpuv);
3985
3986     return z;
3987 }
3988
3989 #if USE_GMP
3990 static VALUE
3991 str2big_gmp(
3992     int sign,
3993     const char *digits_start,
3994     const char *digits_end,
3995     size_t num_digits,
3996     size_t num_bdigits,
3997     int base)
3998 {
3999     char *buf, *p;
4000     const char *q;
4001     VALUE tmps;
4002     mpz_t mz;
4003     VALUE z;
4004     BDIGIT *zds;
4005     size_t zn, count;
4006
4007     buf = ALLOCV_N(char, tmps, num_digits+1);
4008     p = buf;
4009     for (q = digits_start; q < digits_end; q++) {
4010         if (conv_digit(*q) < 0)
4011             continue;
4012         *p++ = *q;
4013     }
4014     *p = '\0';
4015
4016     mpz_init(mz);
4017     mpz_set_str(mz, buf, base);
4018     zn = num_bdigits;
4019     z = bignew(zn, sign);
4020     zds = BDIGITS(z);
4021     bdigits_from_mpz(mz, BDIGITS(z), &count);
4022     BDIGITS_ZERO(zds+count, zn-count);
4023     mpz_clear(mz);
4024
4025     if (tmps)
4026         ALLOCV_END(tmps);
4027
4028     return z;
4029 }
4030 #endif
4031
4032 static VALUE rb_cstr_parse_inum(const char *str, ssize_t len, char **endp, int base);
4033
4034 /*
4035  * Parse +str+ as Ruby Integer, i.e., underscores, 0d and 0b prefixes.
4036  *
4037  * str:      pointer to the string to be parsed.
4038  *           should be NUL-terminated.
4039  * base:     base of conversion, must be 2..36, or -36..0.
4040  *           if +base+ > 0, the conversion is done according to the +base+
4041  *           and unmatched prefix is parsed as a part of the result if
4042  *           present.
4043  *           if +base+ <= 0, the conversion is done according to the
4044  *           prefix if present, in base <code>-base</code> if +base+ < -1,
4045  *           or in base 10.
4046  * badcheck: if non-zero, +ArgumentError+ is raised when +str+ is not
4047  *           valid as an Integer.  if zero, Fixnum 0 is returned in
4048  *           that case.
4049  */
4050 VALUE
4051 rb_cstr_to_inum(const char *str, int base, int badcheck)
4052 {
4053     char *end;
4054     VALUE ret = rb_cstr_parse_inum(str, -1, (badcheck ? NULL : &end), base);
4055     if (NIL_P(ret)) {
4056         if (badcheck) rb_invalid_str(str, "Integer()");
4057         ret = INT2FIX(0);
4058     }
4059     return ret;
4060 }
4061
4062 /*
4063  * Parse +str+ as Ruby Integer, i.e., underscores, 0d and 0b prefixes.
4064  *
4065  * str:  pointer to the string to be parsed.
4066  *       should be NUL-terminated if +len+ is negative.
4067  * len:  length of +str+ if >= 0.  if +len+ is negative, +str+ should
4068  *       be NUL-terminated.
4069  * endp: if non-NULL, the address after parsed part is stored.  if
4070  *       NULL, Qnil is returned when +str+ is not valid as an Integer.
4071  * ndigits: if non-NULL, the number of parsed digits is stored.
4072  * base: see +rb_cstr_to_inum+
4073  * flags: bitwise OR of below flags:
4074  *       RB_INT_PARSE_SIGN: allow preceding spaces and +/- sign
4075  *       RB_INT_PARSE_UNDERSCORE: allow an underscore between digits
4076  *       RB_INT_PARSE_PREFIX: allow preceding prefix
4077  */
4078
4079 VALUE
4080 rb_int_parse_cstr(const char *str, ssize_t len, char **endp, size_t *ndigits,
4081                   int base, int flags)
4082 {
4083     const char *const s = str;
4084     char sign = 1;
4085     int c;
4086     VALUE z = Qnil;
4087
4088     unsigned long val;
4089     int ov;
4090
4091     const char *digits_start, *digits_end;
4092     size_t num_digits = 0;
4093     size_t num_bdigits;
4094     const ssize_t len0 = len;
4095     const int badcheck = !endp;
4096
4097 #define ADV(n) do {\
4098         if (len > 0 && len <= (n)) goto bad; \
4099         str += (n); \
4100         len -= (n); \
4101     } while (0)
4102 #define ASSERT_LEN() do {\
4103         RUBY_ASSERT(len != 0); \
4104         if (len0 >= 0) RUBY_ASSERT(s + len0 == str + len); \
4105     } while (0)
4106
4107     if (!str) {
4108         goto bad;
4109     }
4110     if (len && (flags & RB_INT_PARSE_SIGN)) {
4111         while (ISSPACE(*str)) ADV(1);
4112
4113         if (str[0] == '+') {
4114             ADV(1);
4115         }
4116         else if (str[0] == '-') {
4117             ADV(1);
4118             sign = 0;
4119         }
4120         ASSERT_LEN();
4121     }
4122     if (base <= 0) {
4123         if (str[0] == '0' && len > 1) {
4124             switch (str[1]) {
4125               case 'x': case 'X':
4126                 base = 16;
4127                 ADV(2);
4128                 break;
4129               case 'b': case 'B':
4130                 base = 2;
4131                 ADV(2);
4132                 break;
4133               case 'o': case 'O':
4134                 base = 8;
4135                 ADV(2);
4136                 break;
4137               case 'd': case 'D':
4138                 base = 10;
4139                 ADV(2);
4140                 break;
4141               default:
4142                 base = 8;
4143             }
4144         }
4145         else if (base < -1) {
4146             base = -base;
4147         }
4148         else {
4149             base = 10;
4150         }
4151     }
4152     else if (len == 1 || !(flags & RB_INT_PARSE_PREFIX)) {
4153         /* no prefix */
4154     }
4155     else if (base == 2) {
4156         if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
4157             ADV(2);
4158         }
4159     }
4160     else if (base == 8) {
4161         if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
4162             ADV(2);
4163         }
4164     }
4165     else if (base == 10) {
4166         if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
4167             ADV(2);
4168         }
4169     }
4170     else if (base == 16) {
4171         if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
4172             ADV(2);
4173         }
4174     }
4175     if (!valid_radix_p(base)) {
4176         invalid_radix(base);
4177     }
4178     if (!len) goto bad;
4179     num_digits = str - s;
4180     if (*str == '0' && len != 1) { /* squeeze preceding 0s */
4181         int us = 0;
4182         const char *end = len < 0 ? NULL : str + len;
4183         ++num_digits;
4184         while ((c = *++str) == '0' ||
4185                ((flags & RB_INT_PARSE_UNDERSCORE) && c == '_')) {
4186             if (c == '_') {
4187                 if (++us >= 2)
4188                     break;
4189             }
4190             else {
4191                 ++num_digits;
4192                 us = 0;
4193             }
4194             if (str == end) break;
4195         }
4196         if (!c || ISSPACE(c)) --str;
4197         if (end) len = end - str;
4198     }
4199     c = *str;
4200     c = conv_digit(c);
4201     if (c < 0 || c >= base) {
4202         if (!badcheck && num_digits) z = INT2FIX(0);
4203         goto bad;
4204     }
4205
4206     if (ndigits) *ndigits = num_digits;
4207     val = ruby_scan_digits(str, len, base, &num_digits, &ov);
4208     if (!ov) {
4209         const char *end = &str[num_digits];
4210         if (num_digits > 0 && *end == '_' && (flags & RB_INT_PARSE_UNDERSCORE))
4211             goto bigparse;
4212         if (endp) *endp = (char *)end;
4213         if (ndigits) *ndigits += num_digits;
4214         if (badcheck) {
4215             if (num_digits == 0) return Qnil; /* no number */
4216             while (len < 0 ? *end : end < str + len) {
4217                 if (!ISSPACE(*end)) return Qnil; /* trailing garbage */
4218                 end++;
4219             }
4220         }
4221
4222         if (POSFIXABLE(val)) {
4223             if (sign) return LONG2FIX(val);
4224             else {
4225                 long result = -(long)val;
4226                 return LONG2FIX(result);
4227             }
4228         }
4229         else {
4230             VALUE big = rb_uint2big(val);
4231             BIGNUM_SET_SIGN(big, sign);
4232             return bignorm(big);
4233         }
4234     }
4235
4236   bigparse:
4237     digits_start = str;
4238     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4239         goto bad;
4240     if (endp) *endp = (char *)(str + len);
4241     if (ndigits) *ndigits += num_digits;
4242     digits_end = digits_start + len;
4243
4244     if (POW2_P(base)) {
4245         z = str2big_poweroftwo(sign, digits_start, digits_end, num_digits,
4246                                bit_length(base-1));
4247     }
4248     else {
4249         int digits_per_bdigits_dbl;
4250         maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4251         num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4252
4253 #if USE_GMP
4254         if (GMP_STR2BIG_DIGITS < num_bdigits) {
4255             z = str2big_gmp(sign, digits_start, digits_end, num_digits,
4256                     num_bdigits, base);
4257         }
4258         else
4259 #endif
4260         if (num_bdigits < KARATSUBA_MUL_DIGITS) {
4261             z = str2big_normal(sign, digits_start, digits_end,
4262                     num_bdigits, base);
4263         }
4264         else {
4265             z = str2big_karatsuba(sign, digits_start, digits_end, num_digits,
4266                     num_bdigits, digits_per_bdigits_dbl, base);
4267         }
4268     }
4269
4270     return bignorm(z);
4271
4272   bad:
4273     if (endp) *endp = (char *)str;
4274     if (ndigits) *ndigits = num_digits;
4275     return z;
4276 }
4277
4278 static VALUE
4279 rb_cstr_parse_inum(const char *str, ssize_t len, char **endp, int base)
4280 {
4281     return rb_int_parse_cstr(str, len, endp, NULL, base,
4282                              RB_INT_PARSE_DEFAULT);
4283 }
4284
4285 VALUE
4286 rb_str_convert_to_inum(VALUE str, int base, int badcheck, int raise_exception)
4287 {
4288     VALUE ret;
4289     const char *s;
4290     long len;
4291     char *end;
4292
4293     StringValue(str);
4294     rb_must_asciicompat(str);
4295     RSTRING_GETMEM(str, s, len);
4296     ret = rb_cstr_parse_inum(s, len, (badcheck ? NULL : &end), base);
4297     if (NIL_P(ret)) {
4298         if (badcheck) {
4299             if (!raise_exception) return Qnil;
4300             invalid_integer(str);
4301         }
4302         ret = INT2FIX(0);
4303     }
4304     return ret;
4305 }
4306
4307 VALUE
4308 rb_str_to_inum(VALUE str, int base, int badcheck)
4309 {
4310     return rb_str_convert_to_inum(str, base, badcheck, TRUE);
4311 }
4312
4313 VALUE
4314 rb_str2big_poweroftwo(VALUE arg, int base, int badcheck)
4315 {
4316     int positive_p = 1;
4317     const char *s, *str;
4318     const char *digits_start, *digits_end;
4319     size_t num_digits;
4320     ssize_t len;
4321     VALUE z;
4322
4323     if (!valid_radix_p(base) || !POW2_P(base)) {
4324         invalid_radix(base);
4325     }
4326
4327     rb_must_asciicompat(arg);
4328     s = str = StringValueCStr(arg);
4329     len = RSTRING_LEN(arg);
4330     if (*str == '-') {
4331         len--;
4332         str++;
4333         positive_p = 0;
4334     }
4335
4336     digits_start = str;
4337     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4338         invalid_integer(arg);
4339     digits_end = digits_start + len;
4340
4341     z = str2big_poweroftwo(positive_p, digits_start, digits_end, num_digits,
4342             bit_length(base-1));
4343
4344     RB_GC_GUARD(arg);
4345
4346     return bignorm(z);
4347 }
4348
4349 VALUE
4350 rb_str2big_normal(VALUE arg, int base, int badcheck)
4351 {
4352     int positive_p = 1;
4353     const char *s, *str;
4354     const char *digits_start, *digits_end;
4355     size_t num_digits;
4356     ssize_t len;
4357     VALUE z;
4358
4359     int digits_per_bdigits_dbl;
4360     size_t num_bdigits;
4361
4362     if (!valid_radix_p(base)) {
4363         invalid_radix(base);
4364     }
4365
4366     rb_must_asciicompat(arg);
4367     s = str = StringValuePtr(arg);
4368     len = RSTRING_LEN(arg);
4369     if (len > 0 && *str == '-') {
4370         len--;
4371         str++;
4372         positive_p = 0;
4373     }
4374
4375     digits_start = str;
4376     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4377         invalid_integer(arg);
4378     digits_end = digits_start + len;
4379
4380     maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4381     num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4382
4383     z = str2big_normal(positive_p, digits_start, digits_end,
4384             num_bdigits, base);
4385
4386     RB_GC_GUARD(arg);
4387
4388     return bignorm(z);
4389 }
4390
4391 VALUE
4392 rb_str2big_karatsuba(VALUE arg, int base, int badcheck)
4393 {
4394     int positive_p = 1;
4395     const char *s, *str;
4396     const char *digits_start, *digits_end;
4397     size_t num_digits;
4398     ssize_t len;
4399     VALUE z;
4400
4401     int digits_per_bdigits_dbl;
4402     size_t num_bdigits;
4403
4404     if (!valid_radix_p(base)) {
4405         invalid_radix(base);
4406     }
4407
4408     rb_must_asciicompat(arg);
4409     s = str = StringValuePtr(arg);
4410     len = RSTRING_LEN(arg);
4411     if (len > 0 && *str == '-') {
4412         len--;
4413         str++;
4414         positive_p = 0;
4415     }
4416
4417     digits_start = str;
4418     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4419         invalid_integer(arg);
4420     digits_end = digits_start + len;
4421
4422     maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4423     num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4424
4425     z = str2big_karatsuba(positive_p, digits_start, digits_end, num_digits,
4426             num_bdigits, digits_per_bdigits_dbl, base);
4427
4428     RB_GC_GUARD(arg);
4429
4430     return bignorm(z);
4431 }
4432
4433 #if USE_GMP
4434 VALUE
4435 rb_str2big_gmp(VALUE arg, int base, int badcheck)
4436 {
4437     int positive_p = 1;
4438     const char *s, *str;
4439     const char *digits_start, *digits_end;
4440     size_t num_digits;
4441     ssize_t len;
4442     VALUE z;
4443
4444     int digits_per_bdigits_dbl;
4445     size_t num_bdigits;
4446
4447     if (!valid_radix_p(base)) {
4448         invalid_radix(base);
4449     }
4450
4451     rb_must_asciicompat(arg);
4452     s = str = StringValuePtr(arg);
4453     len = RSTRING_LEN(arg);
4454     if (len > 0 && *str == '-') {
4455         len--;
4456         str++;
4457         positive_p = 0;
4458     }
4459
4460     digits_start = str;
4461     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4462         invalid_integer(arg);
4463     digits_end = digits_start + len;
4464
4465     maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4466     num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4467
4468     z = str2big_gmp(positive_p, digits_start, digits_end, num_digits, num_bdigits, base);
4469
4470     RB_GC_GUARD(arg);
4471
4472     return bignorm(z);
4473 }
4474 #endif
4475
4476 #if HAVE_LONG_LONG
4477
4478 static VALUE
4479 rb_ull2big(unsigned LONG_LONG n)
4480 {
4481     long i;
4482     VALUE big = bignew(bdigit_roomof(SIZEOF_LONG_LONG), 1);
4483     BDIGIT *digits = BDIGITS(big);
4484
4485 #if SIZEOF_BDIGIT >= SIZEOF_LONG_LONG
4486     digits[0] = n;
4487 #else
4488     for (i = 0; i < bdigit_roomof(SIZEOF_LONG_LONG); i++) {
4489         digits[i] = BIGLO(n);
4490         n = BIGDN(n);
4491     }
4492 #endif
4493
4494     i = bdigit_roomof(SIZEOF_LONG_LONG);
4495     while (i-- && !digits[i]) ;
4496     BIGNUM_SET_LEN(big, i+1);
4497     return big;
4498 }
4499
4500 static VALUE
4501 rb_ll2big(LONG_LONG n)
4502 {
4503     long neg = 0;
4504     unsigned LONG_LONG u;
4505     VALUE big;
4506
4507     if (n < 0) {
4508         u = 1 + (unsigned LONG_LONG)(-(n + 1)); /* u = -n avoiding overflow */
4509         neg = 1;
4510     }
4511     else {
4512         u = n;
4513     }
4514     big = rb_ull2big(u);
4515     if (neg) {
4516         BIGNUM_SET_NEGATIVE_SIGN(big);
4517     }
4518     return big;
4519 }
4520
4521 VALUE
4522 rb_ull2inum(unsigned LONG_LONG n)
4523 {
4524     if (POSFIXABLE(n)) return LONG2FIX((long)n);
4525     return rb_ull2big(n);
4526 }
4527
4528 VALUE
4529 rb_ll2inum(LONG_LONG n)
4530 {
4531     if (FIXABLE(n)) return LONG2FIX((long)n);
4532     return rb_ll2big(n);
4533 }
4534
4535 #endif  /* HAVE_LONG_LONG */
4536
4537 #ifdef HAVE_INT128_T
4538 static VALUE
4539 rb_uint128t2big(uint128_t n)
4540 {
4541     long i;
4542     VALUE big = bignew(bdigit_roomof(SIZEOF_INT128_T), 1);
4543     BDIGIT *digits = BDIGITS(big);
4544
4545     for (i = 0; i < bdigit_roomof(SIZEOF_INT128_T); i++) {
4546         digits[i] = BIGLO(RSHIFT(n ,BITSPERDIG*i));
4547     }
4548
4549     i = bdigit_roomof(SIZEOF_INT128_T);
4550     while (i-- && !digits[i]) ;
4551     BIGNUM_SET_LEN(big, i+1);
4552     return big;
4553 }
4554
4555 VALUE
4556 rb_int128t2big(int128_t n)
4557 {
4558     int neg = 0;
4559     uint128_t u;
4560     VALUE big;
4561
4562     if (n < 0) {
4563         u = 1 + (uint128_t)(-(n + 1)); /* u = -n avoiding overflow */
4564         neg = 1;
4565     }
4566     else {
4567         u = n;
4568     }
4569     big = rb_uint128t2big(u);
4570     if (neg) {
4571         BIGNUM_SET_NEGATIVE_SIGN(big);
4572     }
4573     return big;
4574 }
4575 #endif
4576
4577 VALUE
4578 rb_cstr2inum(const char *str, int base)
4579 {
4580     return rb_cstr_to_inum(str, base, base==0);
4581 }
4582
4583 VALUE
4584 rb_str2inum(VALUE str, int base)
4585 {
4586     return rb_str_to_inum(str, base, base==0);
4587 }
4588
4589 static VALUE
4590 big_shift3(VALUE x, int lshift_p, size_t shift_numdigits, int shift_numbits)
4591 {
4592     BDIGIT *xds, *zds;
4593     long s1;
4594     int s2;
4595     VALUE z;
4596     long xn;
4597
4598     if (lshift_p) {
4599         if (LONG_MAX < shift_numdigits) {
4600           too_big:
4601             rb_raise(rb_eRangeError, "shift width too big");
4602         }
4603         s1 = shift_numdigits;
4604         s2 = shift_numbits;
4605         if ((size_t)s1 != shift_numdigits) goto too_big;
4606         xn = BIGNUM_LEN(x);
4607         if (LONG_MAX/SIZEOF_BDIGIT <= xn+s1) goto too_big;
4608         z = bignew(xn+s1+1, BIGNUM_SIGN(x));
4609         zds = BDIGITS(z);
4610         BDIGITS_ZERO(zds, s1);
4611         xds = BDIGITS(x);
4612         zds[xn+s1] = bary_small_lshift(zds+s1, xds, xn, s2);
4613     }
4614     else {
4615         long zn;
4616         BDIGIT hibitsx;
4617         if (LONG_MAX < shift_numdigits || (size_t)BIGNUM_LEN(x) <= shift_numdigits) {
4618             if (BIGNUM_POSITIVE_P(x) ||
4619                 bary_zero_p(BDIGITS(x), BIGNUM_LEN(x)))
4620                 return INT2FIX(0);
4621             else
4622                 return INT2FIX(-1);
4623         }
4624         s1 = shift_numdigits;
4625         s2 = shift_numbits;
4626         hibitsx = abs2twocomp(&x, &xn);
4627         xds = BDIGITS(x);
4628         if (xn <= s1) {
4629             return hibitsx ? INT2FIX(-1) : INT2FIX(0);
4630         }
4631         zn = xn - s1;
4632         z = bignew(zn, 0);
4633         zds = BDIGITS(z);
4634         bary_small_rshift(zds, xds+s1, zn, s2, hibitsx != 0 ? BDIGMAX : 0);
4635         twocomp2abs_bang(z, hibitsx != 0);
4636     }
4637     RB_GC_GUARD(x);
4638     return z;
4639 }
4640
4641 static VALUE
4642 big_shift2(VALUE x, int lshift_p, VALUE y)
4643 {
4644     int sign;
4645     size_t lens[2];
4646     size_t shift_numdigits;
4647     int shift_numbits;
4648
4649     RUBY_ASSERT(POW2_P(CHAR_BIT));
4650     RUBY_ASSERT(POW2_P(BITSPERDIG));
4651
4652     if (BIGZEROP(x))
4653         return INT2FIX(0);
4654     sign = rb_integer_pack(y, lens, numberof(lens), sizeof(size_t), 0,
4655         INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER);
4656     if (sign < 0) {
4657         lshift_p = !lshift_p;
4658         sign = -sign;
4659     }
4660     if (lshift_p) {
4661         if (1 < sign || CHAR_BIT <= lens[1])
4662             rb_raise(rb_eRangeError, "shift width too big");
4663     }
4664     else {
4665         if (1 < sign || CHAR_BIT <= lens[1])
4666             return BIGNUM_POSITIVE_P(x) ? INT2FIX(0) : INT2FIX(-1);
4667     }
4668     shift_numbits = (int)(lens[0] & (BITSPERDIG-1));
4669     shift_numdigits = (lens[0] >> bit_length(BITSPERDIG-1)) |
4670       (lens[1] << (CHAR_BIT*SIZEOF_SIZE_T - bit_length(BITSPERDIG-1)));
4671     return big_shift3(x, lshift_p, shift_numdigits, shift_numbits);
4672 }
4673
4674 static VALUE
4675 big_lshift(VALUE x, unsigned long shift)
4676 {
4677     long s1 = shift/BITSPERDIG;
4678     int s2 = (int)(shift%BITSPERDIG);
4679     return big_shift3(x, 1, s1, s2);
4680 }
4681
4682 static VALUE
4683 big_rshift(VALUE x, unsigned long shift)
4684 {
4685     long s1 = shift/BITSPERDIG;
4686     int s2 = (int)(shift%BITSPERDIG);
4687     return big_shift3(x, 0, s1, s2);
4688 }
4689
4690 #define MAX_BASE36_POWER_TABLE_ENTRIES (SIZEOF_SIZE_T * CHAR_BIT + 1)
4691
4692 static VALUE base36_power_cache[35][MAX_BASE36_POWER_TABLE_ENTRIES];
4693 static size_t base36_numdigits_cache[35][MAX_BASE36_POWER_TABLE_ENTRIES];
4694
4695 static void
4696 power_cache_init(void)
4697 {
4698 }
4699
4700 static inline VALUE
4701 power_cache_get_power(int base, int power_level, size_t *numdigits_ret)
4702 {
4703     /*
4704      * MAX_BASE36_POWER_TABLE_ENTRIES is big enough to that
4705      * base36_power_cache[base][MAX_BASE36_POWER_TABLE_ENTRIES-1] fills whole memory.
4706      * So MAX_BASE36_POWER_TABLE_ENTRIES <= power_level is not possible to calculate.
4707      *
4708      * number-of-bytes =
4709      * log256(base36_power_cache[base][MAX_BASE36_POWER_TABLE_ENTRIES-1]) =
4710      * log256(maxpow_in_bdigit_dbl(base)**(2**(MAX_BASE36_POWER_TABLE_ENTRIES-1))) =
4711      * log256(maxpow_in_bdigit_dbl(base)**(2**(SIZEOF_SIZE_T*CHAR_BIT))) =
4712      * (2**(SIZEOF_SIZE_T*CHAR_BIT))*log256(maxpow_in_bdigit_dbl(base)) =
4713      * (256**SIZEOF_SIZE_T)*log256(maxpow_in_bdigit_dbl(base)) >
4714      * (256**SIZEOF_SIZE_T)*(sizeof(BDIGIT_DBL)-1) >
4715      * 256**SIZEOF_SIZE_T
4716      */
4717     if (MAX_BASE36_POWER_TABLE_ENTRIES <= power_level)
4718         rb_bug("too big power number requested: maxpow_in_bdigit_dbl(%d)**(2**%d)", base, power_level);
4719
4720     VALUE power = base36_power_cache[base - 2][power_level];
4721     if (!power) {
4722         size_t numdigits;
4723         if (power_level == 0) {
4724             int numdigits0;
4725             BDIGIT_DBL dd = maxpow_in_bdigit_dbl(base, &numdigits0);
4726             power = bignew(2, 1);
4727             bdigitdbl2bary(BDIGITS(power), 2, dd);
4728             numdigits = numdigits0;
4729         }
4730         else {
4731             power = bigtrunc(bigsq(power_cache_get_power(base, power_level - 1, &numdigits)));
4732             numdigits *= 2;
4733         }
4734         rb_obj_hide(power);
4735         base36_power_cache[base - 2][power_level] = power;
4736         base36_numdigits_cache[base - 2][power_level] = numdigits;
4737         rb_vm_register_global_object(power);
4738     }
4739     if (numdigits_ret)
4740         *numdigits_ret = base36_numdigits_cache[base - 2][power_level];
4741     return power;
4742 }
4743
4744 struct big2str_struct {
4745     int negative;
4746     int base;
4747     BDIGIT_DBL hbase2;
4748     int hbase2_numdigits;
4749     VALUE result;
4750     char *ptr;
4751 };
4752
4753 static void
4754 big2str_alloc(struct big2str_struct *b2s, size_t len)
4755 {
4756     if (LONG_MAX-1 < len)
4757         rb_raise(rb_eArgError, "too big number");
4758     b2s->result = rb_usascii_str_new(0, (long)(len + 1)); /* plus one for sign */
4759     b2s->ptr = RSTRING_PTR(b2s->result);
4760     if (b2s->negative)
4761         *b2s->ptr++ = '-';
4762 }
4763
4764 static void
4765 big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t taillen)
4766 {
4767     size_t j;
4768     BDIGIT_DBL num;
4769     char buf[SIZEOF_BDIGIT_DBL*CHAR_BIT], *p;
4770     int beginning = !b2s->ptr;
4771     size_t len = 0;
4772
4773     RUBY_ASSERT(xn <= 2);
4774     num = bary2bdigitdbl(xds, xn);
4775
4776     if (beginning) {
4777         if (num == 0)
4778             return;
4779         p = buf;
4780         j = sizeof(buf);
4781         do {
4782             BDIGIT_DBL idx = num % b2s->base;
4783             num /= b2s->base;
4784             p[--j] = ruby_digitmap[idx];
4785         } while (num);
4786         len = sizeof(buf) - j;
4787         big2str_alloc(b2s, len + taillen);
4788         MEMCPY(b2s->ptr, buf + j, char, len);
4789     }
4790     else {
4791         p = b2s->ptr;
4792         j = b2s->hbase2_numdigits;
4793         do {
4794             BDIGIT_DBL idx = num % b2s->base;
4795             num /= b2s->base;
4796             p[--j] = ruby_digitmap[idx];
4797         } while (j);
4798         len = b2s->hbase2_numdigits;
4799     }
4800     b2s->ptr += len;
4801 }
4802
4803 static void
4804 big2str_karatsuba(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t wn,
4805                   int power_level, size_t taillen)
4806 {
4807     VALUE b;
4808     size_t half_numdigits, lower_numdigits;
4809     int lower_power_level;
4810     size_t bn;
4811     const BDIGIT *bds;
4812     size_t len;
4813
4814     /*
4815      * Precondition:
4816      * abs(x) < maxpow**(2**power_level)
4817      * where
4818      *   maxpow = maxpow_in_bdigit_dbl(base, &numdigits)
4819      *
4820      * This function generates sequence of zeros, and then stringized abs(x) into b2s->ptr.
4821      *
4822      * b2s->ptr can be NULL.
4823      * It is allocated when the first character is generated via big2str_alloc.
4824      *
4825      * The prefix zeros should be generated if and only if b2s->ptr is not NULL.
4826      * When the zeros are generated, the zeros and abs(x) consists
4827      * numdigits*(2**power_level) characters at total.
4828      *
4829      * Note:
4830      * power_cache_get_power(base, power_level, &len) may not be cached yet. It should not be called.
4831      * power_cache_get_power(base, power_level-1, &len) should be cached already if 0 <= power_level-1.
4832      */
4833
4834     if (xn == 0 || bary_zero_p(xds, xn)) {
4835         if (b2s->ptr) {
4836             /* When x is zero, power_cache_get_power(base, power_level) should be cached already. */
4837             power_cache_get_power(b2s->base, power_level, &len);
4838             memset(b2s->ptr, '0', len);
4839             b2s->ptr += len;
4840         }
4841         return;
4842     }
4843
4844     if (power_level == 0) {
4845         big2str_2bdigits(b2s, xds, xn, taillen);
4846         return;
4847     }
4848
4849     lower_power_level = power_level-1;
4850     b = power_cache_get_power(b2s->base, lower_power_level, &lower_numdigits);
4851     bn = BIGNUM_LEN(b);
4852     bds = BDIGITS(b);
4853
4854     half_numdigits = lower_numdigits;
4855
4856     while (0 < lower_power_level &&
4857             (xn < bn ||
4858              (xn == bn && bary_cmp(xds, xn, bds, bn) < 0))) {
4859         lower_power_level--;
4860         b = power_cache_get_power(b2s->base, lower_power_level, &lower_numdigits);
4861         bn = BIGNUM_LEN(b);
4862         bds = BDIGITS(b);
4863     }
4864
4865     if (lower_power_level == 0 &&
4866             (xn < bn ||
4867              (xn == bn && bary_cmp(xds, xn, bds, bn) < 0))) {
4868         if (b2s->ptr) {
4869             len = half_numdigits * 2 - lower_numdigits;
4870             memset(b2s->ptr, '0', len);
4871             b2s->ptr += len;
4872         }
4873         big2str_2bdigits(b2s, xds, xn, taillen);
4874     }
4875     else {
4876         BDIGIT *qds, *rds;
4877         size_t qn, rn;
4878         BDIGIT *tds;
4879         int shift;
4880
4881         if (lower_power_level != power_level-1 && b2s->ptr) {
4882             len = (half_numdigits - lower_numdigits) * 2;
4883             memset(b2s->ptr, '0', len);
4884             b2s->ptr += len;
4885         }
4886
4887         shift = nlz(bds[bn-1]);
4888
4889         qn = xn + BIGDIVREM_EXTRA_WORDS;
4890
4891         if (shift == 0) {
4892             /* bigdivrem_restoring will not modify y.
4893              * So use bds directly.  */
4894             tds = (BDIGIT *)bds;
4895             xds[xn] = 0;
4896         }
4897         else {
4898             /* bigdivrem_restoring will modify y.
4899              * So use temporary buffer.  */
4900             tds = xds + qn;
4901             RUBY_ASSERT(qn + bn <= xn + wn);
4902             bary_small_lshift(tds, bds, bn, shift);
4903             xds[xn] = bary_small_lshift(xds, xds, xn, shift);
4904         }
4905
4906         bigdivrem_restoring(xds, qn, tds, bn);
4907
4908         rds = xds;
4909         rn = bn;
4910
4911         qds = xds + bn;
4912         qn = qn - bn;
4913
4914         if (shift) {
4915             bary_small_rshift(rds, rds, rn, shift, 0);
4916         }
4917
4918         BARY_TRUNC(qds, qn);
4919         RUBY_ASSERT(qn <= bn);
4920         big2str_karatsuba(b2s, qds, qn, xn+wn - (rn+qn), lower_power_level, lower_numdigits+taillen);
4921         BARY_TRUNC(rds, rn);
4922         big2str_karatsuba(b2s, rds, rn, xn+wn - rn, lower_power_level, taillen);
4923     }
4924 }
4925
4926 static VALUE
4927 big2str_base_poweroftwo(VALUE x, int base)
4928 {
4929     int word_numbits = ffs(base) - 1;
4930     size_t numwords;
4931     VALUE result;
4932     char *ptr;
4933     numwords = rb_absint_numwords(x, word_numbits, NULL);
4934     if (BIGNUM_NEGATIVE_P(x)) {
4935         if (LONG_MAX-1 < numwords)
4936             rb_raise(rb_eArgError, "too big number");
4937         result = rb_usascii_str_new(0, 1+numwords);
4938         ptr = RSTRING_PTR(result);
4939         *ptr++ = BIGNUM_POSITIVE_P(x) ? '+' : '-';
4940     }
4941     else {
4942         if (LONG_MAX < numwords)
4943             rb_raise(rb_eArgError, "too big number");
4944         result = rb_usascii_str_new(0, numwords);
4945         ptr = RSTRING_PTR(result);
4946     }
4947     rb_integer_pack(x, ptr, numwords, 1, CHAR_BIT-word_numbits,
4948                     INTEGER_PACK_BIG_ENDIAN);
4949     while (0 < numwords) {
4950         *ptr = ruby_digitmap[*(unsigned char *)ptr];
4951         ptr++;
4952         numwords--;
4953     }
4954     return result;
4955 }
4956
4957 VALUE
4958 rb_big2str_poweroftwo(VALUE x, int base)
4959 {
4960     return big2str_base_poweroftwo(x, base);
4961 }
4962
4963 static VALUE
4964 big2str_generic(VALUE x, int base)
4965 {
4966     BDIGIT *xds;
4967     size_t xn;
4968     struct big2str_struct b2s_data;
4969     int power_level;
4970     VALUE power;
4971
4972     xds = BDIGITS(x);
4973     xn = BIGNUM_LEN(x);
4974     BARY_TRUNC(xds, xn);
4975
4976     if (xn == 0) {
4977         return rb_usascii_str_new2("0");
4978     }
4979
4980     if (!valid_radix_p(base))
4981         invalid_radix(base);
4982
4983     if (xn >= LONG_MAX/BITSPERDIG) {
4984         rb_raise(rb_eRangeError, "bignum too big to convert into 'string'");
4985     }
4986
4987     power_level = 0;
4988     power = power_cache_get_power(base, power_level, NULL);
4989     while (power_level < MAX_BASE36_POWER_TABLE_ENTRIES &&
4990            (size_t)BIGNUM_LEN(power) <= (xn+1)/2) {
4991         power_level++;
4992         power = power_cache_get_power(base, power_level, NULL);
4993     }
4994     RUBY_ASSERT(power_level != MAX_BASE36_POWER_TABLE_ENTRIES);
4995
4996     if ((size_t)BIGNUM_LEN(power) <= xn) {
4997         /*
4998          * This increment guarantees x < power_cache_get_power(base, power_level)
4999          * without invoking it actually.
5000          * (power_cache_get_power(base, power_level) can be slow and not used
5001          * in big2str_karatsuba.)
5002          *
5003          * Although it is possible that x < power_cache_get_power(base, power_level-1),
5004          * it is no problem because big2str_karatsuba checks it and
5005          * doesn't affect the result when b2s_data.ptr is NULL.
5006          */
5007         power_level++;
5008     }
5009
5010     b2s_data.negative = BIGNUM_NEGATIVE_P(x);
5011     b2s_data.base = base;
5012     b2s_data.hbase2 = maxpow_in_bdigit_dbl(base, &b2s_data.hbase2_numdigits);
5013
5014     b2s_data.result = Qnil;
5015     b2s_data.ptr = NULL;
5016
5017     if (power_level == 0) {
5018         big2str_2bdigits(&b2s_data, xds, xn, 0);
5019     }
5020     else {
5021         VALUE tmpw = 0;
5022         BDIGIT *wds;
5023         size_t wn;
5024         wn = power_level * BIGDIVREM_EXTRA_WORDS + BIGNUM_LEN(power);
5025         wds = ALLOCV_N(BDIGIT, tmpw, xn + wn);
5026         MEMCPY(wds, xds, BDIGIT, xn);
5027         big2str_karatsuba(&b2s_data, wds, xn, wn, power_level, 0);
5028         if (tmpw)
5029             ALLOCV_END(tmpw);
5030     }
5031     RB_GC_GUARD(x);
5032
5033     *b2s_data.ptr = '\0';
5034     rb_str_resize(b2s_data.result, (long)(b2s_data.ptr - RSTRING_PTR(b2s_data.result)));
5035
5036     RB_GC_GUARD(x);
5037     return b2s_data.result;
5038 }
5039
5040 VALUE
5041 rb_big2str_generic(VALUE x, int base)
5042 {
5043     return big2str_generic(x, base);
5044 }
5045
5046 #if USE_GMP
5047 static VALUE
5048 big2str_gmp(VALUE x, int base)
5049 {
5050     mpz_t mx;
5051     size_t size;
5052     VALUE str;
5053     BDIGIT *xds = BDIGITS(x);
5054     size_t xn = BIGNUM_LEN(x);
5055
5056     mpz_init(mx);
5057     bdigits_to_mpz(mx, xds, xn);
5058
5059     size = mpz_sizeinbase(mx, base);
5060
5061     if (BIGNUM_NEGATIVE_P(x)) {
5062         mpz_neg(mx, mx);
5063         str = rb_usascii_str_new(0, size+1);
5064     }
5065     else {
5066         str = rb_usascii_str_new(0, size);
5067     }
5068     mpz_get_str(RSTRING_PTR(str), base, mx);
5069     mpz_clear(mx);
5070
5071     if (RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\0') {
5072         rb_str_set_len(str, RSTRING_LEN(str)-1);
5073     }
5074
5075     RB_GC_GUARD(x);
5076     return str;
5077 }
5078
5079 VALUE
5080 rb_big2str_gmp(VALUE x, int base)
5081 {
5082     return big2str_gmp(x, base);
5083 }
5084 #endif
5085
5086 static VALUE
5087 rb_big2str1(VALUE x, int base)
5088 {
5089     BDIGIT *xds;
5090     size_t xn;
5091
5092     if (FIXNUM_P(x)) {
5093         return rb_fix2str(x, base);
5094     }
5095
5096     bigtrunc(x);
5097     xds = BDIGITS(x);
5098     xn = BIGNUM_LEN(x);
5099     BARY_TRUNC(xds, xn);
5100
5101     if (xn == 0) {
5102         return rb_usascii_str_new2("0");
5103     }
5104
5105     if (!valid_radix_p(base))
5106         invalid_radix(base);
5107
5108     if (xn >= LONG_MAX/BITSPERDIG) {
5109         rb_raise(rb_eRangeError, "bignum too big to convert into 'string'");
5110     }
5111
5112     if (POW2_P(base)) {
5113         /* base == 2 || base == 4 || base == 8 || base == 16 || base == 32 */
5114         return big2str_base_poweroftwo(x, base);
5115     }
5116
5117 #if USE_GMP
5118     if (GMP_BIG2STR_DIGITS < xn) {
5119         return big2str_gmp(x, base);
5120     }
5121 #endif
5122
5123     return big2str_generic(x, base);
5124 }
5125
5126 VALUE
5127 rb_big2str(VALUE x, int base)
5128 {
5129     return rb_big2str1(x, base);
5130 }
5131
5132 static unsigned long
5133 big2ulong(VALUE x, const char *type)
5134 {
5135 #if SIZEOF_LONG > SIZEOF_BDIGIT
5136     size_t i;
5137 #endif
5138     size_t len = BIGNUM_LEN(x);
5139     unsigned long num;
5140     BDIGIT *ds;
5141
5142     if (len == 0)
5143         return 0;
5144     if (BIGSIZE(x) > sizeof(long)) {
5145         rb_raise(rb_eRangeError, "bignum too big to convert into '%s'", type);
5146     }
5147     ds = BDIGITS(x);
5148 #if SIZEOF_LONG <= SIZEOF_BDIGIT
5149     num = (unsigned long)ds[0];
5150 #else
5151     num = 0;
5152     for (i = 0; i < len; i++) {
5153         num <<= BITSPERDIG;
5154         num += (unsigned long)ds[len - i - 1]; /* overflow is already checked */
5155     }
5156 #endif
5157     return num;
5158 }
5159
5160 unsigned long
5161 rb_big2ulong(VALUE x)
5162 {
5163     unsigned long num = big2ulong(x, "unsigned long");
5164
5165     if (BIGNUM_POSITIVE_P(x)) {
5166         return num;
5167     }
5168     else {
5169         if (num <= 1+(unsigned long)(-(LONG_MIN+1)))
5170             return -(long)(num-1)-1;
5171     }
5172     rb_raise(rb_eRangeError, "bignum out of range of unsigned long");
5173 }
5174
5175 long
5176 rb_big2long(VALUE x)
5177 {
5178     unsigned long num = big2ulong(x, "long");
5179
5180     if (BIGNUM_POSITIVE_P(x)) {
5181         if (num <= LONG_MAX)
5182             return num;
5183     }
5184     else {
5185         if (num <= 1+(unsigned long)(-(LONG_MIN+1)))
5186             return -(long)(num-1)-1;
5187     }
5188     rb_raise(rb_eRangeError, "bignum too big to convert into 'long'");
5189 }
5190
5191 #if HAVE_LONG_LONG
5192
5193 static unsigned LONG_LONG
5194 big2ull(VALUE x, const char *type)
5195 {
5196 #if SIZEOF_LONG_LONG > SIZEOF_BDIGIT
5197     size_t i;
5198 #endif
5199     size_t len = BIGNUM_LEN(x);
5200     unsigned LONG_LONG num;
5201     BDIGIT *ds = BDIGITS(x);
5202
5203     if (len == 0)
5204         return 0;
5205     if (BIGSIZE(x) > SIZEOF_LONG_LONG)
5206         rb_raise(rb_eRangeError, "bignum too big to convert into '%s'", type);
5207 #if SIZEOF_LONG_LONG <= SIZEOF_BDIGIT
5208     num = (unsigned LONG_LONG)ds[0];
5209 #else
5210     num = 0;
5211     for (i = 0; i < len; i++) {
5212         num = BIGUP(num);
5213         num += ds[len - i - 1];
5214     }
5215 #endif
5216     return num;
5217 }
5218
5219 unsigned LONG_LONG
5220 rb_big2ull(VALUE x)
5221 {
5222     unsigned LONG_LONG num = big2ull(x, "unsigned long long");
5223
5224     if (BIGNUM_POSITIVE_P(x)) {
5225         return num;
5226     }
5227     else {
5228         if (num <= 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
5229             return -(LONG_LONG)(num-1)-1;
5230     }
5231     rb_raise(rb_eRangeError, "bignum out of range of unsigned long long");
5232 }
5233
5234 LONG_LONG
5235 rb_big2ll(VALUE x)
5236 {
5237     unsigned LONG_LONG num = big2ull(x, "long long");
5238
5239     if (BIGNUM_POSITIVE_P(x)) {
5240         if (num <= LLONG_MAX)
5241             return num;
5242     }
5243     else {
5244         if (num <= 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
5245             return -(LONG_LONG)(num-1)-1;
5246     }
5247     rb_raise(rb_eRangeError, "bignum too big to convert into 'long long'");
5248 }
5249
5250 #endif  /* HAVE_LONG_LONG */
5251
5252 static VALUE
5253 dbl2big(double d)
5254 {
5255     long i = 0;
5256     BDIGIT c;
5257     BDIGIT *digits;
5258     VALUE z;
5259     double u = (d < 0)?-d:d;
5260
5261     if (isinf(d)) {
5262         rb_raise(rb_eFloatDomainError, d < 0 ? "-Infinity" : "Infinity");
5263     }
5264     if (isnan(d)) {
5265         rb_raise(rb_eFloatDomainError, "NaN");
5266     }
5267
5268     while (1.0 <= u) {
5269         u /= (double)(BIGRAD);
5270         i++;
5271     }
5272     z = bignew(i, d>=0);
5273     digits = BDIGITS(z);
5274     while (i--) {
5275         u *= BIGRAD;
5276         c = (BDIGIT)u;
5277         u -= c;
5278         digits[i] = c;
5279     }
5280
5281     return z;
5282 }
5283
5284 VALUE
5285 rb_dbl2big(double d)
5286 {
5287     return bignorm(dbl2big(d));
5288 }
5289
5290 static double
5291 big2dbl(VALUE x)
5292 {
5293     double d = 0.0;
5294     long i = (bigtrunc(x), BIGNUM_LEN(x)), lo = 0, bits;
5295     BDIGIT *ds = BDIGITS(x), dl;
5296
5297     if (i) {
5298         bits = i * BITSPERDIG - nlz(ds[i-1]);
5299         if (bits > DBL_MANT_DIG+DBL_MAX_EXP) {
5300             d = HUGE_VAL;
5301         }
5302         else {
5303             if (bits > DBL_MANT_DIG+1)
5304                 lo = (bits -= DBL_MANT_DIG+1) / BITSPERDIG;
5305             else
5306                 bits = 0;
5307             while (--i > lo) {
5308                 d = ds[i] + BIGRAD*d;
5309             }
5310             dl = ds[i];
5311             if (bits && (dl & ((BDIGIT)1 << (bits %= BITSPERDIG)))) {
5312                 int carry = (dl & ~(BDIGMAX << bits)) != 0;
5313                 if (!carry) {
5314                     while (i-- > 0) {
5315                         carry = ds[i] != 0;
5316                         if (carry) break;
5317                     }
5318                 }
5319                 if (carry) {
5320                     BDIGIT mask = BDIGMAX;
5321                     BDIGIT bit = 1;
5322                     mask <<= bits;
5323                     bit <<= bits;
5324                     dl &= mask;
5325                     dl += bit;
5326                     dl = BIGLO(dl);
5327                     if (!dl) d += 1;
5328                 }
5329             }
5330             d = dl + BIGRAD*d;
5331             if (lo) {
5332                 if (lo > INT_MAX / BITSPERDIG)
5333                     d = HUGE_VAL;
5334                 else if (lo < INT_MIN / BITSPERDIG)
5335                     d = 0.0;
5336                 else
5337                     d = ldexp(d, (int)(lo * BITSPERDIG));
5338             }
5339         }
5340     }
5341     if (BIGNUM_NEGATIVE_P(x)) d = -d;
5342     return d;
5343 }
5344
5345 double
5346 rb_big2dbl(VALUE x)
5347 {
5348     double d = big2dbl(x);
5349
5350     if (isinf(d)) {
5351         rb_warning("Integer out of Float range");
5352         if (d < 0.0)
5353             d = -HUGE_VAL;
5354         else
5355             d = HUGE_VAL;
5356     }
5357     return d;
5358 }
5359
5360 VALUE
5361 rb_integer_float_cmp(VALUE x, VALUE y)
5362 {
5363     double yd = RFLOAT_VALUE(y);
5364     double yi, yf;
5365     VALUE rel;
5366
5367     if (isnan(yd))
5368         return Qnil;
5369     if (isinf(yd)) {
5370         if (yd > 0.0) return INT2FIX(-1);
5371         else return INT2FIX(1);
5372     }
5373     yf = modf(yd, &yi);
5374     if (FIXNUM_P(x)) {
5375 #if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
5376         double xd = (double)FIX2LONG(x);
5377         if (xd < yd)
5378             return INT2FIX(-1);
5379         if (xd > yd)
5380             return INT2FIX(1);
5381         return INT2FIX(0);
5382 #else
5383         long xn, yn;
5384         if (yi < FIXNUM_MIN)
5385             return INT2FIX(1);
5386         if (FIXNUM_MAX+1 <= yi)
5387             return INT2FIX(-1);
5388         xn = FIX2LONG(x);
5389         yn = (long)yi;
5390         if (xn < yn)
5391             return INT2FIX(-1);
5392         if (xn > yn)
5393             return INT2FIX(1);
5394         if (yf < 0.0)
5395             return INT2FIX(1);
5396         if (0.0 < yf)
5397             return INT2FIX(-1);
5398         return INT2FIX(0);
5399 #endif
5400     }
5401     y = rb_dbl2big(yi);
5402     rel = rb_big_cmp(x, y);
5403     if (yf == 0.0 || rel != INT2FIX(0))
5404         return rel;
5405     if (yf < 0.0)
5406         return INT2FIX(1);
5407     return INT2FIX(-1);
5408 }
5409
5410 #if SIZEOF_LONG * CHAR_BIT >= DBL_MANT_DIG /* assume FLT_RADIX == 2 */
5411 COMPILER_WARNING_PUSH
5412 #if __has_warning("-Wimplicit-int-float-conversion")
5413 COMPILER_WARNING_IGNORED(-Wimplicit-int-float-conversion)
5414 #endif
5415 static const double LONG_MAX_as_double = LONG_MAX;
5416 COMPILER_WARNING_POP
5417 #endif
5418
5419 VALUE
5420 rb_integer_float_eq(VALUE x, VALUE y)
5421 {
5422     double yd = RFLOAT_VALUE(y);
5423     double yi, yf;
5424
5425     if (!isfinite(yd))
5426         return Qfalse;
5427     yf = modf(yd, &yi);
5428     if (yf != 0)
5429         return Qfalse;
5430     if (FIXNUM_P(x)) {
5431 #if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
5432         double xd = (double)FIX2LONG(x);
5433         return RBOOL(xd == yd);
5434 #else
5435         long xn, yn;
5436         if (yi < LONG_MIN || LONG_MAX_as_double <= yi)
5437             return Qfalse;
5438         xn = FIX2LONG(x);
5439         yn = (long)yi;
5440         return RBOOL(xn == yn);
5441 #endif
5442     }
5443     y = rb_dbl2big(yi);
5444     return rb_big_eq(x, y);
5445 }
5446
5447
5448 VALUE
5449 rb_big_cmp(VALUE x, VALUE y)
5450 {
5451     if (FIXNUM_P(y)) {
5452         x = bigfixize(x);
5453         if (FIXNUM_P(x)) {
5454             /* SIGNED_VALUE and Fixnum have same sign-bits, same
5455              * order */
5456             SIGNED_VALUE sx = (SIGNED_VALUE)x, sy = (SIGNED_VALUE)y;
5457             if (sx < sy) return INT2FIX(-1);
5458             return INT2FIX(sx > sy);
5459         }
5460     }
5461     else if (RB_BIGNUM_TYPE_P(y)) {
5462         if (BIGNUM_SIGN(x) == BIGNUM_SIGN(y)) {
5463             int cmp = bary_cmp(BDIGITS(x), BIGNUM_LEN(x), BDIGITS(y), BIGNUM_LEN(y));
5464             return INT2FIX(BIGNUM_SIGN(x) ? cmp : -cmp);
5465         }
5466     }
5467     else if (RB_FLOAT_TYPE_P(y)) {
5468         return rb_integer_float_cmp(x, y);
5469     }
5470     else {
5471         return rb_num_coerce_cmp(x, y, idCmp);
5472     }
5473     return INT2FIX(BIGNUM_SIGN(x) ? 1 : -1);
5474 }
5475
5476 enum big_op_t {
5477     big_op_gt,
5478     big_op_ge,
5479     big_op_lt,
5480     big_op_le
5481 };
5482
5483 static VALUE
5484 big_op(VALUE x, VALUE y, enum big_op_t op)
5485 {
5486     VALUE rel;
5487     int n;
5488
5489     if (RB_INTEGER_TYPE_P(y)) {
5490         rel = rb_big_cmp(x, y);
5491     }
5492     else if (RB_FLOAT_TYPE_P(y)) {
5493         rel = rb_integer_float_cmp(x, y);
5494     }
5495     else {
5496         ID id = 0;
5497         switch (op) {
5498           case big_op_gt: id = '>'; break;
5499           case big_op_ge: id = idGE; break;
5500           case big_op_lt: id = '<'; break;
5501           case big_op_le: id = idLE; break;
5502         }
5503         return rb_num_coerce_relop(x, y, id);
5504     }
5505
5506     if (NIL_P(rel)) return Qfalse;
5507     n = FIX2INT(rel);
5508
5509     switch (op) {
5510       case big_op_gt: return RBOOL(n >  0);
5511       case big_op_ge: return RBOOL(n >= 0);
5512       case big_op_lt: return RBOOL(n <  0);
5513       case big_op_le: return RBOOL(n <= 0);
5514     }
5515     return Qundef;
5516 }
5517
5518 VALUE
5519 rb_big_gt(VALUE x, VALUE y)
5520 {
5521     return big_op(x, y, big_op_gt);
5522 }
5523
5524 VALUE
5525 rb_big_ge(VALUE x, VALUE y)
5526 {
5527     return big_op(x, y, big_op_ge);
5528 }
5529
5530 VALUE
5531 rb_big_lt(VALUE x, VALUE y)
5532 {
5533     return big_op(x, y, big_op_lt);
5534 }
5535
5536 VALUE
5537 rb_big_le(VALUE x, VALUE y)
5538 {
5539     return big_op(x, y, big_op_le);
5540 }
5541
5542 /*
5543  *  call-seq:
5544  *     big == obj  -> true or false
5545  *
5546  *  Returns <code>true</code> only if <i>obj</i> has the same value
5547  *  as <i>big</i>. Contrast this with Integer#eql?, which requires
5548  *  <i>obj</i> to be an Integer.
5549  *
5550  *     68719476736 == 68719476736.0   #=> true
5551  */
5552
5553 VALUE
5554 rb_big_eq(VALUE x, VALUE y)
5555 {
5556     if (FIXNUM_P(y)) {
5557         return RBOOL(bignorm(x) == y);
5558     }
5559     else if (RB_BIGNUM_TYPE_P(y)) {
5560     }
5561     else if (RB_FLOAT_TYPE_P(y)) {
5562         return rb_integer_float_eq(x, y);
5563     }
5564     else {
5565         return rb_equal(y, x);
5566     }
5567     if (BIGNUM_SIGN(x) != BIGNUM_SIGN(y)) return Qfalse;
5568     if (BIGNUM_LEN(x) != BIGNUM_LEN(y)) return Qfalse;
5569     return RBOOL(MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,BIGNUM_LEN(y)) == 0);
5570 }
5571
5572 VALUE
5573 rb_big_eql(VALUE x, VALUE y)
5574 {
5575     if (!RB_BIGNUM_TYPE_P(y)) return Qfalse;
5576     if (BIGNUM_SIGN(x) != BIGNUM_SIGN(y)) return Qfalse;
5577     if (BIGNUM_LEN(x) != BIGNUM_LEN(y)) return Qfalse;
5578     return RBOOL(MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,BIGNUM_LEN(y)) == 0);
5579 }
5580
5581 VALUE
5582 rb_big_uminus(VALUE x)
5583 {
5584     VALUE z = rb_big_clone(x);
5585
5586     BIGNUM_NEGATE(z);
5587
5588     return bignorm(z);
5589 }
5590
5591 VALUE
5592 rb_big_comp(VALUE x)
5593 {
5594     VALUE z = rb_big_clone(x);
5595     BDIGIT *ds = BDIGITS(z);
5596     long n = BIGNUM_LEN(z);
5597
5598     if (!n) return INT2FIX(-1);
5599
5600     if (BIGNUM_POSITIVE_P(z)) {
5601         if (bary_add_one(ds, n)) {
5602             big_extend_carry(z);
5603         }
5604         BIGNUM_SET_NEGATIVE_SIGN(z);
5605     }
5606     else {
5607         bary_neg(ds, n);
5608         if (bary_add_one(ds, n))
5609             return INT2FIX(-1);
5610         bary_neg(ds, n);
5611         BIGNUM_SET_POSITIVE_SIGN(z);
5612     }
5613
5614     return bignorm(z);
5615 }
5616
5617 static VALUE
5618 bigsub(VALUE x, VALUE y)
5619 {
5620     VALUE z;
5621     BDIGIT *xds, *yds, *zds;
5622     long xn, yn, zn;
5623
5624     xn = BIGNUM_LEN(x);
5625     yn = BIGNUM_LEN(y);
5626     zn = xn < yn ? yn : xn;
5627
5628     z = bignew(zn, 1);
5629
5630     xds = BDIGITS(x);
5631     yds = BDIGITS(y);
5632     zds = BDIGITS(z);
5633
5634     if (bary_sub(zds, zn, xds, xn, yds, yn)) {
5635         bary_2comp(zds, zn);
5636         BIGNUM_SET_NEGATIVE_SIGN(z);
5637     }
5638
5639     return z;
5640 }
5641
5642 static VALUE bigadd_int(VALUE x, long y);
5643
5644 static VALUE
5645 bigsub_int(VALUE x, long y0)
5646 {
5647     VALUE z;
5648     BDIGIT *xds, *zds;
5649     long xn, zn;
5650     BDIGIT_DBL_SIGNED num;
5651     long i, y;
5652
5653     y = y0;
5654     xds = BDIGITS(x);
5655     xn = BIGNUM_LEN(x);
5656
5657     if (xn == 0)
5658         return LONG2NUM(-y0);
5659
5660     zn = xn;
5661 #if SIZEOF_BDIGIT < SIZEOF_LONG
5662     if (zn < bdigit_roomof(SIZEOF_LONG))
5663         zn = bdigit_roomof(SIZEOF_LONG);
5664 #endif
5665     z = bignew(zn, BIGNUM_SIGN(x));
5666     zds = BDIGITS(z);
5667
5668 #if SIZEOF_BDIGIT >= SIZEOF_LONG
5669     RUBY_ASSERT(xn == zn);
5670     num = (BDIGIT_DBL_SIGNED)xds[0] - y;
5671     if (xn == 1 && num < 0) {
5672         BIGNUM_NEGATE(z);
5673         zds[0] = (BDIGIT)-num;
5674         RB_GC_GUARD(x);
5675         return bignorm(z);
5676     }
5677     zds[0] = BIGLO(num);
5678     num = BIGDN(num);
5679     i = 1;
5680     if (i < xn)
5681         goto y_is_zero_x;
5682     goto finish;
5683 #else
5684     num = 0;
5685     for (i=0; i < xn; i++) {
5686         if (y == 0) goto y_is_zero_x;
5687         num += (BDIGIT_DBL_SIGNED)xds[i] - BIGLO(y);
5688         zds[i] = BIGLO(num);
5689         num = BIGDN(num);
5690         y = BIGDN(y);
5691     }
5692     for (; i < zn; i++) {
5693         if (y == 0) goto y_is_zero_z;
5694         num -= BIGLO(y);
5695         zds[i] = BIGLO(num);
5696         num = BIGDN(num);
5697         y = BIGDN(y);
5698     }
5699     goto finish;
5700 #endif
5701
5702     for (; i < xn; i++) {
5703       y_is_zero_x:
5704         if (num == 0) goto num_is_zero_x;
5705         num += xds[i];
5706         zds[i] = BIGLO(num);
5707         num = BIGDN(num);
5708     }
5709 #if SIZEOF_BDIGIT < SIZEOF_LONG
5710     for (; i < zn; i++) {
5711       y_is_zero_z:
5712         if (num == 0) goto num_is_zero_z;
5713         zds[i] = BIGLO(num);
5714         num = BIGDN(num);
5715     }
5716 #endif
5717     goto finish;
5718
5719     for (; i < xn; i++) {
5720       num_is_zero_x:
5721         zds[i] = xds[i];
5722     }
5723 #if SIZEOF_BDIGIT < SIZEOF_LONG
5724     for (; i < zn; i++) {
5725       num_is_zero_z:
5726         zds[i] = 0;
5727     }
5728 #endif
5729     goto finish;
5730
5731   finish:
5732     RUBY_ASSERT(num == 0 || num == -1);
5733     if (num < 0) {
5734         get2comp(z);
5735         BIGNUM_NEGATE(z);
5736     }
5737     RB_GC_GUARD(x);
5738     return bignorm(z);
5739 }
5740
5741 static VALUE
5742 bigadd_int(VALUE x, long y)
5743 {
5744     VALUE z;
5745     BDIGIT *xds, *zds;
5746     long xn, zn;
5747     BDIGIT_DBL num;
5748     long i;
5749
5750     xds = BDIGITS(x);
5751     xn = BIGNUM_LEN(x);
5752
5753     if (xn == 0)
5754         return LONG2NUM(y);
5755
5756     zn = xn;
5757 #if SIZEOF_BDIGIT < SIZEOF_LONG
5758     if (zn < bdigit_roomof(SIZEOF_LONG))
5759         zn = bdigit_roomof(SIZEOF_LONG);
5760 #endif
5761     zn++;
5762
5763     z = bignew(zn, BIGNUM_SIGN(x));
5764     zds = BDIGITS(z);
5765
5766 #if SIZEOF_BDIGIT >= SIZEOF_LONG
5767     num = (BDIGIT_DBL)xds[0] + y;
5768     zds[0] = BIGLO(num);
5769     num = BIGDN(num);
5770     i = 1;
5771     if (i < xn)
5772         goto y_is_zero_x;
5773     goto y_is_zero_z;
5774 #else
5775     num = 0;
5776     for (i=0; i < xn; i++) {
5777         if (y == 0) goto y_is_zero_x;
5778         num += (BDIGIT_DBL)xds[i] + BIGLO(y);
5779         zds[i] = BIGLO(num);
5780         num = BIGDN(num);
5781         y = BIGDN(y);
5782     }
5783     for (; i < zn; i++) {
5784         if (y == 0) goto y_is_zero_z;
5785         num += BIGLO(y);
5786         zds[i] = BIGLO(num);
5787         num = BIGDN(num);
5788         y = BIGDN(y);
5789     }
5790     goto finish;
5791
5792 #endif
5793
5794     for (;i < xn; i++) {
5795       y_is_zero_x:
5796         if (num == 0) goto num_is_zero_x;
5797         num += (BDIGIT_DBL)xds[i];
5798         zds[i] = BIGLO(num);
5799         num = BIGDN(num);
5800     }
5801     for (; i < zn; i++) {
5802       y_is_zero_z:
5803         if (num == 0) goto num_is_zero_z;
5804         zds[i] = BIGLO(num);
5805         num = BIGDN(num);
5806     }
5807     goto finish;
5808
5809     for (;i < xn; i++) {
5810       num_is_zero_x:
5811         zds[i] = xds[i];
5812     }
5813     for (; i < zn; i++) {
5814       num_is_zero_z:
5815         zds[i] = 0;
5816     }
5817     goto finish;
5818
5819   finish:
5820     RB_GC_GUARD(x);
5821     return bignorm(z);
5822 }
5823
5824 static VALUE
5825 bigadd(VALUE x, VALUE y, int sign)
5826 {
5827     VALUE z;
5828     size_t len;
5829
5830     sign = (sign == BIGNUM_SIGN(y));
5831     if (BIGNUM_SIGN(x) != sign) {
5832         if (sign) return bigsub(y, x);
5833         return bigsub(x, y);
5834     }
5835
5836     if (BIGNUM_LEN(x) > BIGNUM_LEN(y)) {
5837         len = BIGNUM_LEN(x) + 1;
5838     }
5839     else {
5840         len = BIGNUM_LEN(y) + 1;
5841     }
5842     z = bignew(len, sign);
5843
5844     bary_add(BDIGITS(z), BIGNUM_LEN(z),
5845              BDIGITS(x), BIGNUM_LEN(x),
5846              BDIGITS(y), BIGNUM_LEN(y));
5847
5848     return z;
5849 }
5850
5851 VALUE
5852 rb_big_plus(VALUE x, VALUE y)
5853 {
5854     long n;
5855
5856     if (FIXNUM_P(y)) {
5857         n = FIX2LONG(y);
5858         if ((n > 0) != BIGNUM_SIGN(x)) {
5859             if (n < 0) {
5860                 n = -n;
5861             }
5862             return bigsub_int(x, n);
5863         }
5864         if (n < 0) {
5865             n = -n;
5866         }
5867         return bigadd_int(x, n);
5868     }
5869     else if (RB_BIGNUM_TYPE_P(y)) {
5870         return bignorm(bigadd(x, y, 1));
5871     }
5872     else if (RB_FLOAT_TYPE_P(y)) {
5873         return DBL2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y));
5874     }
5875     else {
5876         return rb_num_coerce_bin(x, y, '+');
5877     }
5878 }
5879
5880 VALUE
5881 rb_big_minus(VALUE x, VALUE y)
5882 {
5883     long n;
5884
5885     if (FIXNUM_P(y)) {
5886         n = FIX2LONG(y);
5887         if ((n > 0) != BIGNUM_SIGN(x)) {
5888             if (n < 0) {
5889                 n = -n;
5890             }
5891             return bigadd_int(x, n);
5892         }
5893         if (n < 0) {
5894             n = -n;
5895         }
5896         return bigsub_int(x, n);
5897     }
5898     else if (RB_BIGNUM_TYPE_P(y)) {
5899         return bignorm(bigadd(x, y, 0));
5900     }
5901     else if (RB_FLOAT_TYPE_P(y)) {
5902         return DBL2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y));
5903     }
5904     else {
5905         return rb_num_coerce_bin(x, y, '-');
5906     }
5907 }
5908
5909 static VALUE
5910 bigsq(VALUE x)
5911 {
5912     long xn, zn;
5913     VALUE z;
5914     BDIGIT *xds, *zds;
5915
5916     xn = BIGNUM_LEN(x);
5917     zn = 2 * xn;
5918
5919     z = bignew(zn, 1);
5920
5921     xds = BDIGITS(x);
5922     zds = BDIGITS(z);
5923
5924     if (xn < NAIVE_MUL_DIGITS)
5925         bary_sq_fast(zds, zn, xds, xn);
5926     else
5927         bary_mul(zds, zn, xds, xn, xds, xn);
5928
5929     RB_GC_GUARD(x);
5930     return z;
5931 }
5932
5933 static VALUE
5934 bigmul0(VALUE x, VALUE y)
5935 {
5936     long xn, yn, zn;
5937     VALUE z;
5938     BDIGIT *xds, *yds, *zds;
5939
5940     if (x == y)
5941         return bigsq(x);
5942
5943     xn = BIGNUM_LEN(x);
5944     yn = BIGNUM_LEN(y);
5945     zn = xn + yn;
5946
5947     z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
5948
5949     xds = BDIGITS(x);
5950     yds = BDIGITS(y);
5951     zds = BDIGITS(z);
5952
5953     bary_mul(zds, zn, xds, xn, yds, yn);
5954
5955     RB_GC_GUARD(x);
5956     RB_GC_GUARD(y);
5957     return z;
5958 }
5959
5960 VALUE
5961 rb_big_mul(VALUE x, VALUE y)
5962 {
5963     if (FIXNUM_P(y)) {
5964         y = rb_int2big(FIX2LONG(y));
5965     }
5966     else if (RB_BIGNUM_TYPE_P(y)) {
5967     }
5968     else if (RB_FLOAT_TYPE_P(y)) {
5969         return DBL2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
5970     }
5971     else {
5972         return rb_num_coerce_bin(x, y, '*');
5973     }
5974
5975     return bignorm(bigmul0(x, y));
5976 }
5977
5978 static VALUE
5979 bigdivrem(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp)
5980 {
5981     long xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y);
5982     VALUE z;
5983     BDIGIT *xds, *yds, *zds;
5984     BDIGIT dd;
5985
5986     VALUE q = Qnil, r = Qnil;
5987     BDIGIT *qds, *rds;
5988     long qn, rn;
5989
5990     yds = BDIGITS(y);
5991     BARY_TRUNC(yds, yn);
5992     if (yn == 0)
5993         rb_num_zerodiv();
5994
5995     xds = BDIGITS(x);
5996     BARY_TRUNC(xds, xn);
5997
5998     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1])) {
5999         if (divp) *divp = rb_int2big(0);
6000         if (modp) *modp = x;
6001         return Qnil;
6002     }
6003     if (yn == 1) {
6004         dd = yds[0];
6005         z = bignew(xn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
6006         zds = BDIGITS(z);
6007         dd = bigdivrem_single(zds, xds, xn, dd);
6008         if (modp) {
6009             *modp = rb_uint2big((uintptr_t)dd);
6010             BIGNUM_SET_SIGN(*modp, BIGNUM_SIGN(x));
6011         }
6012         if (divp) *divp = z;
6013         return Qnil;
6014     }
6015     if (xn == 2 && yn == 2) {
6016         BDIGIT_DBL x0 = bary2bdigitdbl(xds, 2);
6017         BDIGIT_DBL y0 = bary2bdigitdbl(yds, 2);
6018         BDIGIT_DBL q0 = x0 / y0;
6019         BDIGIT_DBL r0 = x0 % y0;
6020         if (divp) {
6021             z = bignew(bdigit_roomof(sizeof(BDIGIT_DBL)), BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
6022             zds = BDIGITS(z);
6023             zds[0] = BIGLO(q0);
6024             zds[1] = BIGLO(BIGDN(q0));
6025             *divp = z;
6026         }
6027         if (modp) {
6028             z = bignew(bdigit_roomof(sizeof(BDIGIT_DBL)), BIGNUM_SIGN(x));
6029             zds = BDIGITS(z);
6030             zds[0] = BIGLO(r0);
6031             zds[1] = BIGLO(BIGDN(r0));
6032             *modp = z;
6033         }
6034         return Qnil;
6035     }
6036
6037     if (divp) {
6038         qn = xn + BIGDIVREM_EXTRA_WORDS;
6039         q = bignew(qn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
6040         qds = BDIGITS(q);
6041     }
6042     else {
6043         qn = 0;
6044         qds = NULL;
6045     }
6046
6047     if (modp) {
6048         rn = yn;
6049         r = bignew(rn, BIGNUM_SIGN(x));
6050         rds = BDIGITS(r);
6051     }
6052     else {
6053         rn = 0;
6054         rds = NULL;
6055     }
6056
6057     bary_divmod_branch(qds, qn, rds, rn, xds, xn, yds, yn);
6058
6059     if (divp) {
6060         bigtrunc(q);
6061         *divp = q;
6062     }
6063     if (modp) {
6064         bigtrunc(r);
6065         *modp = r;
6066     }
6067
6068     return Qnil;
6069 }
6070
6071 static void
6072 bigdivmod(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp)
6073 {
6074     VALUE mod;
6075
6076     bigdivrem(x, y, divp, &mod);
6077     if (BIGNUM_SIGN(x) != BIGNUM_SIGN(y) && !BIGZEROP(mod)) {
6078         if (divp) *divp = bigadd(*divp, rb_int2big(1), 0);
6079         if (modp) *modp = bigadd(mod, y, 1);
6080     }
6081     else if (modp) {
6082         *modp = mod;
6083     }
6084 }
6085
6086
6087 static VALUE
6088 rb_big_divide(VALUE x, VALUE y, ID op)
6089 {
6090     VALUE z;
6091
6092     if (FIXNUM_P(y)) {
6093         y = rb_int2big(FIX2LONG(y));
6094     }
6095     else if (RB_BIGNUM_TYPE_P(y)) {
6096     }
6097     else if (RB_FLOAT_TYPE_P(y)) {
6098         if (op == '/') {
6099             double dx = rb_big2dbl(x);
6100             return rb_flo_div_flo(DBL2NUM(dx), y);
6101         }
6102         else {
6103             VALUE v;
6104             double dy = RFLOAT_VALUE(y);
6105             if (dy == 0.0) rb_num_zerodiv();
6106             v = rb_big_divide(x, y, '/');
6107             return rb_dbl2big(RFLOAT_VALUE(v));
6108         }
6109     }
6110     else {
6111         return rb_num_coerce_bin(x, y, op);
6112     }
6113     bigdivmod(x, y, &z, 0);
6114
6115     return bignorm(z);
6116 }
6117
6118 VALUE
6119 rb_big_div(VALUE x, VALUE y)
6120 {
6121     return rb_big_divide(x, y, '/');
6122 }
6123
6124 VALUE
6125 rb_big_idiv(VALUE x, VALUE y)
6126 {
6127     return rb_big_divide(x, y, idDiv);
6128 }
6129
6130 VALUE
6131 rb_big_modulo(VALUE x, VALUE y)
6132 {
6133     VALUE z;
6134
6135     if (FIXNUM_P(y)) {
6136         y = rb_int2big(FIX2LONG(y));
6137     }
6138     else if (!RB_BIGNUM_TYPE_P(y)) {
6139         return rb_num_coerce_bin(x, y, '%');
6140     }
6141     bigdivmod(x, y, 0, &z);
6142
6143     return bignorm(z);
6144 }
6145
6146 VALUE
6147 rb_big_remainder(VALUE x, VALUE y)
6148 {
6149     VALUE z;
6150
6151     if (FIXNUM_P(y)) {
6152         y = rb_int2big(FIX2LONG(y));
6153     }
6154     else if (!RB_BIGNUM_TYPE_P(y)) {
6155         return rb_num_coerce_bin(x, y, rb_intern("remainder"));
6156     }
6157     bigdivrem(x, y, 0, &z);
6158
6159     return bignorm(z);
6160 }
6161
6162 VALUE
6163 rb_big_divmod(VALUE x, VALUE y)
6164 {
6165     VALUE div, mod;
6166
6167     if (FIXNUM_P(y)) {
6168         y = rb_int2big(FIX2LONG(y));
6169     }
6170     else if (!RB_BIGNUM_TYPE_P(y)) {
6171         return rb_num_coerce_bin(x, y, idDivmod);
6172     }
6173     bigdivmod(x, y, &div, &mod);
6174
6175     return rb_assoc_new(bignorm(div), bignorm(mod));
6176 }
6177
6178 static VALUE
6179 big_shift(VALUE x, long n)
6180 {
6181     if (n < 0)
6182         return big_lshift(x, 1+(unsigned long)(-(n+1)));
6183     else if (n > 0)
6184         return big_rshift(x, (unsigned long)n);
6185     return x;
6186 }
6187
6188 enum {DBL_BIGDIG = ((DBL_MANT_DIG + BITSPERDIG) / BITSPERDIG)};
6189
6190 static double
6191 big_fdiv(VALUE x, VALUE y, long ey)
6192 {
6193     VALUE z;
6194     long l, ex;
6195
6196     bigtrunc(x);
6197     l = BIGNUM_LEN(x);
6198     ex = l * BITSPERDIG - nlz(BDIGITS(x)[l-1]);
6199     ex -= 2 * DBL_BIGDIG * BITSPERDIG;
6200     if (ex > BITSPERDIG) ex -= BITSPERDIG;
6201     else if (ex > 0) ex = 0;
6202     if (ex) x = big_shift(x, ex);
6203
6204     bigdivrem(x, y, &z, 0);
6205     l = ex - ey;
6206 #if SIZEOF_LONG > SIZEOF_INT
6207     {
6208         /* Visual C++ can't be here */
6209         if (l > INT_MAX) return HUGE_VAL;
6210         if (l < INT_MIN) return 0.0;
6211     }
6212 #endif
6213     return ldexp(big2dbl(z), (int)l);
6214 }
6215
6216 static double
6217 big_fdiv_int(VALUE x, VALUE y)
6218 {
6219     long l, ey;
6220     bigtrunc(y);
6221     l = BIGNUM_LEN(y);
6222     ey = l * BITSPERDIG - nlz(BDIGITS(y)[l-1]);
6223     ey -= DBL_BIGDIG * BITSPERDIG;
6224     if (ey) y = big_shift(y, ey);
6225     return big_fdiv(x, y, ey);
6226 }
6227
6228 static double
6229 big_fdiv_float(VALUE x, VALUE y)
6230 {
6231     int i;
6232     y = dbl2big(ldexp(frexp(RFLOAT_VALUE(y), &i), DBL_MANT_DIG));
6233     return big_fdiv(x, y, i - DBL_MANT_DIG);
6234 }
6235
6236 double
6237 rb_big_fdiv_double(VALUE x, VALUE y)
6238 {
6239     double dx, dy;
6240     VALUE v;
6241
6242     dx = big2dbl(x);
6243     if (FIXNUM_P(y)) {
6244         dy = (double)FIX2LONG(y);
6245         if (isinf(dx))
6246             return big_fdiv_int(x, rb_int2big(FIX2LONG(y)));
6247     }
6248     else if (RB_BIGNUM_TYPE_P(y)) {
6249         return big_fdiv_int(x, y);
6250     }
6251     else if (RB_FLOAT_TYPE_P(y)) {
6252         dy = RFLOAT_VALUE(y);
6253         if (isnan(dy))
6254             return dy;
6255         if (isinf(dx))
6256             return big_fdiv_float(x, y);
6257     }
6258     else {
6259         return NUM2DBL(rb_num_coerce_bin(x, y, idFdiv));
6260     }
6261     v = rb_flo_div_flo(DBL2NUM(dx), DBL2NUM(dy));
6262     return NUM2DBL(v);
6263 }
6264
6265 VALUE
6266 rb_big_fdiv(VALUE x, VALUE y)
6267 {
6268     return DBL2NUM(rb_big_fdiv_double(x, y));
6269 }
6270
6271 VALUE
6272 rb_big_pow(VALUE x, VALUE y)
6273 {
6274     double d;
6275     SIGNED_VALUE yy;
6276
6277   again:
6278     if (y == INT2FIX(0)) return INT2FIX(1);
6279     if (y == INT2FIX(1)) return x;
6280     if (RB_FLOAT_TYPE_P(y)) {
6281         d = RFLOAT_VALUE(y);
6282         if ((BIGNUM_NEGATIVE_P(x) && !BIGZEROP(x))) {
6283             return rb_dbl_complex_new_polar_pi(pow(-rb_big2dbl(x), d), d);
6284         }
6285     }
6286     else if (RB_BIGNUM_TYPE_P(y)) {
6287         y = bignorm(y);
6288         if (FIXNUM_P(y))
6289             goto again;
6290         rb_warn("in a**b, b may be too big");
6291         d = rb_big2dbl(y);
6292     }
6293     else if (FIXNUM_P(y)) {
6294         yy = FIX2LONG(y);
6295
6296         if (yy < 0) {
6297             x = rb_big_pow(x, LONG2NUM(-yy));
6298             if (RB_INTEGER_TYPE_P(x))
6299                 return rb_rational_raw(INT2FIX(1), x);
6300             else
6301                 return DBL2NUM(1.0 / NUM2DBL(x));
6302         }
6303         else {
6304             VALUE z = 0;
6305             SIGNED_VALUE mask;
6306             const size_t xbits = rb_absint_numwords(x, 1, NULL);
6307             const size_t BIGLEN_LIMIT = 32*1024*1024;
6308
6309             if (xbits == (size_t)-1 ||
6310                 (xbits > BIGLEN_LIMIT) ||
6311                 (xbits * yy > BIGLEN_LIMIT)) {
6312                 rb_warn("in a**b, b may be too big");
6313                 d = (double)yy;
6314             }
6315             else {
6316                 for (mask = FIXNUM_MAX + 1; mask; mask >>= 1) {
6317                     if (z) z = bigsq(z);
6318                     if (yy & mask) {
6319                         z = z ? bigtrunc(bigmul0(z, x)) : x;
6320                     }
6321                 }
6322                 return bignorm(z);
6323             }
6324         }
6325     }
6326     else {
6327         return rb_num_coerce_bin(x, y, idPow);
6328     }
6329     return DBL2NUM(pow(rb_big2dbl(x), d));
6330 }
6331
6332 static VALUE
6333 bigand_int(VALUE x, long xn, BDIGIT hibitsx, long y)
6334 {
6335     VALUE z;
6336     BDIGIT *xds, *zds;
6337     long zn;
6338     long i;
6339     BDIGIT hibitsy;
6340
6341     if (y == 0) return INT2FIX(0);
6342     if (xn == 0) return hibitsx ? LONG2NUM(y) : 0;
6343     hibitsy = 0 <= y ? 0 : BDIGMAX;
6344     xds = BDIGITS(x);
6345 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6346     if (!hibitsy) {
6347         y &= xds[0];
6348         return LONG2NUM(y);
6349     }
6350 #endif
6351
6352     zn = xn;
6353 #if SIZEOF_BDIGIT < SIZEOF_LONG
6354     if (hibitsx && zn < bdigit_roomof(SIZEOF_LONG))
6355         zn = bdigit_roomof(SIZEOF_LONG);
6356 #endif
6357
6358     z = bignew(zn, 0);
6359     zds = BDIGITS(z);
6360
6361 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6362     i = 1;
6363     zds[0] = xds[0] & BIGLO(y);
6364 #else
6365     for (i=0; i < xn; i++) {
6366         if (y == 0 || y == -1) break;
6367         zds[i] = xds[i] & BIGLO(y);
6368         y = BIGDN(y);
6369     }
6370     for (; i < zn; i++) {
6371         if (y == 0 || y == -1) break;
6372         zds[i] = hibitsx & BIGLO(y);
6373         y = BIGDN(y);
6374     }
6375 #endif
6376     for (;i < xn; i++) {
6377         zds[i] = xds[i] & hibitsy;
6378     }
6379     for (;i < zn; i++) {
6380         zds[i] = hibitsx & hibitsy;
6381     }
6382     twocomp2abs_bang(z, hibitsx && hibitsy);
6383     RB_GC_GUARD(x);
6384     return bignorm(z);
6385 }
6386
6387 VALUE
6388 rb_big_and(VALUE x, VALUE y)
6389 {
6390     VALUE z;
6391     BDIGIT *ds1, *ds2, *zds;
6392     long i, xn, yn, n1, n2;
6393     BDIGIT hibitsx, hibitsy;
6394     BDIGIT hibits1, hibits2;
6395     VALUE tmpv;
6396     BDIGIT tmph;
6397     long tmpn;
6398
6399     if (!RB_INTEGER_TYPE_P(y)) {
6400         return rb_num_coerce_bit(x, y, '&');
6401     }
6402
6403     hibitsx = abs2twocomp(&x, &xn);
6404     if (FIXNUM_P(y)) {
6405         return bigand_int(x, xn, hibitsx, FIX2LONG(y));
6406     }
6407     hibitsy = abs2twocomp(&y, &yn);
6408     if (xn > yn) {
6409         tmpv = x; x = y; y = tmpv;
6410         tmpn = xn; xn = yn; yn = tmpn;
6411         tmph = hibitsx; hibitsx = hibitsy; hibitsy = tmph;
6412     }
6413     n1 = xn;
6414     n2 = yn;
6415     ds1 = BDIGITS(x);
6416     ds2 = BDIGITS(y);
6417     hibits1 = hibitsx;
6418     hibits2 = hibitsy;
6419
6420     if (!hibits1)
6421         n2 = n1;
6422
6423     z = bignew(n2, 0);
6424     zds = BDIGITS(z);
6425
6426     for (i=0; i<n1; i++) {
6427         zds[i] = ds1[i] & ds2[i];
6428     }
6429     for (; i<n2; i++) {
6430         zds[i] = hibits1 & ds2[i];
6431     }
6432     twocomp2abs_bang(z, hibits1 && hibits2);
6433     RB_GC_GUARD(x);
6434     RB_GC_GUARD(y);
6435     return bignorm(z);
6436 }
6437
6438 static VALUE
6439 bigor_int(VALUE x, long xn, BDIGIT hibitsx, long y)
6440 {
6441     VALUE z;
6442     BDIGIT *xds, *zds;
6443     long zn;
6444     long i;
6445     BDIGIT hibitsy;
6446
6447     if (y == -1) return INT2FIX(-1);
6448     if (xn == 0) return hibitsx ? INT2FIX(-1) : LONG2FIX(y);
6449     hibitsy = 0 <= y ? 0 : BDIGMAX;
6450     xds = BDIGITS(x);
6451
6452     zn = BIGNUM_LEN(x);
6453 #if SIZEOF_BDIGIT < SIZEOF_LONG
6454     if (zn < bdigit_roomof(SIZEOF_LONG))
6455         zn = bdigit_roomof(SIZEOF_LONG);
6456 #endif
6457     z = bignew(zn, 0);
6458     zds = BDIGITS(z);
6459
6460 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6461     i = 1;
6462     zds[0] = xds[0] | BIGLO(y);
6463     if (i < zn)
6464         goto y_is_fixed_point;
6465     goto finish;
6466 #else
6467     for (i=0; i < xn; i++) {
6468         if (y == 0 || y == -1) goto y_is_fixed_point;
6469         zds[i] = xds[i] | BIGLO(y);
6470         y = BIGDN(y);
6471     }
6472     if (hibitsx)
6473         goto fill_hibits;
6474     for (; i < zn; i++) {
6475         if (y == 0 || y == -1) goto y_is_fixed_point;
6476         zds[i] = BIGLO(y);
6477         y = BIGDN(y);
6478     }
6479   goto finish;
6480 #endif
6481
6482   y_is_fixed_point:
6483     if (hibitsy)
6484         goto fill_hibits;
6485     for (; i < xn; i++) {
6486         zds[i] = xds[i];
6487     }
6488     if (hibitsx)
6489         goto fill_hibits;
6490     for (; i < zn; i++) {
6491         zds[i] = 0;
6492     }
6493   goto finish;
6494
6495   fill_hibits:
6496     for (; i < zn; i++) {
6497         zds[i] = BDIGMAX;
6498     }
6499
6500   finish:
6501     twocomp2abs_bang(z, hibitsx || hibitsy);
6502     RB_GC_GUARD(x);
6503     return bignorm(z);
6504 }
6505
6506 VALUE
6507 rb_big_or(VALUE x, VALUE y)
6508 {
6509     VALUE z;
6510     BDIGIT *ds1, *ds2, *zds;
6511     long i, xn, yn, n1, n2;
6512     BDIGIT hibitsx, hibitsy;
6513     BDIGIT hibits1, hibits2;
6514     VALUE tmpv;
6515     BDIGIT tmph;
6516     long tmpn;
6517
6518     if (!RB_INTEGER_TYPE_P(y)) {
6519         return rb_num_coerce_bit(x, y, '|');
6520     }
6521
6522     hibitsx = abs2twocomp(&x, &xn);
6523     if (FIXNUM_P(y)) {
6524         return bigor_int(x, xn, hibitsx, FIX2LONG(y));
6525     }
6526     hibitsy = abs2twocomp(&y, &yn);
6527     if (xn > yn) {
6528         tmpv = x; x = y; y = tmpv;
6529         tmpn = xn; xn = yn; yn = tmpn;
6530         tmph = hibitsx; hibitsx = hibitsy; hibitsy = tmph;
6531     }
6532     n1 = xn;
6533     n2 = yn;
6534     ds1 = BDIGITS(x);
6535     ds2 = BDIGITS(y);
6536     hibits1 = hibitsx;
6537     hibits2 = hibitsy;
6538
6539     if (hibits1)
6540         n2 = n1;
6541
6542     z = bignew(n2, 0);
6543     zds = BDIGITS(z);
6544
6545     for (i=0; i<n1; i++) {
6546         zds[i] = ds1[i] | ds2[i];
6547     }
6548     for (; i<n2; i++) {
6549         zds[i] = hibits1 | ds2[i];
6550     }
6551     twocomp2abs_bang(z, hibits1 || hibits2);
6552     RB_GC_GUARD(x);
6553     RB_GC_GUARD(y);
6554     return bignorm(z);
6555 }
6556
6557 static VALUE
6558 bigxor_int(VALUE x, long xn, BDIGIT hibitsx, long y)
6559 {
6560     VALUE z;
6561     BDIGIT *xds, *zds;
6562     long zn;
6563     long i;
6564     BDIGIT hibitsy;
6565
6566     hibitsy = 0 <= y ? 0 : BDIGMAX;
6567     xds = BDIGITS(x);
6568     zn = BIGNUM_LEN(x);
6569 #if SIZEOF_BDIGIT < SIZEOF_LONG
6570     if (zn < bdigit_roomof(SIZEOF_LONG))
6571         zn = bdigit_roomof(SIZEOF_LONG);
6572 #endif
6573     z = bignew(zn, 0);
6574     zds = BDIGITS(z);
6575
6576 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6577     i = 1;
6578     zds[0] = xds[0] ^ BIGLO(y);
6579 #else
6580     for (i = 0; i < xn; i++) {
6581         zds[i] = xds[i] ^ BIGLO(y);
6582         y = BIGDN(y);
6583     }
6584     for (; i < zn; i++) {
6585         zds[i] = hibitsx ^ BIGLO(y);
6586         y = BIGDN(y);
6587     }
6588 #endif
6589     for (; i < xn; i++) {
6590         zds[i] = xds[i] ^ hibitsy;
6591     }
6592     for (; i < zn; i++) {
6593         zds[i] = hibitsx ^ hibitsy;
6594     }
6595     twocomp2abs_bang(z, (hibitsx ^ hibitsy) != 0);
6596     RB_GC_GUARD(x);
6597     return bignorm(z);
6598 }
6599
6600 VALUE
6601 rb_big_xor(VALUE x, VALUE y)
6602 {
6603     VALUE z;
6604     BDIGIT *ds1, *ds2, *zds;
6605     long i, xn, yn, n1, n2;
6606     BDIGIT hibitsx, hibitsy;
6607     BDIGIT hibits1, hibits2;
6608     VALUE tmpv;
6609     BDIGIT tmph;
6610     long tmpn;
6611
6612     if (!RB_INTEGER_TYPE_P(y)) {
6613         return rb_num_coerce_bit(x, y, '^');
6614     }
6615
6616     hibitsx = abs2twocomp(&x, &xn);
6617     if (FIXNUM_P(y)) {
6618         return bigxor_int(x, xn, hibitsx, FIX2LONG(y));
6619     }
6620     hibitsy = abs2twocomp(&y, &yn);
6621     if (xn > yn) {
6622         tmpv = x; x = y; y = tmpv;
6623         tmpn = xn; xn = yn; yn = tmpn;
6624         tmph = hibitsx; hibitsx = hibitsy; hibitsy = tmph;
6625     }
6626     n1 = xn;
6627     n2 = yn;
6628     ds1 = BDIGITS(x);
6629     ds2 = BDIGITS(y);
6630     hibits1 = hibitsx;
6631     hibits2 = hibitsy;
6632
6633     z = bignew(n2, 0);
6634     zds = BDIGITS(z);
6635
6636     for (i=0; i<n1; i++) {
6637         zds[i] = ds1[i] ^ ds2[i];
6638     }
6639     for (; i<n2; i++) {
6640         zds[i] = hibitsx ^ ds2[i];
6641     }
6642     twocomp2abs_bang(z, (hibits1 ^ hibits2) != 0);
6643     RB_GC_GUARD(x);
6644     RB_GC_GUARD(y);
6645     return bignorm(z);
6646 }
6647
6648 VALUE
6649 rb_big_lshift(VALUE x, VALUE y)
6650 {
6651     int lshift_p;
6652     size_t shift_numdigits;
6653     int shift_numbits;
6654
6655     for (;;) {
6656         if (FIXNUM_P(y)) {
6657             long l = FIX2LONG(y);
6658             unsigned long shift;
6659             if (0 <= l) {
6660                 lshift_p = 1;
6661                 shift = l;
6662             }
6663             else {
6664                 lshift_p = 0;
6665                 shift = 1+(unsigned long)(-(l+1));
6666             }
6667             shift_numbits = (int)(shift & (BITSPERDIG-1));
6668             shift_numdigits = shift >> bit_length(BITSPERDIG-1);
6669             return bignorm(big_shift3(x, lshift_p, shift_numdigits, shift_numbits));
6670         }
6671         else if (RB_BIGNUM_TYPE_P(y)) {
6672             return bignorm(big_shift2(x, 1, y));
6673         }
6674         y = rb_to_int(y);
6675     }
6676 }
6677
6678 VALUE
6679 rb_big_rshift(VALUE x, VALUE y)
6680 {
6681     int lshift_p;
6682     size_t shift_numdigits;
6683     int shift_numbits;
6684
6685     for (;;) {
6686         if (FIXNUM_P(y)) {
6687             long l = FIX2LONG(y);
6688             unsigned long shift;
6689             if (0 <= l) {
6690                 lshift_p = 0;
6691                 shift = l;
6692             }
6693             else {
6694                 lshift_p = 1;
6695                 shift = 1+(unsigned long)(-(l+1));
6696             }
6697             shift_numbits = (int)(shift & (BITSPERDIG-1));
6698             shift_numdigits = shift >> bit_length(BITSPERDIG-1);
6699             return bignorm(big_shift3(x, lshift_p, shift_numdigits, shift_numbits));
6700         }
6701         else if (RB_BIGNUM_TYPE_P(y)) {
6702             return bignorm(big_shift2(x, 0, y));
6703         }
6704         y = rb_to_int(y);
6705     }
6706 }
6707
6708 VALUE
6709 rb_big_aref(VALUE x, VALUE y)
6710 {
6711     BDIGIT *xds;
6712     size_t shift;
6713     size_t i, s1, s2;
6714     long l;
6715     BDIGIT bit;
6716
6717     if (RB_BIGNUM_TYPE_P(y)) {
6718         if (BIGNUM_NEGATIVE_P(y))
6719             return INT2FIX(0);
6720         bigtrunc(y);
6721         if (BIGSIZE(y) > sizeof(size_t)) {
6722             return BIGNUM_SIGN(x) ? INT2FIX(0) : INT2FIX(1);
6723         }
6724 #if SIZEOF_SIZE_T <= SIZEOF_LONG
6725         shift = big2ulong(y, "long");
6726 #else
6727         shift = big2ull(y, "long long");
6728 #endif
6729     }
6730     else {
6731         l = NUM2LONG(y);
6732         if (l < 0) return INT2FIX(0);
6733         shift = (size_t)l;
6734     }
6735     s1 = shift/BITSPERDIG;
6736     s2 = shift%BITSPERDIG;
6737     bit = (BDIGIT)1 << s2;
6738
6739     if (s1 >= BIGNUM_LEN(x))
6740         return BIGNUM_SIGN(x) ? INT2FIX(0) : INT2FIX(1);
6741
6742     xds = BDIGITS(x);
6743     if (BIGNUM_POSITIVE_P(x))
6744         return (xds[s1] & bit) ? INT2FIX(1) : INT2FIX(0);
6745     if (xds[s1] & (bit-1))
6746         return (xds[s1] & bit) ? INT2FIX(0) : INT2FIX(1);
6747     for (i = 0; i < s1; i++)
6748         if (xds[i])
6749             return (xds[s1] & bit) ? INT2FIX(0) : INT2FIX(1);
6750     return (xds[s1] & bit) ? INT2FIX(1) : INT2FIX(0);
6751 }
6752
6753 VALUE
6754 rb_big_hash(VALUE x)
6755 {
6756     st_index_t hash;
6757
6758     hash = rb_memhash(BDIGITS(x), sizeof(BDIGIT)*BIGNUM_LEN(x)) ^ BIGNUM_SIGN(x);
6759     return ST2FIX(hash);
6760 }
6761
6762 /*
6763  * call-seq:
6764  *   int.coerce(numeric)  ->  array
6765  *
6766  * Returns an array with both a +numeric+ and a +int+ represented as
6767  * Integer objects or Float objects.
6768  *
6769  * This is achieved by converting +numeric+ to an Integer or a Float.
6770  *
6771  * A TypeError is raised if the +numeric+ is not an Integer or a Float
6772  * type.
6773  *
6774  *     (0x3FFFFFFFFFFFFFFF+1).coerce(42)   #=> [42, 4611686018427387904]
6775  */
6776
6777 static VALUE
6778 rb_int_coerce(VALUE x, VALUE y)
6779 {
6780     if (RB_INTEGER_TYPE_P(y)) {
6781         return rb_assoc_new(y, x);
6782     }
6783     else {
6784         x = rb_Float(x);
6785         y = rb_Float(y);
6786         return rb_assoc_new(y, x);
6787     }
6788 }
6789
6790 VALUE
6791 rb_big_abs(VALUE x)
6792 {
6793     if (BIGNUM_NEGATIVE_P(x)) {
6794         x = rb_big_clone(x);
6795         BIGNUM_SET_POSITIVE_SIGN(x);
6796     }
6797     return x;
6798 }
6799
6800 int
6801 rb_big_sign(VALUE x)
6802 {
6803     return BIGNUM_SIGN(x);
6804 }
6805
6806 size_t
6807 rb_big_size(VALUE big)
6808 {
6809     return BIGSIZE(big);
6810 }
6811
6812 VALUE
6813 rb_big_size_m(VALUE big)
6814 {
6815     return SIZET2NUM(rb_big_size(big));
6816 }
6817
6818 VALUE
6819 rb_big_bit_length(VALUE big)
6820 {
6821     int nlz_bits;
6822     size_t numbytes;
6823
6824     static const BDIGIT char_bit[1] = { CHAR_BIT };
6825     BDIGIT numbytes_bary[bdigit_roomof(sizeof(size_t))];
6826     BDIGIT nlz_bary[1];
6827     BDIGIT result_bary[bdigit_roomof(sizeof(size_t)+1)];
6828
6829     numbytes = rb_absint_size(big, &nlz_bits);
6830
6831     if (numbytes == 0)
6832         return LONG2FIX(0);
6833
6834     if (BIGNUM_NEGATIVE_P(big) && rb_absint_singlebit_p(big)) {
6835         if (nlz_bits != CHAR_BIT-1) {
6836             nlz_bits++;
6837         }
6838         else {
6839             nlz_bits = 0;
6840             numbytes--;
6841         }
6842     }
6843
6844     if (numbytes <= SIZE_MAX / CHAR_BIT) {
6845         return SIZET2NUM(numbytes * CHAR_BIT - nlz_bits);
6846     }
6847
6848     nlz_bary[0] = nlz_bits;
6849
6850     bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0,
6851             INTEGER_PACK_NATIVE_BYTE_ORDER);
6852     BARY_SHORT_MUL(result_bary, numbytes_bary, char_bit);
6853     BARY_SUB(result_bary, result_bary, nlz_bary);
6854
6855     return rb_integer_unpack(result_bary, numberof(result_bary), sizeof(BDIGIT), 0,
6856             INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER);
6857 }
6858
6859 VALUE
6860 rb_big_odd_p(VALUE num)
6861 {
6862     return RBOOL(BIGNUM_LEN(num) != 0 && BDIGITS(num)[0] & 1);
6863 }
6864
6865 VALUE
6866 rb_big_even_p(VALUE num)
6867 {
6868     if (BIGNUM_LEN(num) != 0 && BDIGITS(num)[0] & 1) {
6869         return Qfalse;
6870     }
6871     return Qtrue;
6872 }
6873
6874 unsigned long rb_ulong_isqrt(unsigned long);
6875 #if SIZEOF_BDIGIT*2 > SIZEOF_LONG
6876 BDIGIT rb_bdigit_dbl_isqrt(BDIGIT_DBL);
6877 # ifdef ULL_TO_DOUBLE
6878 #   define BDIGIT_DBL_TO_DOUBLE(n) ULL_TO_DOUBLE(n)
6879 # endif
6880 #else
6881 # define rb_bdigit_dbl_isqrt(x) (BDIGIT)rb_ulong_isqrt(x)
6882 #endif
6883 #ifndef BDIGIT_DBL_TO_DOUBLE
6884 # define BDIGIT_DBL_TO_DOUBLE(n) (double)(n)
6885 #endif
6886
6887 VALUE
6888 rb_big_isqrt(VALUE n)
6889 {
6890     BDIGIT *nds = BDIGITS(n);
6891     size_t len = BIGNUM_LEN(n);
6892
6893     if (len <= 2) {
6894         BDIGIT sq = rb_bdigit_dbl_isqrt(bary2bdigitdbl(nds, len));
6895 #if SIZEOF_BDIGIT > SIZEOF_LONG
6896         return ULL2NUM(sq);
6897 #else
6898         return ULONG2NUM(sq);
6899 #endif
6900     }
6901     else {
6902         size_t shift = FIX2LONG(rb_big_bit_length(n)) / 4;
6903         VALUE n2 = rb_int_rshift(n, SIZET2NUM(2 * shift));
6904         VALUE x = FIXNUM_P(n2) ? LONG2FIX(rb_ulong_isqrt(FIX2ULONG(n2))) : rb_big_isqrt(n2);
6905         /* x = (x+n/x)/2 */
6906         x = rb_int_plus(rb_int_lshift(x, SIZET2NUM(shift - 1)), rb_int_idiv(rb_int_rshift(n, SIZET2NUM(shift + 1)), x));
6907         VALUE xx = rb_int_mul(x, x);
6908         while (rb_int_gt(xx, n)) {
6909             xx = rb_int_minus(xx, rb_int_minus(rb_int_plus(x, x), INT2FIX(1)));
6910             x = rb_int_minus(x, INT2FIX(1));
6911         }
6912         return x;
6913     }
6914 }
6915
6916 #if USE_GMP
6917 static void
6918 bary_powm_gmp(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, const BDIGIT *mds, size_t mn)
6919 {
6920     mpz_t z, x, y, m;
6921     size_t count;
6922     mpz_init(x);
6923     mpz_init(y);
6924     mpz_init(m);
6925     mpz_init(z);
6926     bdigits_to_mpz(x, xds, xn);
6927     bdigits_to_mpz(y, yds, yn);
6928     bdigits_to_mpz(m, mds, mn);
6929     mpz_powm(z, x, y, m);
6930     bdigits_from_mpz(z, zds, &count);
6931     BDIGITS_ZERO(zds+count, zn-count);
6932     mpz_clear(x);
6933     mpz_clear(y);
6934     mpz_clear(m);
6935     mpz_clear(z);
6936 }
6937 #endif
6938
6939 static VALUE
6940 int_pow_tmp3(VALUE x, VALUE y, VALUE m, int nega_flg)
6941 {
6942 #if USE_GMP
6943     VALUE z;
6944     size_t xn, yn, mn, zn;
6945
6946     if (FIXNUM_P(x)) {
6947        x = rb_int2big(FIX2LONG(x));
6948     }
6949     if (FIXNUM_P(y)) {
6950        y = rb_int2big(FIX2LONG(y));
6951     }
6952     RUBY_ASSERT(RB_BIGNUM_TYPE_P(m));
6953     xn = BIGNUM_LEN(x);
6954     yn = BIGNUM_LEN(y);
6955     mn = BIGNUM_LEN(m);
6956     zn = mn;
6957     z = bignew(zn, 1);
6958     bary_powm_gmp(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, BDIGITS(m), mn);
6959     if (nega_flg & BIGNUM_POSITIVE_P(z)) {
6960         z = rb_big_minus(z, m);
6961     }
6962     RB_GC_GUARD(x);
6963     RB_GC_GUARD(y);
6964     RB_GC_GUARD(m);
6965     return rb_big_norm(z);
6966 #else
6967     VALUE tmp = LONG2FIX(1L);
6968     long yy;
6969
6970     for (/*NOP*/; ! FIXNUM_P(y); y = rb_big_rshift(y, LONG2FIX(1L))) {
6971         if (RTEST(rb_int_odd_p(y))) {
6972             tmp = rb_int_mul(tmp, x);
6973             tmp = rb_int_modulo(tmp, m);
6974         }
6975         x = rb_int_mul(x, x);
6976         x = rb_int_modulo(x, m);
6977     }
6978     for (yy = FIX2LONG(y); yy; yy >>= 1L) {
6979         if (yy & 1L) {
6980             tmp = rb_int_mul(tmp, x);
6981             tmp = rb_int_modulo(tmp, m);
6982         }
6983         x = rb_int_mul(x, x);
6984         x = rb_int_modulo(x, m);
6985     }
6986
6987     if (nega_flg && rb_int_positive_p(tmp)) {
6988         tmp = rb_int_minus(tmp, m);
6989     }
6990     return tmp;
6991 #endif
6992 }
6993
6994 /*
6995  * Integer#pow
6996  */
6997
6998 static VALUE
6999 int_pow_tmp1(VALUE x, VALUE y, long mm, int nega_flg)
7000 {
7001     long xx = FIX2LONG(x);
7002     long tmp = 1L;
7003     long yy;
7004
7005     for (/*NOP*/; ! FIXNUM_P(y); y = rb_big_rshift(y, LONG2FIX(1L))) {
7006         if (RTEST(rb_int_odd_p(y))) {
7007             tmp = (tmp * xx) % mm;
7008         }
7009         xx = (xx * xx) % mm;
7010     }
7011     for (yy = FIX2LONG(y); yy; yy >>= 1L) {
7012         if (yy & 1L) {
7013             tmp = (tmp * xx) % mm;
7014         }
7015         xx = (xx * xx) % mm;
7016     }
7017
7018     if (nega_flg && tmp) {
7019         tmp -= mm;
7020     }
7021     return LONG2FIX(tmp);
7022 }
7023
7024 static VALUE
7025 int_pow_tmp2(VALUE x, VALUE y, long mm, int nega_flg)
7026 {
7027     long tmp = 1L;
7028     long yy;
7029 #ifdef DLONG
7030     const DLONG m = mm;
7031     long tmp2 = tmp;
7032     long xx = FIX2LONG(x);
7033 # define MUL_MODULO(a, b, c) (long)(((DLONG)(a) * (DLONG)(b)) % (c))
7034 #else
7035     const VALUE m = LONG2FIX(mm);
7036     VALUE tmp2 = LONG2FIX(tmp);
7037     VALUE xx = x;
7038 # define MUL_MODULO(a, b, c) rb_int_modulo(rb_fix_mul_fix((a), (b)), (c))
7039 #endif
7040
7041     for (/*NOP*/; ! FIXNUM_P(y); y = rb_big_rshift(y, LONG2FIX(1L))) {
7042         if (RTEST(rb_int_odd_p(y))) {
7043             tmp2 = MUL_MODULO(tmp2, xx, m);
7044         }
7045         xx = MUL_MODULO(xx, xx, m);
7046     }
7047     for (yy = FIX2LONG(y); yy; yy >>= 1L) {
7048         if (yy & 1L) {
7049             tmp2 = MUL_MODULO(tmp2, xx, m);
7050         }
7051         xx = MUL_MODULO(xx, xx, m);
7052     }
7053
7054 #ifdef DLONG
7055     tmp = tmp2;
7056 #else
7057     tmp = FIX2LONG(tmp2);
7058 #endif
7059     if (nega_flg && tmp) {
7060         tmp -= mm;
7061     }
7062     return LONG2FIX(tmp);
7063 }
7064
7065 /*
7066  * Document-method: Integer#pow
7067  * call-seq:
7068  *    integer.pow(numeric)           ->  numeric
7069  *    integer.pow(integer, integer)  ->  integer
7070  *
7071  * Returns (modular) exponentiation as:
7072  *
7073  *   a.pow(b)     #=> same as a**b
7074  *   a.pow(b, m)  #=> same as (a**b) % m, but avoids huge temporary values
7075  */
7076 VALUE
7077 rb_int_powm(int const argc, VALUE * const argv, VALUE const num)
7078 {
7079     rb_check_arity(argc, 1, 2);
7080
7081     if (argc == 1) {
7082         return rb_int_pow(num, argv[0]);
7083     }
7084     else {
7085         VALUE const a = num;
7086         VALUE const b = argv[0];
7087         VALUE m = argv[1];
7088         int nega_flg = 0;
7089         if ( ! RB_INTEGER_TYPE_P(b)) {
7090             rb_raise(rb_eTypeError, "Integer#pow() 2nd argument not allowed unless a 1st argument is integer");
7091         }
7092         if (rb_int_negative_p(b)) {
7093             rb_raise(rb_eRangeError, "Integer#pow() 1st argument cannot be negative when 2nd argument specified");
7094         }
7095         if (!RB_INTEGER_TYPE_P(m)) {
7096             rb_raise(rb_eTypeError, "Integer#pow() 2nd argument not allowed unless all arguments are integers");
7097         }
7098
7099         if (rb_int_negative_p(m)) {
7100             m = rb_int_uminus(m);
7101             nega_flg = 1;
7102         }
7103
7104         if (FIXNUM_P(m)) {
7105             long const half_val = (long)HALF_LONG_MSB;
7106             long const mm = FIX2LONG(m);
7107             if (!mm) rb_num_zerodiv();
7108             if (mm == 1) return INT2FIX(0);
7109             if (mm <= half_val) {
7110                 return int_pow_tmp1(rb_int_modulo(a, m), b, mm, nega_flg);
7111             }
7112             else {
7113                 return int_pow_tmp2(rb_int_modulo(a, m), b, mm, nega_flg);
7114             }
7115         }
7116         else {
7117             if (rb_bigzero_p(m)) rb_num_zerodiv();
7118             if (bignorm(m) == INT2FIX(1)) return INT2FIX(0);
7119             return int_pow_tmp3(rb_int_modulo(a, m), b, m, nega_flg);
7120         }
7121     }
7122     UNREACHABLE_RETURN(Qnil);
7123 }
7124
7125 /*
7126  *  Bignum objects hold integers outside the range of
7127  *  Fixnum. Bignum objects are created
7128  *  automatically when integer calculations would otherwise overflow a
7129  *  Fixnum. When a calculation involving
7130  *  Bignum objects returns a result that will fit in a
7131  *  Fixnum, the result is automatically converted.
7132  *
7133  *  For the purposes of the bitwise operations and <code>[]</code>, a
7134  *  Bignum is treated as if it were an infinite-length
7135  *  bitstring with 2's complement representation.
7136  *
7137  *  While Fixnum values are immediate, Bignum
7138  *  objects are not---assignment and parameter passing work with
7139  *  references to objects, not the objects themselves.
7140  *
7141  */
7142
7143 void
7144 Init_Bignum(void)
7145 {
7146     rb_define_method(rb_cInteger, "coerce", rb_int_coerce, 1);
7147
7148 #if USE_GMP
7149     /* The version of loaded GMP. */
7150     rb_define_const(rb_cInteger, "GMP_VERSION", rb_sprintf("GMP %s", gmp_version));
7151 #endif
7152
7153     power_cache_init();
7154 }