source/libs/gmp/gmp-src/mpz/lucnum_ui.c

   1 /* mpz_lucnum_ui -- calculate Lucas number.
   2
   3 Copyright 2001, 2003, 2005, 2011, 2012 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of either:
   9
  10   * the GNU Lesser General Public License as published by the Free
  11     Software Foundation; either version 3 of the License, or (at your
  12     option) any later version.
  13
  14 or
  15
  16   * the GNU General Public License as published by the Free Software
  17     Foundation; either version 2 of the License, or (at your option) any
  18     later version.
  19
  20 or both in parallel, as here.
  21
  22 The GNU MP Library is distributed in the hope that it will be useful, but
  23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  25 for more details.
  26
  27 You should have received copies of the GNU General Public License and the
  28 GNU Lesser General Public License along with the GNU MP Library.  If not,
  29 see https://www.gnu.org/licenses/.  */
  30
  31 #include <stdio.h>
  32 #include "gmp.h"
  33 #include "gmp-impl.h"
  34
  35
  36 /* change this to "#define TRACE(x) x" for diagnostics */
  37 #define TRACE(x)
  38
  39
  40 /* Notes:
  41
  42    For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
  43    there can't be an overflow applying +4 to just the low limb (since that
  44    would leave 0, 1, 2 or 3 mod 8).
  45
  46    For the -4 in L[2k+1] when k is even, it seems (no proof) that
  47    L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
  48    L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
  49    low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
  50    conceivable to calculate it, so it probably should be handled.
  51
  52    For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
  53    2^b, so for instance in 32-bits L[0x80000000] has a low limb of
  54    0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
  55    obviously huge, but probably should be made to work.  */
  56
  57 void
  58 mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
  59 {
  60   mp_size_t  lalloc, xalloc, lsize, xsize;
  61   mp_ptr     lp, xp;
  62   mp_limb_t  c;
  63   int        zeros;
  64   TMP_DECL;
  65
  66   TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
  67
  68   if (n <= FIB_TABLE_LUCNUM_LIMIT)
  69     {
  70       /* L[n] = F[n] + 2F[n-1] */
  71       PTR(ln)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
  72       SIZ(ln) = 1;
  73       return;
  74     }
  75
  76   /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
  77      since square or mul used below might need an extra limb over the true
  78      size */
  79   lalloc = MPN_FIB2_SIZE (n) + 2;
  80   lp = MPZ_REALLOC (ln, lalloc);
  81
  82   TMP_MARK;
  83   xalloc = lalloc;
  84   xp = TMP_ALLOC_LIMBS (xalloc);
  85
  86   /* Strip trailing zeros from n, until either an odd number is reached
  87      where the L[2k+1] formula can be used, or until n fits within the
  88      FIB_TABLE data.  The table is preferred of course.  */
  89   zeros = 0;
  90   for (;;)
  91     {
  92       if (n & 1)
  93         {
  94           /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
  95
  96           mp_size_t  yalloc, ysize;
  97           mp_ptr     yp;
  98
  99           TRACE (printf ("  initial odd n=%lu\n", n));
 100
 101           yalloc = MPN_FIB2_SIZE (n/2);
 102           yp = TMP_ALLOC_LIMBS (yalloc);
 103           ASSERT (xalloc >= yalloc);
 104
 105           xsize = mpn_fib2_ui (xp, yp, n/2);
 106
 107           /* possible high zero on F[k-1] */
 108           ysize = xsize;
 109           ysize -= (yp[ysize-1] == 0);
 110           ASSERT (yp[ysize-1] != 0);
 111
 112           /* xp = 2*F[k] + F[k-1] */
 113 #if HAVE_NATIVE_mpn_addlsh1_n
 114           c = mpn_addlsh1_n (xp, yp, xp, xsize);
 115 #else
 116           c = mpn_lshift (xp, xp, xsize, 1);
 117           c += mpn_add_n (xp, xp, yp, xsize);
 118 #endif
 119           ASSERT (xalloc >= xsize+1);
 120           xp[xsize] = c;
 121           xsize += (c != 0);
 122           ASSERT (xp[xsize-1] != 0);
 123
 124           ASSERT (lalloc >= xsize + ysize);
 125           c = mpn_mul (lp, xp, xsize, yp, ysize);
 126           lsize = xsize + ysize;
 127           lsize -= (c == 0);
 128
 129           /* lp = 5*lp */
 130 #if HAVE_NATIVE_mpn_addlsh2_n
 131           c = mpn_addlsh2_n (lp, lp, lp, lsize);
 132 #else
 133           /* FIXME: Is this faster than mpn_mul_1 ? */
 134           c = mpn_lshift (xp, lp, lsize, 2);
 135           c += mpn_add_n (lp, lp, xp, lsize);
 136 #endif
 137           ASSERT (lalloc >= lsize+1);
 138           lp[lsize] = c;
 139           lsize += (c != 0);
 140
 141           /* lp = lp - 4*(-1)^k */
 142           if (n & 2)
 143             {
 144               /* no overflow, see comments above */
 145               ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
 146               lp[0] += 4;
 147             }
 148           else
 149             {
 150               /* won't go negative */
 151               MPN_DECR_U (lp, lsize, CNST_LIMB(4));
 152             }
 153
 154           TRACE (mpn_trace ("  l",lp, lsize));
 155           break;
 156         }
 157
 158       MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
 159       zeros++;
 160       n /= 2;
 161
 162       if (n <= FIB_TABLE_LUCNUM_LIMIT)
 163         {
 164           /* L[n] = F[n] + 2F[n-1] */
 165           lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
 166           lsize = 1;
 167
 168           TRACE (printf ("  initial small n=%lu\n", n);
 169                  mpn_trace ("  l",lp, lsize));
 170           break;
 171         }
 172     }
 173
 174   for ( ; zeros != 0; zeros--)
 175     {
 176       /* L[2k] = L[k]^2 + 2*(-1)^k */
 177
 178       TRACE (printf ("  zeros=%d\n", zeros));
 179
 180       ASSERT (xalloc >= 2*lsize);
 181       mpn_sqr (xp, lp, lsize);
 182       lsize *= 2;
 183       lsize -= (xp[lsize-1] == 0);
 184
 185       /* First time around the loop k==n determines (-1)^k, after that k is
 186          always even and we set n=0 to indicate that.  */
 187       if (n & 1)
 188         {
 189           /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
 190           ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
 191           xp[0] += 2;
 192           n = 0;
 193         }
 194       else
 195         {
 196           /* won't go negative */
 197           MPN_DECR_U (xp, lsize, CNST_LIMB(2));
 198         }
 199
 200       MP_PTR_SWAP (xp, lp);
 201       ASSERT (lp[lsize-1] != 0);
 202     }
 203
 204   /* should end up in the right spot after all the xp/lp swaps */
 205   ASSERT (lp == PTR(ln));
 206   SIZ(ln) = lsize;
 207
 208   TMP_FREE;
 209 }