dma: rework config parsing
[dragonfly.git] / contrib / gmp / mpn / generic / dive_1.c
blob27df57b80e9c257f15bbc83b19f984844f94f549
1 /* mpn_divexact_1 -- mpn by limb exact division.
3 THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4 CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5 FUTURE GNU MP RELEASES.
7 Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
9 This file is part of the GNU MP Library.
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
24 #include "gmp.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
30 /* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
31 q will only be correct if d divides a exactly.
33 A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
34 give zero on all CPUs (for instance it doesn't on the x86s). This
35 separate loop might run faster too, helping odd divisors.
37 Possibilities:
39 mpn_divexact_1c could be created, accepting and returning c. This would
40 let a long calculation be done piece by piece. Currently there's no
41 particular need for that, and not returning c means that a final umul can
42 be skipped.
44 Another use for returning c would be letting the caller know whether the
45 division was in fact exact. It would work just to return the carry bit
46 "c=(l>s)" and let the caller do a final umul if interested.
48 When the divisor is even, the factors of two could be handled with a
49 separate mpn_rshift, instead of shifting on the fly. That might be
50 faster on some CPUs and would mean just the shift==0 style loop would be
51 needed.
53 If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
54 shift==0 loop is unnecessary, and could be eliminated if there's no great
55 speed difference.
57 It's not clear whether "/" is the best way to handle size==1. Alpha gcc
58 2.95 for instance has a poor "/" and might prefer the modular method.
59 Perhaps a tuned parameter should control this.
61 If src[size-1] < divisor then dst[size-1] will be zero, and one divide
62 step could be skipped. A test at last step for s<divisor (or ls in the
63 even case) might be a good way to do that. But if this code is often
64 used with small divisors then it might not be worth bothering. */
66 void
67 mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
69 mp_size_t i;
70 mp_limb_t c, h, l, ls, s, s_next, inverse, dummy;
71 unsigned shift;
73 ASSERT (size >= 1);
74 ASSERT (divisor != 0);
75 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
76 ASSERT_MPN (src, size);
77 ASSERT_LIMB (divisor);
79 s = src[0];
81 if (size == 1)
83 dst[0] = s / divisor;
84 return;
87 if ((divisor & 1) == 0)
89 count_trailing_zeros (shift, divisor);
90 divisor >>= shift;
92 else
93 shift = 0;
95 binvert_limb (inverse, divisor);
96 divisor <<= GMP_NAIL_BITS;
98 if (shift != 0)
100 c = 0;
101 i = 0;
102 size--;
106 s_next = src[i+1];
107 ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
108 s = s_next;
110 SUBC_LIMB (c, l, ls, c);
112 l = (l * inverse) & GMP_NUMB_MASK;
113 dst[i] = l;
115 umul_ppmm (h, dummy, l, divisor);
116 c += h;
118 i++;
120 while (i < size);
122 ls = s >> shift;
123 l = ls - c;
124 l = (l * inverse) & GMP_NUMB_MASK;
125 dst[i] = l;
127 else
129 l = (s * inverse) & GMP_NUMB_MASK;
130 dst[0] = l;
131 i = 1;
132 c = 0;
136 umul_ppmm (h, dummy, l, divisor);
137 c += h;
139 s = src[i];
140 SUBC_LIMB (c, l, s, c);
142 l = (l * inverse) & GMP_NUMB_MASK;
143 dst[i] = l;
144 i++;
146 while (i < size);