1 /* mpn_mulmid -- middle product
3 Contributed by David Harvey.
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2011 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
26 or both in parallel, as here.
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
42 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
46 mpn_mulmid (mp_ptr rp
,
47 mp_srcptr ap
, mp_size_t an
,
48 mp_srcptr bp
, mp_size_t bn
)
55 ASSERT (! MPN_OVERLAP_P (rp
, an
- bn
+ 3, ap
, an
));
56 ASSERT (! MPN_OVERLAP_P (rp
, an
- bn
+ 3, bp
, bn
));
58 if (bn
< MULMID_TOOM42_THRESHOLD
)
60 /* region not tall enough to make toom42 worthwhile for any portion */
64 /* region not too wide either, just call basecase directly */
65 mpn_mulmid_basecase (rp
, ap
, an
, bp
, bn
);
69 /* Region quite wide. For better locality, use basecase on chunks:
76 k
= CHUNK
- bn
+ 1; /* number of diagonals per chunk */
78 /* first chunk (marked A in the above diagram) */
79 mpn_mulmid_basecase (rp
, ap
, CHUNK
, bp
, bn
);
81 /* remaining chunks (B, C, etc) */
88 t0
= rp
[0], t1
= rp
[1];
89 mpn_mulmid_basecase (rp
, ap
, CHUNK
, bp
, bn
);
90 ADDC_LIMB (cy
, rp
[0], rp
[0], t0
); /* add back saved limbs */
91 MPN_INCR_U (rp
+ 1, k
+ 1, t1
+ cy
);
97 /* last remaining chunk */
100 t0
= rp
[0], t1
= rp
[1];
101 mpn_mulmid_basecase (rp
, ap
, an
, bp
, bn
);
102 ADDC_LIMB (cy
, rp
[0], rp
[0], t0
);
103 MPN_INCR_U (rp
+ 1, an
- bn
+ 2, t1
+ cy
);
109 /* region is tall enough for toom42 */
113 if (rn
< MULMID_TOOM42_THRESHOLD
)
115 /* region not wide enough to make toom42 worthwhile for any portion */
121 /* region not too tall either, just call basecase directly */
122 mpn_mulmid_basecase (rp
, ap
, an
, bp
, bn
);
126 /* Region quite tall. For better locality, use basecase on chunks:
137 temp
= TMP_ALLOC_LIMBS (rn
+ 2);
139 /* first chunk (marked A in the above diagram) */
140 bp
+= bn
- CHUNK
, an
-= bn
- CHUNK
;
141 mpn_mulmid_basecase (rp
, ap
, an
, bp
, CHUNK
);
143 /* remaining chunks (B, C, etc) */
148 ap
+= CHUNK
, bp
-= CHUNK
;
149 mpn_mulmid_basecase (temp
, ap
, an
, bp
, CHUNK
);
150 mpn_add_n (rp
, rp
, temp
, rn
+ 2);
156 /* last remaining chunk */
157 ap
+= CHUNK
, bp
-= bn
;
158 mpn_mulmid_basecase (temp
, ap
, rn
+ bn
- 1, bp
, bn
);
159 mpn_add_n (rp
, rp
, temp
, rn
+ 2);
166 /* we're definitely going to use toom42 somewhere */
170 /* slice region into chunks, use toom42 on all chunks except possibly
183 temp
= TMP_ALLOC_LIMBS (rn
+ 2 + mpn_toom42_mulmid_itch (rn
));
184 scratch
= temp
+ rn
+ 2;
186 /* first chunk (marked A in the above diagram) */
188 mpn_toom42_mulmid (rp
, ap
, bp
, rn
, scratch
);
190 /* remaining chunks (B, C, etc) */
196 mpn_toom42_mulmid (temp
, ap
, bp
, rn
, scratch
);
197 mpn_add_n (rp
, rp
, temp
, rn
+ 2);
203 /* last remaining chunk */
205 mpn_mulmid (temp
, ap
, rn
+ bn
- 1, bp
, bn
);
206 mpn_add_n (rp
, rp
, temp
, rn
+ 2);
213 /* slice region into chunks, use toom42 on all chunks except possibly
224 scratch
= TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn
));
226 /* first chunk (marked A in the above diagram) */
227 mpn_toom42_mulmid (rp
, ap
, bp
, bn
, scratch
);
229 /* remaining chunks (B, C, etc) */
234 mp_limb_t t0
, t1
, cy
;
236 t0
= rp
[0], t1
= rp
[1];
237 mpn_toom42_mulmid (rp
, ap
, bp
, bn
, scratch
);
238 ADDC_LIMB (cy
, rp
[0], rp
[0], t0
); /* add back saved limbs */
239 MPN_INCR_U (rp
+ 1, bn
+ 1, t1
+ cy
);
247 /* last remaining chunk */
248 mp_limb_t t0
, t1
, cy
;
250 t0
= rp
[0], t1
= rp
[1];
251 mpn_mulmid (rp
, ap
, rn
+ bn
- 1, bp
, bn
);
252 ADDC_LIMB (cy
, rp
[0], rp
[0], t0
);
253 MPN_INCR_U (rp
+ 1, rn
+ 1, t1
+ cy
);