Make more use of GET_MODE_UNIT_SIZE
[official-gcc.git] / libstdc++-v3 / include / parallel / multiseq_selection.h
blobb5d6941c742832bed1abc1f2c13f5af2562c9706
// -*- C++ -*-

// Copyright (C) 2007-2017 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/** @file parallel/multiseq_selection.h
 *  @brief Functions to find elements of a certain global rank in
 *  multiple sorted sequences. Also serves for splitting such
 *  sequence sets.
 *
 *  The algorithm description can be found in
 *
 *  P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard.
 *  Merging Multiple Lists on Hierarchical-Memory Multiprocessors.
 *  Journal of Parallel and Distributed Computing, 12(2):171–177, 1991.
 *
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Johannes Singler.
41 #ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
42 #define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
44 #include <vector>
45 #include <queue>
47 #include <bits/stl_algo.h>
49 namespace __gnu_parallel
/**
 *  @brief Compare a pair of types lexicographically, ascending.
 *
 *  The first components are ordered with the user-supplied comparator;
 *  when neither first component precedes the other, the second
 *  components are ordered with operator<.
 *
 *  The functor stores a reference to the comparator, so the comparator
 *  object must outlive the functor.
 */
template<typename _T1, typename _T2, typename _Compare>
  class _Lexicographic
  {
  private:
    _Compare& _M_comp;

  public:
    // Adaptable-binary-function typedefs, spelled out directly instead
    // of inheriting them from std::binary_function.
    typedef std::pair<_T1, _T2> first_argument_type;
    typedef std::pair<_T1, _T2> second_argument_type;
    typedef bool result_type;

    _Lexicographic(_Compare& __comp) : _M_comp(__comp) { }

    /** @brief Returns true iff @c __p1 is ordered before @c __p2. */
    bool
    operator()(const std::pair<_T1, _T2>& __p1,
               const std::pair<_T1, _T2>& __p2) const
    {
      if (_M_comp(__p1.first, __p2.first))
        return true;

      if (_M_comp(__p2.first, __p1.first))
        return false;

      // Firsts are equal.
      return __p1.second < __p2.second;
    }
  };
/**
 *  @brief Compare a pair of types lexicographically, descending.
 *
 *  Yields true when the second argument precedes the first one: the
 *  first components are ordered with the user-supplied comparator and,
 *  when neither first component precedes the other, the second
 *  components are ordered with operator<.
 *
 *  The functor stores a reference to the comparator, so the comparator
 *  object must outlive the functor.
 */
template<typename _T1, typename _T2, typename _Compare>
  class _LexicographicReverse
  {
  private:
    _Compare& _M_comp;

  public:
    // Both arguments of operator() are pairs, so the advertised
    // argument types are the pair type, not its components.
    typedef std::pair<_T1, _T2> first_argument_type;
    typedef std::pair<_T1, _T2> second_argument_type;
    typedef bool result_type;

    _LexicographicReverse(_Compare& __comp) : _M_comp(__comp) { }

    /** @brief Returns true iff @c __p2 is ordered before @c __p1. */
    bool
    operator()(const std::pair<_T1, _T2>& __p1,
               const std::pair<_T1, _T2>& __p2) const
    {
      if (_M_comp(__p2.first, __p1.first))
        return true;

      if (_M_comp(__p1.first, __p2.first))
        return false;

      // Firsts are equal.
      return __p2.second < __p1.second;
    }
  };
103 /**
104 * @brief Splits several sorted sequences at a certain global __rank,
105 * resulting in a splitting point for each sequence.
106 * The sequences are passed via a sequence of random-access
107 * iterator pairs, none of the sequences may be empty. If there
108 * are several equal elements across the split, the ones on the
109 * __left side will be chosen from sequences with smaller number.
110 * @param __begin_seqs Begin of the sequence of iterator pairs.
111 * @param __end_seqs End of the sequence of iterator pairs.
112 * @param __rank The global rank to partition at.
113 * @param __begin_offsets A random-access __sequence __begin where the
114 * __result will be stored in. Each element of the sequence is an
115 * iterator that points to the first element on the greater part of
116 * the respective __sequence.
117 * @param __comp The ordering functor, defaults to std::less<_Tp>.
119 template<typename _RanSeqs, typename _RankType, typename _RankIterator,
120 typename _Compare>
121 void
122 multiseq_partition(_RanSeqs __begin_seqs, _RanSeqs __end_seqs,
123 _RankType __rank,
124 _RankIterator __begin_offsets,
125 _Compare __comp = std::less<
126 typename std::iterator_traits<typename
127 std::iterator_traits<_RanSeqs>::value_type::
128 first_type>::value_type>()) // std::less<_Tp>
130 _GLIBCXX_CALL(__end_seqs - __begin_seqs)
132 typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type
133 _It;
134 typedef typename std::iterator_traits<_RanSeqs>::difference_type
135 _SeqNumber;
136 typedef typename std::iterator_traits<_It>::difference_type
137 _DifferenceType;
138 typedef typename std::iterator_traits<_It>::value_type _ValueType;
140 _Lexicographic<_ValueType, _SeqNumber, _Compare> __lcomp(__comp);
141 _LexicographicReverse<_ValueType, _SeqNumber, _Compare> __lrcomp(__comp);
143 // Number of sequences, number of elements in total (possibly
144 // including padding).
145 _DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __nn = 0,
146 __nmax, __n, __r;
148 for (_SeqNumber __i = 0; __i < __m; __i++)
150 __nn += std::distance(__begin_seqs[__i].first,
151 __begin_seqs[__i].second);
152 _GLIBCXX_PARALLEL_ASSERT(
153 std::distance(__begin_seqs[__i].first,
154 __begin_seqs[__i].second) > 0);
157 if (__rank == __nn)
159 for (_SeqNumber __i = 0; __i < __m; __i++)
160 __begin_offsets[__i] = __begin_seqs[__i].second; // Very end.
161 // Return __m - 1;
162 return;
165 _GLIBCXX_PARALLEL_ASSERT(__m != 0);
166 _GLIBCXX_PARALLEL_ASSERT(__nn != 0);
167 _GLIBCXX_PARALLEL_ASSERT(__rank >= 0);
168 _GLIBCXX_PARALLEL_ASSERT(__rank < __nn);
170 _DifferenceType* __ns = new _DifferenceType[__m];
171 _DifferenceType* __a = new _DifferenceType[__m];
172 _DifferenceType* __b = new _DifferenceType[__m];
173 _DifferenceType __l;
175 __ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second);
176 __nmax = __ns[0];
177 for (_SeqNumber __i = 0; __i < __m; __i++)
179 __ns[__i] = std::distance(__begin_seqs[__i].first,
180 __begin_seqs[__i].second);
181 __nmax = std::max(__nmax, __ns[__i]);
184 __r = __rd_log2(__nmax) + 1;
186 // Pad all lists to this length, at least as long as any ns[__i],
187 // equality iff __nmax = 2^__k - 1.
188 __l = (1ULL << __r) - 1;
190 for (_SeqNumber __i = 0; __i < __m; __i++)
192 __a[__i] = 0;
193 __b[__i] = __l;
195 __n = __l / 2;
197 // Invariants:
198 // 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l
200 #define __S(__i) (__begin_seqs[__i].first)
202 // Initial partition.
203 std::vector<std::pair<_ValueType, _SeqNumber> > __sample;
205 for (_SeqNumber __i = 0; __i < __m; __i++)
206 if (__n < __ns[__i]) //__sequence long enough
207 __sample.push_back(std::make_pair(__S(__i)[__n], __i));
208 __gnu_sequential::sort(__sample.begin(), __sample.end(), __lcomp);
210 for (_SeqNumber __i = 0; __i < __m; __i++) //conceptual infinity
211 if (__n >= __ns[__i]) //__sequence too short, conceptual infinity
212 __sample.push_back(
213 std::make_pair(__S(__i)[0] /*__dummy element*/, __i));
215 _DifferenceType __localrank = __rank / __l;
217 _SeqNumber __j;
218 for (__j = 0;
219 __j < __localrank && ((__n + 1) <= __ns[__sample[__j].second]);
220 ++__j)
221 __a[__sample[__j].second] += __n + 1;
222 for (; __j < __m; __j++)
223 __b[__sample[__j].second] -= __n + 1;
225 // Further refinement.
226 while (__n > 0)
228 __n /= 2;
230 _SeqNumber __lmax_seq = -1; // to avoid warning
231 const _ValueType* __lmax = 0; // impossible to avoid the warning?
232 for (_SeqNumber __i = 0; __i < __m; __i++)
234 if (__a[__i] > 0)
236 if (!__lmax)
238 __lmax = &(__S(__i)[__a[__i] - 1]);
239 __lmax_seq = __i;
241 else
243 // Max, favor rear sequences.
244 if (!__comp(__S(__i)[__a[__i] - 1], *__lmax))
246 __lmax = &(__S(__i)[__a[__i] - 1]);
247 __lmax_seq = __i;
253 _SeqNumber __i;
254 for (__i = 0; __i < __m; __i++)
256 _DifferenceType __middle = (__b[__i] + __a[__i]) / 2;
257 if (__lmax && __middle < __ns[__i] &&
258 __lcomp(std::make_pair(__S(__i)[__middle], __i),
259 std::make_pair(*__lmax, __lmax_seq)))
260 __a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]);
261 else
262 __b[__i] -= __n + 1;
265 _DifferenceType __leftsize = 0;
266 for (_SeqNumber __i = 0; __i < __m; __i++)
267 __leftsize += __a[__i] / (__n + 1);
269 _DifferenceType __skew = __rank / (__n + 1) - __leftsize;
271 if (__skew > 0)
273 // Move to the left, find smallest.
274 std::priority_queue<std::pair<_ValueType, _SeqNumber>,
275 std::vector<std::pair<_ValueType, _SeqNumber> >,
276 _LexicographicReverse<_ValueType, _SeqNumber, _Compare> >
277 __pq(__lrcomp);
279 for (_SeqNumber __i = 0; __i < __m; __i++)
280 if (__b[__i] < __ns[__i])
281 __pq.push(std::make_pair(__S(__i)[__b[__i]], __i));
283 for (; __skew != 0 && !__pq.empty(); --__skew)
285 _SeqNumber __source = __pq.top().second;
286 __pq.pop();
288 __a[__source]
289 = std::min(__a[__source] + __n + 1, __ns[__source]);
290 __b[__source] += __n + 1;
292 if (__b[__source] < __ns[__source])
293 __pq.push(
294 std::make_pair(__S(__source)[__b[__source]], __source));
297 else if (__skew < 0)
299 // Move to the right, find greatest.
300 std::priority_queue<std::pair<_ValueType, _SeqNumber>,
301 std::vector<std::pair<_ValueType, _SeqNumber> >,
302 _Lexicographic<_ValueType, _SeqNumber, _Compare> >
303 __pq(__lcomp);
305 for (_SeqNumber __i = 0; __i < __m; __i++)
306 if (__a[__i] > 0)
307 __pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i));
309 for (; __skew != 0; ++__skew)
311 _SeqNumber __source = __pq.top().second;
312 __pq.pop();
314 __a[__source] -= __n + 1;
315 __b[__source] -= __n + 1;
317 if (__a[__source] > 0)
318 __pq.push(std::make_pair(
319 __S(__source)[__a[__source] - 1], __source));
324 // Postconditions:
325 // __a[__i] == __b[__i] in most cases, except when __a[__i] has been
326 // clamped because of having reached the boundary
328 // Now return the result, calculate the offset.
330 // Compare the keys on both edges of the border.
332 // Maximum of left edge, minimum of right edge.
333 _ValueType* __maxleft = 0;
334 _ValueType* __minright = 0;
335 for (_SeqNumber __i = 0; __i < __m; __i++)
337 if (__a[__i] > 0)
339 if (!__maxleft)
340 __maxleft = &(__S(__i)[__a[__i] - 1]);
341 else
343 // Max, favor rear sequences.
344 if (!__comp(__S(__i)[__a[__i] - 1], *__maxleft))
345 __maxleft = &(__S(__i)[__a[__i] - 1]);
348 if (__b[__i] < __ns[__i])
350 if (!__minright)
351 __minright = &(__S(__i)[__b[__i]]);
352 else
354 // Min, favor fore sequences.
355 if (__comp(__S(__i)[__b[__i]], *__minright))
356 __minright = &(__S(__i)[__b[__i]]);
361 _SeqNumber __seq = 0;
362 for (_SeqNumber __i = 0; __i < __m; __i++)
363 __begin_offsets[__i] = __S(__i) + __a[__i];
365 delete[] __ns;
366 delete[] __a;
367 delete[] __b;
371 /**
372 * @brief Selects the element at a certain global __rank from several
373 * sorted sequences.
375 * The sequences are passed via a sequence of random-access
376 * iterator pairs, none of the sequences may be empty.
377 * @param __begin_seqs Begin of the sequence of iterator pairs.
378 * @param __end_seqs End of the sequence of iterator pairs.
379 * @param __rank The global rank to partition at.
380 * @param __offset The rank of the selected element in the global
381 * subsequence of elements equal to the selected element. If the
382 * selected element is unique, this number is 0.
383 * @param __comp The ordering functor, defaults to std::less.
385 template<typename _Tp, typename _RanSeqs, typename _RankType,
386 typename _Compare>
388 multiseq_selection(_RanSeqs __begin_seqs, _RanSeqs __end_seqs,
389 _RankType __rank,
390 _RankType& __offset, _Compare __comp = std::less<_Tp>())
392 _GLIBCXX_CALL(__end_seqs - __begin_seqs)
394 typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type
395 _It;
396 typedef typename std::iterator_traits<_RanSeqs>::difference_type
397 _SeqNumber;
398 typedef typename std::iterator_traits<_It>::difference_type
399 _DifferenceType;
401 _Lexicographic<_Tp, _SeqNumber, _Compare> __lcomp(__comp);
402 _LexicographicReverse<_Tp, _SeqNumber, _Compare> __lrcomp(__comp);
404 // Number of sequences, number of elements in total (possibly
405 // including padding).
406 _DifferenceType __m = std::distance(__begin_seqs, __end_seqs);
407 _DifferenceType __nn = 0;
408 _DifferenceType __nmax, __n, __r;
410 for (_SeqNumber __i = 0; __i < __m; __i++)
411 __nn += std::distance(__begin_seqs[__i].first,
412 __begin_seqs[__i].second);
414 if (__m == 0 || __nn == 0 || __rank < 0 || __rank >= __nn)
416 // result undefined if there is no data or __rank is outside bounds
417 throw std::exception();
421 _DifferenceType* __ns = new _DifferenceType[__m];
422 _DifferenceType* __a = new _DifferenceType[__m];
423 _DifferenceType* __b = new _DifferenceType[__m];
424 _DifferenceType __l;
426 __ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second);
427 __nmax = __ns[0];
428 for (_SeqNumber __i = 0; __i < __m; ++__i)
430 __ns[__i] = std::distance(__begin_seqs[__i].first,
431 __begin_seqs[__i].second);
432 __nmax = std::max(__nmax, __ns[__i]);
435 __r = __rd_log2(__nmax) + 1;
437 // Pad all lists to this length, at least as long as any ns[__i],
438 // equality iff __nmax = 2^__k - 1
439 __l = __round_up_to_pow2(__r) - 1;
441 for (_SeqNumber __i = 0; __i < __m; ++__i)
443 __a[__i] = 0;
444 __b[__i] = __l;
446 __n = __l / 2;
448 // Invariants:
449 // 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l
451 #define __S(__i) (__begin_seqs[__i].first)
453 // Initial partition.
454 std::vector<std::pair<_Tp, _SeqNumber> > __sample;
456 for (_SeqNumber __i = 0; __i < __m; __i++)
457 if (__n < __ns[__i])
458 __sample.push_back(std::make_pair(__S(__i)[__n], __i));
459 __gnu_sequential::sort(__sample.begin(), __sample.end(),
460 __lcomp, sequential_tag());
462 // Conceptual infinity.
463 for (_SeqNumber __i = 0; __i < __m; __i++)
464 if (__n >= __ns[__i])
465 __sample.push_back(
466 std::make_pair(__S(__i)[0] /*__dummy element*/, __i));
468 _DifferenceType __localrank = __rank / __l;
470 _SeqNumber __j;
471 for (__j = 0;
472 __j < __localrank && ((__n + 1) <= __ns[__sample[__j].second]);
473 ++__j)
474 __a[__sample[__j].second] += __n + 1;
475 for (; __j < __m; ++__j)
476 __b[__sample[__j].second] -= __n + 1;
478 // Further refinement.
479 while (__n > 0)
481 __n /= 2;
483 const _Tp* __lmax = 0;
484 for (_SeqNumber __i = 0; __i < __m; ++__i)
486 if (__a[__i] > 0)
488 if (!__lmax)
489 __lmax = &(__S(__i)[__a[__i] - 1]);
490 else
492 if (__comp(*__lmax, __S(__i)[__a[__i] - 1])) //max
493 __lmax = &(__S(__i)[__a[__i] - 1]);
498 _SeqNumber __i;
499 for (__i = 0; __i < __m; __i++)
501 _DifferenceType __middle = (__b[__i] + __a[__i]) / 2;
502 if (__lmax && __middle < __ns[__i]
503 && __comp(__S(__i)[__middle], *__lmax))
504 __a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]);
505 else
506 __b[__i] -= __n + 1;
509 _DifferenceType __leftsize = 0;
510 for (_SeqNumber __i = 0; __i < __m; ++__i)
511 __leftsize += __a[__i] / (__n + 1);
513 _DifferenceType __skew = __rank / (__n + 1) - __leftsize;
515 if (__skew > 0)
517 // Move to the left, find smallest.
518 std::priority_queue<std::pair<_Tp, _SeqNumber>,
519 std::vector<std::pair<_Tp, _SeqNumber> >,
520 _LexicographicReverse<_Tp, _SeqNumber, _Compare> >
521 __pq(__lrcomp);
523 for (_SeqNumber __i = 0; __i < __m; ++__i)
524 if (__b[__i] < __ns[__i])
525 __pq.push(std::make_pair(__S(__i)[__b[__i]], __i));
527 for (; __skew != 0 && !__pq.empty(); --__skew)
529 _SeqNumber __source = __pq.top().second;
530 __pq.pop();
532 __a[__source]
533 = std::min(__a[__source] + __n + 1, __ns[__source]);
534 __b[__source] += __n + 1;
536 if (__b[__source] < __ns[__source])
537 __pq.push(
538 std::make_pair(__S(__source)[__b[__source]], __source));
541 else if (__skew < 0)
543 // Move to the right, find greatest.
544 std::priority_queue<std::pair<_Tp, _SeqNumber>,
545 std::vector<std::pair<_Tp, _SeqNumber> >,
546 _Lexicographic<_Tp, _SeqNumber, _Compare> > __pq(__lcomp);
548 for (_SeqNumber __i = 0; __i < __m; ++__i)
549 if (__a[__i] > 0)
550 __pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i));
552 for (; __skew != 0; ++__skew)
554 _SeqNumber __source = __pq.top().second;
555 __pq.pop();
557 __a[__source] -= __n + 1;
558 __b[__source] -= __n + 1;
560 if (__a[__source] > 0)
561 __pq.push(std::make_pair(
562 __S(__source)[__a[__source] - 1], __source));
567 // Postconditions:
568 // __a[__i] == __b[__i] in most cases, except when __a[__i] has been
569 // clamped because of having reached the boundary
571 // Now return the result, calculate the offset.
573 // Compare the keys on both edges of the border.
575 // Maximum of left edge, minimum of right edge.
576 bool __maxleftset = false, __minrightset = false;
578 // Impossible to avoid the warning?
579 _Tp __maxleft, __minright;
580 for (_SeqNumber __i = 0; __i < __m; ++__i)
582 if (__a[__i] > 0)
584 if (!__maxleftset)
586 __maxleft = __S(__i)[__a[__i] - 1];
587 __maxleftset = true;
589 else
591 // Max.
592 if (__comp(__maxleft, __S(__i)[__a[__i] - 1]))
593 __maxleft = __S(__i)[__a[__i] - 1];
596 if (__b[__i] < __ns[__i])
598 if (!__minrightset)
600 __minright = __S(__i)[__b[__i]];
601 __minrightset = true;
603 else
605 // Min.
606 if (__comp(__S(__i)[__b[__i]], __minright))
607 __minright = __S(__i)[__b[__i]];
612 // Minright is the __splitter, in any case.
614 if (!__maxleftset || __comp(__minright, __maxleft))
616 // Good luck, everything is split unambiguously.
617 __offset = 0;
619 else
621 // We have to calculate an offset.
622 __offset = 0;
624 for (_SeqNumber __i = 0; __i < __m; ++__i)
626 _DifferenceType lb
627 = std::lower_bound(__S(__i), __S(__i) + __ns[__i],
628 __minright,
629 __comp) - __S(__i);
630 __offset += __a[__i] - lb;
634 delete[] __ns;
635 delete[] __a;
636 delete[] __b;
638 return __minright;
642 #undef __S
644 #endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */