3 // Copyright (C) 2007-2017 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
30 // Written by Johannes Singler.
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
37 #include <parallel/basic_iterator.h>
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
40 #include <parallel/multiway_merge.h>
42 namespace __gnu_parallel
44 /** @brief Subsequence description. */
45 template<typename _DifferenceTp
>
48 typedef _DifferenceTp _DifferenceType
;
50 /** @brief Begin of subsequence. */
51 _DifferenceType _M_begin
;
53 /** @brief End of subsequence. */
54 _DifferenceType _M_end
;
57 /** @brief Data accessed by all threads.
59 * PMWMS = parallel multiway mergesort */
60 template<typename _RAIter
>
61 struct _PMWMSSortingData
63 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
64 typedef typename
_TraitsType::value_type _ValueType
;
65 typedef typename
_TraitsType::difference_type _DifferenceType
;
67 /** @brief Number of threads involved. */
68 _ThreadIndex _M_num_threads
;
70 /** @brief Input __begin. */
73 /** @brief Start indices, per thread. */
74 _DifferenceType
* _M_starts
;
76 /** @brief Storage in which to sort. */
77 _ValueType
** _M_temporary
;
79 /** @brief Samples. */
80 _ValueType
* _M_samples
;
82 /** @brief Offsets to add to the found positions. */
83 _DifferenceType
* _M_offsets
;
85 /** @brief Pieces of data to merge @c [thread][__sequence] */
86 std::vector
<_Piece
<_DifferenceType
> >* _M_pieces
;
90 * @brief Select _M_samples from a sequence.
91 * @param __sd Pointer to algorithm data. _Result will be placed in
92 * @c __sd->_M_samples.
93 * @param __num_samples Number of _M_samples to select.
95 template<typename _RAIter
, typename _DifferenceTp
>
97 __determine_samples(_PMWMSSortingData
<_RAIter
>* __sd
,
98 _DifferenceTp __num_samples
)
100 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
101 typedef typename
_TraitsType::value_type _ValueType
;
102 typedef _DifferenceTp _DifferenceType
;
104 _ThreadIndex __iam
= omp_get_thread_num();
106 _DifferenceType
* __es
= new _DifferenceType
[__num_samples
+ 2];
108 __equally_split(__sd
->_M_starts
[__iam
+ 1] - __sd
->_M_starts
[__iam
],
109 __num_samples
+ 1, __es
);
111 for (_DifferenceType __i
= 0; __i
< __num_samples
; ++__i
)
112 ::new(&(__sd
->_M_samples
[__iam
* __num_samples
+ __i
]))
113 _ValueType(__sd
->_M_source
[__sd
->_M_starts
[__iam
]
119 /** @brief Split consistently. */
120 template<bool __exact
, typename _RAIter
,
121 typename _Compare
, typename _SortingPlacesIterator
>
122 struct _SplitConsistently
125 /** @brief Split by exact splitting. */
126 template<typename _RAIter
, typename _Compare
,
127 typename _SortingPlacesIterator
>
128 struct _SplitConsistently
<true, _RAIter
, _Compare
, _SortingPlacesIterator
>
131 operator()(const _ThreadIndex __iam
,
132 _PMWMSSortingData
<_RAIter
>* __sd
,
135 std::iterator_traits
<_RAIter
>::difference_type
140 std::vector
<std::pair
<_SortingPlacesIterator
,
141 _SortingPlacesIterator
> >
142 __seqs(__sd
->_M_num_threads
);
143 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; __s
++)
144 __seqs
[__s
] = std::make_pair(__sd
->_M_temporary
[__s
],
145 __sd
->_M_temporary
[__s
]
146 + (__sd
->_M_starts
[__s
+ 1]
147 - __sd
->_M_starts
[__s
]));
149 std::vector
<_SortingPlacesIterator
> __offsets(__sd
->_M_num_threads
);
151 // if not last thread
152 if (__iam
< __sd
->_M_num_threads
- 1)
153 multiseq_partition(__seqs
.begin(), __seqs
.end(),
154 __sd
->_M_starts
[__iam
+ 1], __offsets
.begin(),
157 for (_ThreadIndex __seq
= 0; __seq
< __sd
->_M_num_threads
; __seq
++)
160 if (__iam
< (__sd
->_M_num_threads
- 1))
161 __sd
->_M_pieces
[__iam
][__seq
]._M_end
162 = __offsets
[__seq
] - __seqs
[__seq
].first
;
164 // very end of this sequence
165 __sd
->_M_pieces
[__iam
][__seq
]._M_end
=
166 __sd
->_M_starts
[__seq
+ 1] - __sd
->_M_starts
[__seq
];
171 for (_ThreadIndex __seq
= 0; __seq
< __sd
->_M_num_threads
; __seq
++)
173 // For each sequence.
175 __sd
->_M_pieces
[__iam
][__seq
]._M_begin
=
176 __sd
->_M_pieces
[__iam
- 1][__seq
]._M_end
;
178 // Absolute beginning.
179 __sd
->_M_pieces
[__iam
][__seq
]._M_begin
= 0;
184 /** @brief Split by sampling. */
185 template<typename _RAIter
, typename _Compare
,
186 typename _SortingPlacesIterator
>
187 struct _SplitConsistently
<false, _RAIter
, _Compare
, _SortingPlacesIterator
>
190 operator()(const _ThreadIndex __iam
,
191 _PMWMSSortingData
<_RAIter
>* __sd
,
194 std::iterator_traits
<_RAIter
>::difference_type
197 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
198 typedef typename
_TraitsType::value_type _ValueType
;
199 typedef typename
_TraitsType::difference_type _DifferenceType
;
201 __determine_samples(__sd
, __num_samples
);
206 __gnu_sequential::sort(__sd
->_M_samples
,
208 + (__num_samples
* __sd
->_M_num_threads
),
213 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; ++__s
)
215 // For each sequence.
216 if (__num_samples
* __iam
> 0)
217 __sd
->_M_pieces
[__iam
][__s
]._M_begin
=
218 std::lower_bound(__sd
->_M_temporary
[__s
],
219 __sd
->_M_temporary
[__s
]
220 + (__sd
->_M_starts
[__s
+ 1]
221 - __sd
->_M_starts
[__s
]),
222 __sd
->_M_samples
[__num_samples
* __iam
],
224 - __sd
->_M_temporary
[__s
];
226 // Absolute beginning.
227 __sd
->_M_pieces
[__iam
][__s
]._M_begin
= 0;
229 if ((__num_samples
* (__iam
+ 1)) <
230 (__num_samples
* __sd
->_M_num_threads
))
231 __sd
->_M_pieces
[__iam
][__s
]._M_end
=
232 std::lower_bound(__sd
->_M_temporary
[__s
],
233 __sd
->_M_temporary
[__s
]
234 + (__sd
->_M_starts
[__s
+ 1]
235 - __sd
->_M_starts
[__s
]),
236 __sd
->_M_samples
[__num_samples
* (__iam
+ 1)],
238 - __sd
->_M_temporary
[__s
];
241 __sd
->_M_pieces
[__iam
][__s
]._M_end
= (__sd
->_M_starts
[__s
+ 1]
242 - __sd
->_M_starts
[__s
]);
247 template<bool __stable
, typename _RAIter
, typename _Compare
>
248 struct __possibly_stable_sort
251 template<typename _RAIter
, typename _Compare
>
252 struct __possibly_stable_sort
<true, _RAIter
, _Compare
>
254 void operator()(const _RAIter
& __begin
,
255 const _RAIter
& __end
, _Compare
& __comp
) const
256 { __gnu_sequential::stable_sort(__begin
, __end
, __comp
); }
259 template<typename _RAIter
, typename _Compare
>
260 struct __possibly_stable_sort
<false, _RAIter
, _Compare
>
262 void operator()(const _RAIter __begin
,
263 const _RAIter __end
, _Compare
& __comp
) const
264 { __gnu_sequential::sort(__begin
, __end
, __comp
); }
267 template<bool __stable
, typename Seq_RAIter
,
268 typename _RAIter
, typename _Compare
,
270 struct __possibly_stable_multiway_merge
273 template<typename Seq_RAIter
, typename _RAIter
,
274 typename _Compare
, typename _DiffType
>
275 struct __possibly_stable_multiway_merge
<true, Seq_RAIter
,
276 _RAIter
, _Compare
, _DiffType
>
278 void operator()(const Seq_RAIter
& __seqs_begin
,
279 const Seq_RAIter
& __seqs_end
,
280 const _RAIter
& __target
,
282 _DiffType __length_am
) const
283 { stable_multiway_merge(__seqs_begin
, __seqs_end
, __target
,
284 __length_am
, __comp
, sequential_tag()); }
287 template<typename Seq_RAIter
, typename _RAIter
,
288 typename _Compare
, typename _DiffType
>
289 struct __possibly_stable_multiway_merge
<false, Seq_RAIter
,
290 _RAIter
, _Compare
, _DiffType
>
292 void operator()(const Seq_RAIter
& __seqs_begin
,
293 const Seq_RAIter
& __seqs_end
,
294 const _RAIter
& __target
,
296 _DiffType __length_am
) const
297 { multiway_merge(__seqs_begin
, __seqs_end
, __target
, __length_am
,
298 __comp
, sequential_tag()); }
301 /** @brief PMWMS code executed by each thread.
302 * @param __sd Pointer to algorithm data.
303 * @param __comp Comparator.
305 template<bool __stable
, bool __exact
, typename _RAIter
,
308 parallel_sort_mwms_pu(_PMWMSSortingData
<_RAIter
>* __sd
,
311 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
312 typedef typename
_TraitsType::value_type _ValueType
;
313 typedef typename
_TraitsType::difference_type _DifferenceType
;
315 _ThreadIndex __iam
= omp_get_thread_num();
317 // Length of this thread's chunk, before merging.
318 _DifferenceType __length_local
=
319 __sd
->_M_starts
[__iam
+ 1] - __sd
->_M_starts
[__iam
];
321 // Sort in temporary storage, leave space for sentinel.
323 typedef _ValueType
* _SortingPlacesIterator
;
325 __sd
->_M_temporary
[__iam
] =
326 static_cast<_ValueType
*>(::operator new(sizeof(_ValueType
)
327 * (__length_local
+ 1)));
330 std::uninitialized_copy(__sd
->_M_source
+ __sd
->_M_starts
[__iam
],
331 __sd
->_M_source
+ __sd
->_M_starts
[__iam
]
333 __sd
->_M_temporary
[__iam
]);
335 __possibly_stable_sort
<__stable
, _SortingPlacesIterator
, _Compare
>()
336 (__sd
->_M_temporary
[__iam
],
337 __sd
->_M_temporary
[__iam
] + __length_local
,
340 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341 // __sd->_M_temporary[__iam] + __length_local.
343 // No barrier here: Synchronization is done by the splitting routine.
345 _DifferenceType __num_samples
=
346 _Settings::get().sort_mwms_oversampling
* __sd
->_M_num_threads
- 1;
347 _SplitConsistently
<__exact
, _RAIter
, _Compare
, _SortingPlacesIterator
>()
348 (__iam
, __sd
, __comp
, __num_samples
);
350 // Offset from __target __begin, __length after merging.
351 _DifferenceType __offset
= 0, __length_am
= 0;
352 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; __s
++)
354 __length_am
+= (__sd
->_M_pieces
[__iam
][__s
]._M_end
355 - __sd
->_M_pieces
[__iam
][__s
]._M_begin
);
356 __offset
+= __sd
->_M_pieces
[__iam
][__s
]._M_begin
;
360 std::pair
<_SortingPlacesIterator
, _SortingPlacesIterator
> >
362 _SeqVector
__seqs(__sd
->_M_num_threads
);
364 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; ++__s
)
367 std::make_pair(__sd
->_M_temporary
[__s
]
368 + __sd
->_M_pieces
[__iam
][__s
]._M_begin
,
369 __sd
->_M_temporary
[__s
]
370 + __sd
->_M_pieces
[__iam
][__s
]._M_end
);
373 __possibly_stable_multiway_merge
<
374 __stable
, typename
_SeqVector::iterator
,
375 _RAIter
, _Compare
, _DifferenceType
>()(__seqs
.begin(), __seqs
.end(),
376 __sd
->_M_source
+ __offset
, __comp
,
381 for (_DifferenceType __i
= 0; __i
< __length_local
; ++__i
)
382 __sd
->_M_temporary
[__iam
][__i
].~_ValueType();
383 ::operator delete(__sd
->_M_temporary
[__iam
]);
386 /** @brief PMWMS main call.
387 * @param __begin Begin iterator of sequence.
388 * @param __end End iterator of sequence.
389 * @param __comp Comparator.
390 * @param __num_threads Number of threads to use.
392 template<bool __stable
, bool __exact
, typename _RAIter
,
395 parallel_sort_mwms(_RAIter __begin
, _RAIter __end
,
397 _ThreadIndex __num_threads
)
399 _GLIBCXX_CALL(__end
- __begin
)
401 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
402 typedef typename
_TraitsType::value_type _ValueType
;
403 typedef typename
_TraitsType::difference_type _DifferenceType
;
405 _DifferenceType __n
= __end
- __begin
;
410 // at least one element per thread
411 if (__num_threads
> __n
)
412 __num_threads
= static_cast<_ThreadIndex
>(__n
);
415 _PMWMSSortingData
<_RAIter
> __sd
;
416 _DifferenceType
* __starts
;
417 _DifferenceType __size
;
419 # pragma omp parallel num_threads(__num_threads)
421 __num_threads
= omp_get_num_threads(); //no more threads than requested
425 __sd
._M_num_threads
= __num_threads
;
426 __sd
._M_source
= __begin
;
428 __sd
._M_temporary
= new _ValueType
*[__num_threads
];
433 (_Settings::get().sort_mwms_oversampling
* __num_threads
- 1)
435 __sd
._M_samples
= static_cast<_ValueType
*>
436 (::operator new(__size
* sizeof(_ValueType
)));
441 __sd
._M_offsets
= new _DifferenceType
[__num_threads
- 1];
443 = new std::vector
<_Piece
<_DifferenceType
> >[__num_threads
];
444 for (_ThreadIndex __s
= 0; __s
< __num_threads
; ++__s
)
445 __sd
._M_pieces
[__s
].resize(__num_threads
);
446 __starts
= __sd
._M_starts
= new _DifferenceType
[__num_threads
+ 1];
448 _DifferenceType __chunk_length
= __n
/ __num_threads
;
449 _DifferenceType __split
= __n
% __num_threads
;
450 _DifferenceType __pos
= 0;
451 for (_ThreadIndex __i
= 0; __i
< __num_threads
; ++__i
)
453 __starts
[__i
] = __pos
;
454 __pos
+= ((__i
< __split
)
455 ? (__chunk_length
+ 1) : __chunk_length
);
457 __starts
[__num_threads
] = __pos
;
460 // Now sort in parallel.
461 parallel_sort_mwms_pu
<__stable
, __exact
>(&__sd
, __comp
);
465 delete[] __sd
._M_temporary
;
469 for (_DifferenceType __i
= 0; __i
< __size
; ++__i
)
470 __sd
._M_samples
[__i
].~_ValueType();
471 ::operator delete(__sd
._M_samples
);
474 delete[] __sd
._M_offsets
;
475 delete[] __sd
._M_pieces
;
478 } //namespace __gnu_parallel
480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */