3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
31 /** @file parallel/balanced_quicksort.h
32 * @brief Implementation of a dynamically load-balanced parallel quicksort.
34 * It works in-place and needs only logarithmic extra memory.
35 * The algorithm is similar to the one proposed in
37 * P. Tsigas and Y. Zhang.
38 * A simple, fast parallel implementation of quicksort and
39 * its performance evaluation on SUN enterprise 10000.
40 * In 11th Euromicro Conference on Parallel, Distributed and
41 * Network-Based Processing, page 372, 2003.
43 * This file is a GNU parallel extension to the Standard C++ Library.
46 // Written by Johannes Singler.
48 #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
49 #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
51 #include <parallel/basic_iterator.h>
52 #include <bits/stl_algo.h>
54 #include <parallel/settings.h>
55 #include <parallel/partition.h>
56 #include <parallel/random_number.h>
57 #include <parallel/queue.h>
60 #if _GLIBCXX_ASSERTIONS
61 #include <parallel/checkers.h>
64 namespace __gnu_parallel
66 /** @brief Information local to one thread in the parallel quicksort run. */
67 template<typename RandomAccessIterator
>
70 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
71 typedef typename
traits_type::difference_type difference_type
;
73 /** @brief Continuous part of the sequence, described by an
75 typedef std::pair
<RandomAccessIterator
, RandomAccessIterator
> Piece
;
77 /** @brief Initial piece to work on. */
80 /** @brief Work-stealing queue. */
81 RestrictedBoundedConcurrentQueue
<Piece
> leftover_parts
;
83 /** @brief Number of threads involved in this algorithm. */
84 thread_index_t num_threads
;
86 /** @brief Pointer to a counter of elements left over to sort. */
87 volatile difference_type
* elements_leftover
;
89 /** @brief The complete sequence to sort. */
92 /** @brief Constructor.
93 * @param queue_size Size of the work-stealing queue. */
94 QSBThreadLocal(int queue_size
) : leftover_parts(queue_size
) { }
97 /** @brief Balanced quicksort divide step.
98 * @param begin Begin iterator of subsequence.
99 * @param end End iterator of subsequence.
100 * @param comp Comparator.
101 * @param num_threads Number of threads that are allowed to work on
103 * @pre @c (end-begin)>=1 */
104 template<typename RandomAccessIterator
, typename Comparator
>
105 typename
std::iterator_traits
<RandomAccessIterator
>::difference_type
106 qsb_divide(RandomAccessIterator begin
, RandomAccessIterator end
,
107 Comparator comp
, thread_index_t num_threads
)
109 _GLIBCXX_PARALLEL_ASSERT(num_threads
> 0);
111 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
112 typedef typename
traits_type::value_type value_type
;
113 typedef typename
traits_type::difference_type difference_type
;
115 RandomAccessIterator pivot_pos
=
116 median_of_three_iterators(begin
, begin
+ (end
- begin
) / 2,
119 #if defined(_GLIBCXX_ASSERTIONS)
120 // Must be in between somewhere.
121 difference_type n
= end
- begin
;
123 _GLIBCXX_PARALLEL_ASSERT(
124 (!comp(*pivot_pos
, *begin
) && !comp(*(begin
+ n
/ 2), *pivot_pos
))
125 || (!comp(*pivot_pos
, *begin
) && !comp(*(end
- 1), *pivot_pos
))
126 || (!comp(*pivot_pos
, *(begin
+ n
/ 2)) && !comp(*begin
, *pivot_pos
))
127 || (!comp(*pivot_pos
, *(begin
+ n
/ 2)) && !comp(*(end
- 1), *pivot_pos
))
128 || (!comp(*pivot_pos
, *(end
- 1)) && !comp(*begin
, *pivot_pos
))
129 || (!comp(*pivot_pos
, *(end
- 1)) && !comp(*(begin
+ n
/ 2), *pivot_pos
)));
132 // Swap pivot value to end.
133 if (pivot_pos
!= (end
- 1))
134 std::swap(*pivot_pos
, *(end
- 1));
137 __gnu_parallel::binder2nd
<Comparator
, value_type
, value_type
, bool>
138 pred(comp
, *pivot_pos
);
140 // Divide, returning end - begin - 1 in the worst case.
141 difference_type split_pos
= parallel_partition(
142 begin
, end
- 1, pred
, num_threads
);
144 // Swap back pivot to middle.
145 std::swap(*(begin
+ split_pos
), *pivot_pos
);
146 pivot_pos
= begin
+ split_pos
;
148 #if _GLIBCXX_ASSERTIONS
149 RandomAccessIterator r
;
150 for (r
= begin
; r
!= pivot_pos
; ++r
)
151 _GLIBCXX_PARALLEL_ASSERT(comp(*r
, *pivot_pos
));
152 for (; r
!= end
; ++r
)
153 _GLIBCXX_PARALLEL_ASSERT(!comp(*r
, *pivot_pos
));
159 /** @brief Quicksort conquer step.
160 * @param tls Array of thread-local storages.
161 * @param begin Begin iterator of subsequence.
162 * @param end End iterator of subsequence.
163 * @param comp Comparator.
164 * @param iam Number of the thread processing this function.
166 * Number of threads that are allowed to work on this part. */
167 template<typename RandomAccessIterator
, typename Comparator
>
169 qsb_conquer(QSBThreadLocal
<RandomAccessIterator
>** tls
,
170 RandomAccessIterator begin
, RandomAccessIterator end
,
172 thread_index_t iam
, thread_index_t num_threads
,
175 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
176 typedef typename
traits_type::value_type value_type
;
177 typedef typename
traits_type::difference_type difference_type
;
179 difference_type n
= end
- begin
;
181 if (num_threads
<= 1 || n
<= 1)
183 tls
[iam
]->initial
.first
= begin
;
184 tls
[iam
]->initial
.second
= end
;
186 qsb_local_sort_with_helping(tls
, comp
, iam
, parent_wait
);
192 difference_type split_pos
= qsb_divide(begin
, end
, comp
, num_threads
);
194 #if _GLIBCXX_ASSERTIONS
195 _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos
&& split_pos
< (end
- begin
));
198 thread_index_t num_threads_leftside
=
199 std::max
<thread_index_t
>(1, std::min
<thread_index_t
>(
200 num_threads
- 1, split_pos
* num_threads
/ n
));
203 *tls
[iam
]->elements_leftover
-= (difference_type
)1;
206 # pragma omp parallel num_threads(2)
209 if(omp_get_num_threads() < 2)
214 # pragma omp sections
218 qsb_conquer(tls
, begin
, begin
+ split_pos
, comp
,
220 num_threads_leftside
,
224 // The pivot_pos is left in place, to ensure termination.
227 qsb_conquer(tls
, begin
+ split_pos
+ 1, end
, comp
,
228 iam
+ num_threads_leftside
,
229 num_threads
- num_threads_leftside
,
238 * @brief Quicksort step doing load-balanced local sort.
239 * @param tls Array of thread-local storages.
240 * @param comp Comparator.
241 * @param iam Number of the thread processing this function.
243 template<typename RandomAccessIterator
, typename Comparator
>
245 qsb_local_sort_with_helping(QSBThreadLocal
<RandomAccessIterator
>** tls
,
246 Comparator
& comp
, int iam
, bool wait
)
248 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
249 typedef typename
traits_type::value_type value_type
;
250 typedef typename
traits_type::difference_type difference_type
;
251 typedef std::pair
<RandomAccessIterator
, RandomAccessIterator
> Piece
;
253 QSBThreadLocal
<RandomAccessIterator
>& tl
= *tls
[iam
];
255 difference_type base_case_n
=
256 _Settings::get().sort_qsb_base_case_maximal_n
;
259 thread_index_t num_threads
= tl
.num_threads
;
261 // Every thread has its own random number generator.
262 random_number
rng(iam
+ 1);
264 Piece current
= tl
.initial
;
266 difference_type elements_done
= 0;
267 #if _GLIBCXX_ASSERTIONS
268 difference_type total_elements_done
= 0;
273 // Invariant: current must be a valid (maybe empty) range.
274 RandomAccessIterator begin
= current
.first
, end
= current
.second
;
275 difference_type n
= end
- begin
;
280 RandomAccessIterator pivot_pos
= begin
+ rng(n
);
282 // Swap pivot_pos value to end.
283 if (pivot_pos
!= (end
- 1))
284 std::swap(*pivot_pos
, *(end
- 1));
287 __gnu_parallel::binder2nd
288 <Comparator
, value_type
, value_type
, bool>
289 pred(comp
, *pivot_pos
);
291 // Divide, leave pivot unchanged in last place.
292 RandomAccessIterator split_pos1
, split_pos2
;
293 split_pos1
= __gnu_sequential::partition(begin
, end
- 1, pred
);
295 // Left side: < pivot_pos; right side: >= pivot_pos.
296 #if _GLIBCXX_ASSERTIONS
297 _GLIBCXX_PARALLEL_ASSERT(begin
<= split_pos1
&& split_pos1
< end
);
299 // Swap pivot back to middle.
300 if (split_pos1
!= pivot_pos
)
301 std::swap(*split_pos1
, *pivot_pos
);
302 pivot_pos
= split_pos1
;
304 // In case all elements are equal, split_pos1 == 0.
305 if ((split_pos1
+ 1 - begin
) < (n
>> 7)
306 || (end
- split_pos1
) < (n
>> 7))
308 // Very unequal split, one part smaller than one 128th
309 // elements not strictly larger than the pivot.
310 __gnu_parallel::unary_negate
<__gnu_parallel::binder1st
311 <Comparator
, value_type
, value_type
, bool>, value_type
>
312 pred(__gnu_parallel::binder1st
313 <Comparator
, value_type
, value_type
, bool>(comp
,
316 // Find other end of pivot-equal range.
317 split_pos2
= __gnu_sequential::partition(split_pos1
+ 1,
321 // Only skip the pivot.
322 split_pos2
= split_pos1
+ 1;
324 // Elements equal to pivot are done.
325 elements_done
+= (split_pos2
- split_pos1
);
326 #if _GLIBCXX_ASSERTIONS
327 total_elements_done
+= (split_pos2
- split_pos1
);
329 // Always push larger part onto stack.
330 if (((split_pos1
+ 1) - begin
) < (end
- (split_pos2
)))
332 // Right side larger.
333 if ((split_pos2
) != end
)
334 tl
.leftover_parts
.push_front(std::make_pair(split_pos2
,
337 //current.first = begin; //already set anyway
338 current
.second
= split_pos1
;
344 if (begin
!= split_pos1
)
345 tl
.leftover_parts
.push_front(std::make_pair(begin
,
348 current
.first
= split_pos2
;
349 //current.second = end; //already set anyway
355 __gnu_sequential::sort(begin
, end
, comp
);
357 #if _GLIBCXX_ASSERTIONS
358 total_elements_done
+= n
;
361 // Prefer own stack, small pieces.
362 if (tl
.leftover_parts
.pop_front(current
))
366 *tl
.elements_leftover
-= elements_done
;
370 #if _GLIBCXX_ASSERTIONS
371 double search_start
= omp_get_wtime();
374 // Look for new work.
375 bool successfully_stolen
= false;
376 while (wait
&& *tl
.elements_leftover
> 0 && !successfully_stolen
377 #if _GLIBCXX_ASSERTIONS
378 // Possible dead-lock.
379 && (omp_get_wtime() < (search_start
+ 1.0))
383 thread_index_t victim
;
384 victim
= rng(num_threads
);
387 successfully_stolen
= (victim
!= iam
)
388 && tls
[victim
]->leftover_parts
.pop_back(current
);
389 if (!successfully_stolen
)
391 #if !defined(__ICC) && !defined(__ECC)
396 #if _GLIBCXX_ASSERTIONS
397 if (omp_get_wtime() >= (search_start
+ 1.0))
400 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
401 < (search_start
+ 1.0));
404 if (!successfully_stolen
)
406 #if _GLIBCXX_ASSERTIONS
407 _GLIBCXX_PARALLEL_ASSERT(*tl
.elements_leftover
== 0);
415 /** @brief Top-level quicksort routine.
416 * @param begin Begin iterator of sequence.
417 * @param end End iterator of sequence.
418 * @param comp Comparator.
419 * @param num_threads Number of threads that are allowed to work on
422 template<typename RandomAccessIterator
, typename Comparator
>
424 parallel_sort_qsb(RandomAccessIterator begin
, RandomAccessIterator end
,
426 thread_index_t num_threads
)
428 _GLIBCXX_CALL(end
- begin
)
430 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
431 typedef typename
traits_type::value_type value_type
;
432 typedef typename
traits_type::difference_type difference_type
;
433 typedef std::pair
<RandomAccessIterator
, RandomAccessIterator
> Piece
;
435 typedef QSBThreadLocal
<RandomAccessIterator
> tls_type
;
437 difference_type n
= end
- begin
;
442 // At least one element per processor.
444 num_threads
= static_cast<thread_index_t
>(n
);
446 // Initialize thread local storage
447 tls_type
** tls
= new tls_type
*[num_threads
];
448 difference_type queue_size
= num_threads
* (thread_index_t
)(log2(n
) + 1);
449 for (thread_index_t t
= 0; t
< num_threads
; ++t
)
450 tls
[t
] = new QSBThreadLocal
<RandomAccessIterator
>(queue_size
);
452 // There can never be more than ceil(log2(n)) ranges on the stack, because
453 // 1. Only one processor pushes onto the stack
454 // 2. The largest range has at most length n
455 // 3. Each range is larger than half of the range remaining
456 volatile difference_type elements_leftover
= n
;
457 for (int i
= 0; i
< num_threads
; ++i
)
459 tls
[i
]->elements_leftover
= &elements_leftover
;
460 tls
[i
]->num_threads
= num_threads
;
461 tls
[i
]->global
= std::make_pair(begin
, end
);
463 // Just in case nothing is left to assign.
464 tls
[i
]->initial
= std::make_pair(end
, end
);
467 // Main recursion call.
468 qsb_conquer(tls
, begin
, begin
+ n
, comp
, 0, num_threads
, true);
470 #if _GLIBCXX_ASSERTIONS
471 // All stack must be empty.
473 for (int i
= 1; i
< num_threads
; ++i
)
474 _GLIBCXX_PARALLEL_ASSERT(!tls
[i
]->leftover_parts
.pop_back(dummy
));
477 for (int i
= 0; i
< num_threads
; ++i
)
481 } // namespace __gnu_parallel
483 #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */