3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
31 /** @file parallel/partition.h
32 * @brief Parallel implementation of std::partition(),
33 * std::nth_element(), and std::partial_sort().
34 * This file is a GNU parallel extension to the Standard C++ Library.
37 // Written by Johannes Singler and Felix Putze.
39 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
40 #define _GLIBCXX_PARALLEL_PARTITION_H 1
42 #include <parallel/basic_iterator.h>
43 #include <parallel/sort.h>
44 #include <parallel/random_number.h>
45 #include <bits/stl_algo.h>
46 #include <parallel/parallel.h>
48 /** @brief Decide whether to declare certain variables volatile. */
49 #define _GLIBCXX_VOLATILE volatile
51 namespace __gnu_parallel
53 /** @brief Parallel implementation of std::partition.
54 * @param begin Begin iterator of input sequence to split.
55 * @param end End iterator of input sequence to split.
56 * @param pred Partition predicate, possibly including some kind of pivot.
57 * @param num_threads Maximum number of threads to use for this task.
58 * @return Number of elements not fulfilling the predicate. */
59 template<typename RandomAccessIterator
, typename Predicate
>
60 typename
std::iterator_traits
<RandomAccessIterator
>::difference_type
61 parallel_partition(RandomAccessIterator begin
, RandomAccessIterator end
,
62 Predicate pred
, thread_index_t num_threads
)
64 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
65 typedef typename
traits_type::value_type value_type
;
66 typedef typename
traits_type::difference_type difference_type
;
68 difference_type n
= end
- begin
;
72 const _Settings
& __s
= _Settings::get();
75 _GLIBCXX_VOLATILE difference_type left
= 0, right
= n
- 1;
76 _GLIBCXX_VOLATILE difference_type leftover_left
, leftover_right
;
77 _GLIBCXX_VOLATILE difference_type leftnew
, rightnew
;
79 bool* reserved_left
= NULL
, * reserved_right
= NULL
;
81 difference_type chunk_size
;
83 omp_lock_t result_lock
;
84 omp_init_lock(&result_lock
);
86 //at least two chunks per thread
87 if(right
- left
+ 1 >= 2 * num_threads
* chunk_size
)
88 # pragma omp parallel num_threads(num_threads)
92 num_threads
= omp_get_num_threads();
93 reserved_left
= new bool[num_threads
];
94 reserved_right
= new bool[num_threads
];
96 if (__s
.partition_chunk_share
> 0.0)
97 chunk_size
= std::max
<difference_type
>(__s
.partition_chunk_size
,
98 (double)n
* __s
.partition_chunk_share
99 / (double)num_threads
);
101 chunk_size
= __s
.partition_chunk_size
;
104 while (right
- left
+ 1 >= 2 * num_threads
* chunk_size
)
108 difference_type num_chunks
= (right
- left
+ 1) / chunk_size
;
110 for (int r
= 0; r
< num_threads
; ++r
)
112 reserved_left
[r
] = false;
113 reserved_right
[r
] = false;
120 difference_type thread_left
, thread_left_border
,
121 thread_right
, thread_right_border
;
122 thread_left
= left
+ 1;
124 // Just to satisfy the condition below.
125 thread_left_border
= thread_left
- 1;
126 thread_right
= n
- 1;
127 thread_right_border
= thread_right
+ 1;
129 bool iam_finished
= false;
130 while (!iam_finished
)
132 if (thread_left
> thread_left_border
)
134 omp_set_lock(&result_lock
);
135 if (left
+ (chunk_size
- 1) > right
)
140 thread_left_border
= left
+ (chunk_size
- 1);
143 omp_unset_lock(&result_lock
);
146 if (thread_right
< thread_right_border
)
148 omp_set_lock(&result_lock
);
149 if (left
> right
- (chunk_size
- 1))
153 thread_right
= right
;
154 thread_right_border
= right
- (chunk_size
- 1);
157 omp_unset_lock(&result_lock
);
164 while (thread_left
< thread_right
)
166 while (pred(begin
[thread_left
])
167 && thread_left
<= thread_left_border
)
169 while (!pred(begin
[thread_right
])
170 && thread_right
>= thread_right_border
)
173 if (thread_left
> thread_left_border
174 || thread_right
< thread_right_border
)
175 // Fetch new chunk(s).
178 std::swap(begin
[thread_left
], begin
[thread_right
]);
184 // Now swap the leftover chunks to the right places.
185 if (thread_left
<= thread_left_border
)
188 if (thread_right
>= thread_right_border
)
196 leftnew
= left
- leftover_left
* chunk_size
;
197 rightnew
= right
+ leftover_right
* chunk_size
;
202 // <=> thread_left_border + (chunk_size - 1) >= leftnew
203 if (thread_left
<= thread_left_border
204 && thread_left_border
>= leftnew
)
206 // Chunk already in place, reserve spot.
207 reserved_left
[(left
- (thread_left_border
+ 1)) / chunk_size
]
211 // <=> thread_right_border - (chunk_size - 1) <= rightnew
212 if (thread_right
>= thread_right_border
213 && thread_right_border
<= rightnew
)
215 // Chunk already in place, reserve spot.
216 reserved_right
[((thread_right_border
- 1) - right
)
217 / chunk_size
] = true;
222 if (thread_left
<= thread_left_border
223 && thread_left_border
< leftnew
)
225 // Find spot and swap.
226 difference_type swapstart
= -1;
227 omp_set_lock(&result_lock
);
228 for (int r
= 0; r
< leftover_left
; ++r
)
229 if (!reserved_left
[r
])
231 reserved_left
[r
] = true;
232 swapstart
= left
- (r
+ 1) * chunk_size
;
235 omp_unset_lock(&result_lock
);
237 #if _GLIBCXX_ASSERTIONS
238 _GLIBCXX_PARALLEL_ASSERT(swapstart
!= -1);
241 std::swap_ranges(begin
+ thread_left_border
243 begin
+ thread_left_border
+ 1,
247 if (thread_right
>= thread_right_border
248 && thread_right_border
> rightnew
)
250 // Find spot and swap
251 difference_type swapstart
= -1;
252 omp_set_lock(&result_lock
);
253 for (int r
= 0; r
< leftover_right
; ++r
)
254 if (!reserved_right
[r
])
256 reserved_right
[r
] = true;
257 swapstart
= right
+ r
* chunk_size
+ 1;
260 omp_unset_lock(&result_lock
);
262 #if _GLIBCXX_ASSERTIONS
263 _GLIBCXX_PARALLEL_ASSERT(swapstart
!= -1);
266 std::swap_ranges(begin
+ thread_right_border
,
267 begin
+ thread_right_border
+ chunk_size
,
270 #if _GLIBCXX_ASSERTIONS
275 for (int r
= 0; r
< leftover_left
; ++r
)
276 _GLIBCXX_PARALLEL_ASSERT(reserved_left
[r
]);
277 for (int r
= 0; r
< leftover_right
; ++r
)
278 _GLIBCXX_PARALLEL_ASSERT(reserved_right
[r
]);
289 # pragma omp flush(left, right)
290 } // end "recursion" //parallel
292 difference_type final_left
= left
, final_right
= right
;
294 while (final_left
< final_right
)
296 // Go right until key is geq than pivot.
297 while (pred(begin
[final_left
]) && final_left
< final_right
)
300 // Go left until key is less than pivot.
301 while (!pred(begin
[final_right
]) && final_left
< final_right
)
304 if (final_left
== final_right
)
306 std::swap(begin
[final_left
], begin
[final_right
]);
311 // All elements on the left side are < piv, all elements on the
313 delete[] reserved_left
;
314 delete[] reserved_right
;
316 omp_destroy_lock(&result_lock
);
318 // Element "between" final_left and final_right might not have
320 if (final_left
< n
&& !pred(begin
[final_left
]))
324 return final_left
+ 1;
328 * @brief Parallel implementation of std::nth_element().
329 * @param begin Begin iterator of input sequence.
330 * @param nth Iterator of element that must be in position afterwards.
331 * @param end End iterator of input sequence.
332 * @param comp Comparator.
334 template<typename RandomAccessIterator
, typename Comparator
>
336 parallel_nth_element(RandomAccessIterator begin
, RandomAccessIterator nth
,
337 RandomAccessIterator end
, Comparator comp
)
339 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
340 typedef typename
traits_type::value_type value_type
;
341 typedef typename
traits_type::difference_type difference_type
;
343 _GLIBCXX_CALL(end
- begin
)
345 RandomAccessIterator split
;
348 difference_type minimum_length
=
349 std::max
<difference_type
>(2, _Settings::get().partition_minimal_n
);
351 // Break if input range to small.
352 while (static_cast<sequence_index_t
>(end
- begin
) >= minimum_length
)
354 difference_type n
= end
- begin
;
356 RandomAccessIterator pivot_pos
= begin
+ rng(n
);
358 // Swap pivot_pos value to end.
359 if (pivot_pos
!= (end
- 1))
360 std::swap(*pivot_pos
, *(end
- 1));
363 // XXX Comparator must have first_value_type, second_value_type,
365 // Comparator == __gnu_parallel::lexicographic<S, int,
366 // __gnu_parallel::less<S, S> >
367 // pivot_pos == std::pair<S, int>*
368 // XXX binder2nd only for RandomAccessIterators??
369 __gnu_parallel::binder2nd
<Comparator
, value_type
, value_type
, bool>
370 pred(comp
, *pivot_pos
);
372 // Divide, leave pivot unchanged in last place.
373 RandomAccessIterator split_pos1
, split_pos2
;
374 split_pos1
= begin
+ parallel_partition(begin
, end
- 1, pred
,
377 // Left side: < pivot_pos; right side: >= pivot_pos
379 // Swap pivot back to middle.
380 if (split_pos1
!= pivot_pos
)
381 std::swap(*split_pos1
, *pivot_pos
);
382 pivot_pos
= split_pos1
;
384 // In case all elements are equal, split_pos1 == 0
385 if ((split_pos1
+ 1 - begin
) < (n
>> 7)
386 || (end
- split_pos1
) < (n
>> 7))
388 // Very unequal split, one part smaller than one 128th
389 // elements not strictly larger than the pivot.
390 __gnu_parallel::unary_negate
<__gnu_parallel::
391 binder1st
<Comparator
, value_type
, value_type
, bool>, value_type
>
392 pred(__gnu_parallel::binder1st
<Comparator
, value_type
,
393 value_type
, bool>(comp
, *pivot_pos
));
395 // Find other end of pivot-equal range.
396 split_pos2
= __gnu_sequential::partition(split_pos1
+ 1,
400 // Only skip the pivot.
401 split_pos2
= split_pos1
+ 1;
403 // Compare iterators.
404 if (split_pos2
<= nth
)
406 else if (nth
< split_pos1
)
412 // Only at most _Settings::partition_minimal_n elements left.
413 __gnu_sequential::sort(begin
, end
, comp
);
416 /** @brief Parallel implementation of std::partial_sort().
417 * @param begin Begin iterator of input sequence.
418 * @param middle Sort until this position.
419 * @param end End iterator of input sequence.
420 * @param comp Comparator. */
421 template<typename RandomAccessIterator
, typename Comparator
>
423 parallel_partial_sort(RandomAccessIterator begin
,
424 RandomAccessIterator middle
,
425 RandomAccessIterator end
, Comparator comp
)
427 parallel_nth_element(begin
, middle
, end
, comp
);
428 std::sort(begin
, middle
, comp
);
431 } //namespace __gnu_parallel
433 #undef _GLIBCXX_VOLATILE