Merge -r 127928:132243 from trunk
[official-gcc.git] / libstdc++-v3 / include / parallel / partition.h
blob0a49d8f60825b3c615ffa9519227cd7666713c5a
1 // -*- C++ -*-
3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
31 /** @file parallel/partition.h
32 * @brief Parallel implementation of std::partition(),
33 * std::nth_element(), and std::partial_sort().
34 * This file is a GNU parallel extension to the Standard C++ Library.
37 // Written by Johannes Singler and Felix Putze.
39 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
40 #define _GLIBCXX_PARALLEL_PARTITION_H 1
42 #include <parallel/basic_iterator.h>
43 #include <parallel/sort.h>
44 #include <parallel/random_number.h>
45 #include <bits/stl_algo.h>
46 #include <parallel/parallel.h>
48 /** @brief Decide whether to declare certain variables volatile. */
49 #define _GLIBCXX_VOLATILE volatile
51 namespace __gnu_parallel
53 /** @brief Parallel implementation of std::partition.
54 * @param begin Begin iterator of input sequence to split.
55 * @param end End iterator of input sequence to split.
56 * @param pred Partition predicate, possibly including some kind of pivot.
57 * @param num_threads Maximum number of threads to use for this task.
58 * @return Number of elements not fulfilling the predicate. */
59 template<typename RandomAccessIterator, typename Predicate>
60 typename std::iterator_traits<RandomAccessIterator>::difference_type
61 parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
62 Predicate pred, thread_index_t num_threads)
64 typedef std::iterator_traits<RandomAccessIterator> traits_type;
65 typedef typename traits_type::value_type value_type;
66 typedef typename traits_type::difference_type difference_type;
68 difference_type n = end - begin;
70 _GLIBCXX_CALL(n)
72 // Shared.
73 _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
74 _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
75 _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
77 bool* reserved_left = NULL, * reserved_right = NULL;
79 difference_type chunk_size;
81 omp_lock_t result_lock;
82 omp_init_lock(&result_lock);
84 //at least two chunks per thread
85 if(right - left + 1 >= 2 * num_threads * chunk_size)
86 # pragma omp parallel num_threads(num_threads)
88 # pragma omp single
90 num_threads = omp_get_num_threads();
91 reserved_left = new bool[num_threads];
92 reserved_right = new bool[num_threads];
94 if (Settings::partition_chunk_share > 0.0)
95 chunk_size = std::max<difference_type>(Settings::
96 partition_chunk_size,
97 (double)n * Settings::
98 partition_chunk_share
99 / (double)num_threads);
100 else
101 chunk_size = Settings::partition_chunk_size;
104 while (right - left + 1 >= 2 * num_threads * chunk_size)
106 # pragma omp single
108 difference_type num_chunks = (right - left + 1) / chunk_size;
110 for (int r = 0; r < num_threads; ++r)
112 reserved_left[r] = false;
113 reserved_right[r] = false;
115 leftover_left = 0;
116 leftover_right = 0;
117 } //implicit barrier
119 // Private.
120 difference_type thread_left, thread_left_border,
121 thread_right, thread_right_border;
122 thread_left = left + 1;
124 // Just to satisfy the condition below.
125 thread_left_border = thread_left - 1;
126 thread_right = n - 1;
127 thread_right_border = thread_right + 1;
129 bool iam_finished = false;
130 while (!iam_finished)
132 if (thread_left > thread_left_border)
134 omp_set_lock(&result_lock);
135 if (left + (chunk_size - 1) > right)
136 iam_finished = true;
137 else
139 thread_left = left;
140 thread_left_border = left + (chunk_size - 1);
141 left += chunk_size;
143 omp_unset_lock(&result_lock);
146 if (thread_right < thread_right_border)
148 omp_set_lock(&result_lock);
149 if (left > right - (chunk_size - 1))
150 iam_finished = true;
151 else
153 thread_right = right;
154 thread_right_border = right - (chunk_size - 1);
155 right -= chunk_size;
157 omp_unset_lock(&result_lock);
160 if (iam_finished)
161 break;
163 // Swap as usual.
164 while (thread_left < thread_right)
166 while (pred(begin[thread_left])
167 && thread_left <= thread_left_border)
168 ++thread_left;
169 while (!pred(begin[thread_right])
170 && thread_right >= thread_right_border)
171 --thread_right;
173 if (thread_left > thread_left_border
174 || thread_right < thread_right_border)
175 // Fetch new chunk(s).
176 break;
178 std::swap(begin[thread_left], begin[thread_right]);
179 ++thread_left;
180 --thread_right;
184 // Now swap the leftover chunks to the right places.
185 if (thread_left <= thread_left_border)
186 # pragma omp atomic
187 ++leftover_left;
188 if (thread_right >= thread_right_border)
189 # pragma omp atomic
190 ++leftover_right;
192 # pragma omp barrier
194 # pragma omp single
196 leftnew = left - leftover_left * chunk_size;
197 rightnew = right + leftover_right * chunk_size;
200 # pragma omp barrier
202 // <=> thread_left_border + (chunk_size - 1) >= leftnew
203 if (thread_left <= thread_left_border
204 && thread_left_border >= leftnew)
206 // Chunk already in place, reserve spot.
207 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
208 = true;
211 // <=> thread_right_border - (chunk_size - 1) <= rightnew
212 if (thread_right >= thread_right_border
213 && thread_right_border <= rightnew)
215 // Chunk already in place, reserve spot.
216 reserved_right[((thread_right_border - 1) - right)
217 / chunk_size] = true;
220 # pragma omp barrier
222 if (thread_left <= thread_left_border
223 && thread_left_border < leftnew)
225 // Find spot and swap.
226 difference_type swapstart = -1;
227 omp_set_lock(&result_lock);
228 for (int r = 0; r < leftover_left; ++r)
229 if (!reserved_left[r])
231 reserved_left[r] = true;
232 swapstart = left - (r + 1) * chunk_size;
233 break;
235 omp_unset_lock(&result_lock);
237 #if _GLIBCXX_ASSERTIONS
238 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
239 #endif
241 std::swap_ranges(begin + thread_left_border
242 - (chunk_size - 1),
243 begin + thread_left_border + 1,
244 begin + swapstart);
247 if (thread_right >= thread_right_border
248 && thread_right_border > rightnew)
250 // Find spot and swap
251 difference_type swapstart = -1;
252 omp_set_lock(&result_lock);
253 for (int r = 0; r < leftover_right; ++r)
254 if (!reserved_right[r])
256 reserved_right[r] = true;
257 swapstart = right + r * chunk_size + 1;
258 break;
260 omp_unset_lock(&result_lock);
262 #if _GLIBCXX_ASSERTIONS
263 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
264 #endif
266 std::swap_ranges(begin + thread_right_border,
267 begin + thread_right_border + chunk_size,
268 begin + swapstart);
270 #if _GLIBCXX_ASSERTIONS
271 # pragma omp barrier
273 # pragma omp single
275 for (int r = 0; r < leftover_left; ++r)
276 _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
277 for (int r = 0; r < leftover_right; ++r)
278 _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
281 # pragma omp barrier
282 #endif
284 # pragma omp barrier
286 left = leftnew;
287 right = rightnew;
289 # pragma omp flush(left, right)
290 } // end "recursion" //parallel
292 difference_type final_left = left, final_right = right;
294 while (final_left < final_right)
296 // Go right until key is geq than pivot.
297 while (pred(begin[final_left]) && final_left < final_right)
298 ++final_left;
300 // Go left until key is less than pivot.
301 while (!pred(begin[final_right]) && final_left < final_right)
302 --final_right;
304 if (final_left == final_right)
305 break;
306 std::swap(begin[final_left], begin[final_right]);
307 ++final_left;
308 --final_right;
311 // All elements on the left side are < piv, all elements on the
312 // right are >= piv
313 delete[] reserved_left;
314 delete[] reserved_right;
316 omp_destroy_lock(&result_lock);
318 // Element "between" final_left and final_right might not have
319 // been regarded yet
320 if (final_left < n && !pred(begin[final_left]))
321 // Really swapped.
322 return final_left;
323 else
324 return final_left + 1;
328 * @brief Parallel implementation of std::nth_element().
329 * @param begin Begin iterator of input sequence.
330 * @param nth Iterator of element that must be in position afterwards.
331 * @param end End iterator of input sequence.
332 * @param comp Comparator.
334 template<typename RandomAccessIterator, typename Comparator>
335 void
336 parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
337 RandomAccessIterator end, Comparator comp)
339 typedef std::iterator_traits<RandomAccessIterator> traits_type;
340 typedef typename traits_type::value_type value_type;
341 typedef typename traits_type::difference_type difference_type;
343 _GLIBCXX_CALL(end - begin)
345 RandomAccessIterator split;
346 random_number rng;
348 difference_type minimum_length =
349 std::max<difference_type>(2, Settings::partition_minimal_n);
351 // Break if input range to small.
352 while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
354 difference_type n = end - begin;
356 RandomAccessIterator pivot_pos = begin + rng(n);
358 // Swap pivot_pos value to end.
359 if (pivot_pos != (end - 1))
360 std::swap(*pivot_pos, *(end - 1));
361 pivot_pos = end - 1;
363 // XXX Comparator must have first_value_type, second_value_type,
364 // result_type
365 // Comparator == __gnu_parallel::lexicographic<S, int,
366 // __gnu_parallel::less<S, S> >
367 // pivot_pos == std::pair<S, int>*
368 // XXX binder2nd only for RandomAccessIterators??
369 __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
370 pred(comp, *pivot_pos);
372 // Divide, leave pivot unchanged in last place.
373 RandomAccessIterator split_pos1, split_pos2;
374 split_pos1 = begin + parallel_partition(begin, end - 1, pred,
375 get_max_threads());
377 // Left side: < pivot_pos; right side: >= pivot_pos
379 // Swap pivot back to middle.
380 if (split_pos1 != pivot_pos)
381 std::swap(*split_pos1, *pivot_pos);
382 pivot_pos = split_pos1;
384 // In case all elements are equal, split_pos1 == 0
385 if ((split_pos1 + 1 - begin) < (n >> 7)
386 || (end - split_pos1) < (n >> 7))
388 // Very unequal split, one part smaller than one 128th
389 // elements not strictly larger than the pivot.
390 __gnu_parallel::unary_negate<__gnu_parallel::
391 binder1st<Comparator, value_type, value_type, bool>, value_type>
392 pred(__gnu_parallel::binder1st<Comparator, value_type,
393 value_type, bool>(comp, *pivot_pos));
395 // Find other end of pivot-equal range.
396 split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
397 end, pred);
399 else
400 // Only skip the pivot.
401 split_pos2 = split_pos1 + 1;
403 // Compare iterators.
404 if (split_pos2 <= nth)
405 begin = split_pos2;
406 else if (nth < split_pos1)
407 end = split_pos1;
408 else
409 break;
412 // Only at most Settings::partition_minimal_n elements left.
413 __gnu_sequential::sort(begin, end, comp);
416 /** @brief Parallel implementation of std::partial_sort().
417 * @param begin Begin iterator of input sequence.
418 * @param middle Sort until this position.
419 * @param end End iterator of input sequence.
420 * @param comp Comparator. */
421 template<typename RandomAccessIterator, typename Comparator>
422 void
423 parallel_partial_sort(RandomAccessIterator begin,
424 RandomAccessIterator middle,
425 RandomAccessIterator end, Comparator comp)
427 parallel_nth_element(begin, middle, end, comp);
428 std::sort(begin, middle, comp);
431 } //namespace __gnu_parallel
433 #undef _GLIBCXX_VOLATILE
435 #endif