3 // Copyright (C) 2007 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
31 /** @file parallel/multiway_mergesort.h
32 * @brief Parallel multiway merge sort.
33 * This file is a GNU parallel extension to the Standard C++ Library.
36 // Written by Johannes Singler.
38 #ifndef _GLIBCXX_PARALLEL_MERGESORT_H
39 #define _GLIBCXX_PARALLEL_MERGESORT_H 1
43 #include <parallel/basic_iterator.h>
44 #include <bits/stl_algo.h>
45 #include <parallel/parallel.h>
46 #include <parallel/multiway_merge.h>
47 #include <parallel/timing.h>
49 namespace __gnu_parallel
52 /** @brief Subsequence description. */
53 template<typename _DifferenceTp
>
56 typedef _DifferenceTp difference_type
;
58 /** @brief Begin of subsequence. */
59 difference_type begin
;
61 /** @brief End of subsequence. */
65 /** @brief Data accessed by all threads.
67 * PMWMS = parallel multiway mergesort */
68 template<typename RandomAccessIterator
>
69 struct PMWMSSortingData
71 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
72 typedef typename
traits_type::value_type value_type
;
73 typedef typename
traits_type::difference_type difference_type
;
75 /** @brief Input begin. */
76 RandomAccessIterator source
;
78 /** @brief Start indices, per thread. */
79 difference_type
* starts
;
81 /** @brief Temporary arrays for each thread.
83 * Indirection Allows using the temporary storage in different
84 * ways, without code duplication.
85 * @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */
86 value_type
** temporaries
;
88 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
89 /** @brief Storage in which to sort. */
90 RandomAccessIterator
* sorting_places
;
92 /** @brief Storage into which to merge. */
93 value_type
** merging_places
;
95 /** @brief Storage in which to sort. */
96 value_type
** sorting_places
;
98 /** @brief Storage into which to merge. */
99 RandomAccessIterator
* merging_places
;
101 /** @brief Samples. */
104 /** @brief Offsets to add to the found positions. */
105 difference_type
* offsets
;
107 /** @brief Pieces of data to merge @c [thread][sequence] */
108 std::vector
<Piece
<difference_type
> >* pieces
;
111 /** @brief Thread local data for PMWMS. */
112 template<typename RandomAccessIterator
>
115 /** @brief Total number of thread involved. */
116 thread_index_t num_threads
;
117 /** @brief Number of owning thread. */
119 /** @brief Stable sorting desired. */
121 /** @brief Pointer to global data. */
122 PMWMSSortingData
<RandomAccessIterator
>* sd
;
126 * @brief Select samples from a sequence.
127 * @param d Pointer to thread-local data. Result will be placed in
129 * @param num_samples Number of samples to select.
131 template<typename RandomAccessIterator
, typename _DifferenceTp
>
133 determine_samples(PMWMSSorterPU
<RandomAccessIterator
>* d
,
134 _DifferenceTp
& num_samples
)
136 typedef _DifferenceTp difference_type
;
138 PMWMSSortingData
<RandomAccessIterator
>* sd
= d
->sd
;
140 num_samples
= Settings::sort_mwms_oversampling
* d
->num_threads
- 1;
142 difference_type es
[num_samples
+ 2];
143 equally_split(sd
->starts
[d
->iam
+ 1] - sd
->starts
[d
->iam
], num_samples
+ 1, es
);
145 for (difference_type i
= 0; i
< num_samples
; i
++)
146 sd
->samples
[d
->iam
* num_samples
+ i
] = sd
->source
[sd
->starts
[d
->iam
] + es
[i
+ 1]];
149 /** @brief PMWMS code executed by each thread.
150 * @param d Pointer to thread-local data.
151 * @param comp Comparator.
153 template<typename RandomAccessIterator
, typename Comparator
>
155 parallel_sort_mwms_pu(PMWMSSorterPU
<RandomAccessIterator
>* d
,
158 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
159 typedef typename
traits_type::value_type value_type
;
160 typedef typename
traits_type::difference_type difference_type
;
162 Timing
<sequential_tag
> t
;
166 PMWMSSortingData
<RandomAccessIterator
>* sd
= d
->sd
;
167 thread_index_t iam
= d
->iam
;
169 // Length of this thread's chunk, before merging.
170 difference_type length_local
= sd
->starts
[iam
+ 1] - sd
->starts
[iam
];
172 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
173 typedef RandomAccessIterator SortingPlacesIterator
;
175 // Sort in input storage.
176 sd
->sorting_places
[iam
] = sd
->source
+ sd
->starts
[iam
];
178 typedef value_type
* SortingPlacesIterator
;
180 // Sort in temporary storage, leave space for sentinel.
181 sd
->sorting_places
[iam
] = sd
->temporaries
[iam
] = static_cast<value_type
*>(::operator new(sizeof(value_type
) *(length_local
+ 1)));
184 std::uninitialized_copy(sd
->source
+ sd
->starts
[iam
], sd
->source
+ sd
->starts
[iam
] + length_local
, sd
->sorting_places
[iam
]);
189 __gnu_sequential::stable_sort(sd
->sorting_places
[iam
], sd
->sorting_places
[iam
] + length_local
, comp
);
191 __gnu_sequential::sort(sd
->sorting_places
[iam
], sd
->sorting_places
[iam
] + length_local
, comp
);
193 #if _GLIBCXX_ASSERTIONS
194 _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd
->sorting_places
[iam
], sd
->sorting_places
[iam
] + length_local
, comp
));
197 // Invariant: locally sorted subsequence in sd->sorting_places[iam],
198 // sd->sorting_places[iam] + length_local.
201 if (Settings::sort_splitting
== Settings::SAMPLING
)
203 difference_type num_samples
;
204 determine_samples(d
, num_samples
);
208 t
.tic("sample/wait");
211 __gnu_sequential::sort(sd
->samples
, sd
->samples
+ (num_samples
* d
->num_threads
), comp
);
215 for (int s
= 0; s
< d
->num_threads
; s
++)
217 // For each sequence.
218 if (num_samples
* iam
> 0)
219 sd
->pieces
[iam
][s
].begin
= std::lower_bound(sd
->sorting_places
[s
],
220 sd
->sorting_places
[s
] + sd
->starts
[s
+ 1] - sd
->starts
[s
],
221 sd
->samples
[num_samples
* iam
],
223 - sd
->sorting_places
[s
];
225 // Absolute beginning.
226 sd
->pieces
[iam
][s
].begin
= 0;
228 if ((num_samples
* (iam
+ 1)) < (num_samples
* d
->num_threads
))
229 sd
->pieces
[iam
][s
].end
= std::lower_bound(sd
->sorting_places
[s
],
230 sd
->sorting_places
[s
] + sd
->starts
[s
+ 1] - sd
->starts
[s
], sd
->samples
[num_samples
* (iam
+ 1)], comp
)
231 - sd
->sorting_places
[s
];
234 sd
->pieces
[iam
][s
].end
= sd
->starts
[s
+ 1] - sd
->starts
[s
];
238 else if (Settings::sort_splitting
== Settings::EXACT
)
244 std::vector
<std::pair
<SortingPlacesIterator
, SortingPlacesIterator
> > seqs(d
->num_threads
);
245 for (int s
= 0; s
< d
->num_threads
; s
++)
246 seqs
[s
] = std::make_pair(sd
->sorting_places
[s
], sd
->sorting_places
[s
] + sd
->starts
[s
+ 1] - sd
->starts
[s
]);
248 std::vector
<SortingPlacesIterator
> offsets(d
->num_threads
);
250 // If not last thread.
251 if (iam
< d
->num_threads
- 1)
252 multiseq_partition(seqs
.begin(), seqs
.end(), sd
->starts
[iam
+ 1], offsets
.begin(), comp
);
254 for (int seq
= 0; seq
< d
->num_threads
; seq
++)
256 // For each sequence.
257 if (iam
< (d
->num_threads
- 1))
258 sd
->pieces
[iam
][seq
].end
= offsets
[seq
] - seqs
[seq
].first
;
260 // Absolute end of this sequence.
261 sd
->pieces
[iam
][seq
].end
= sd
->starts
[seq
+ 1] - sd
->starts
[seq
];
266 for (int seq
= 0; seq
< d
->num_threads
; seq
++)
268 // For each sequence.
270 sd
->pieces
[iam
][seq
].begin
= sd
->pieces
[iam
- 1][seq
].end
;
272 // Absolute beginning.
273 sd
->pieces
[iam
][seq
].begin
= 0;
279 // Offset from target begin, length after merging.
280 difference_type offset
= 0, length_am
= 0;
281 for (int s
= 0; s
< d
->num_threads
; s
++)
283 length_am
+= sd
->pieces
[iam
][s
].end
- sd
->pieces
[iam
][s
].begin
;
284 offset
+= sd
->pieces
[iam
][s
].begin
;
287 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
288 // Merge to temporary storage, uninitialized creation not possible
289 // since there is no multiway_merge calling the placement new
290 // instead of the assignment operator.
291 sd
->merging_places
[iam
] = sd
->temporaries
[iam
] = new value_type
[length_am
];
293 // Merge directly to target.
294 sd
->merging_places
[iam
] = sd
->source
+ offset
;
296 std::vector
<std::pair
<SortingPlacesIterator
, SortingPlacesIterator
> > seqs(d
->num_threads
);
298 for (int s
= 0; s
< d
->num_threads
; s
++)
300 seqs
[s
] = std::make_pair(sd
->sorting_places
[s
] + sd
->pieces
[iam
][s
].begin
, sd
->sorting_places
[s
] + sd
->pieces
[iam
][s
].end
);
302 #if _GLIBCXX_ASSERTIONS
303 _GLIBCXX_PARALLEL_ASSERT(is_sorted(seqs
[s
].first
, seqs
[s
].second
, comp
));
307 multiway_merge(seqs
.begin(), seqs
.end(), sd
->merging_places
[iam
], comp
, length_am
, d
->stable
, false, sequential_tag());
311 #if _GLIBCXX_ASSERTIONS
312 _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd
->merging_places
[iam
], sd
->merging_places
[iam
] + length_am
, comp
));
317 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
319 std::copy(sd
->merging_places
[iam
], sd
->merging_places
[iam
] + length_am
,
320 sd
->source
+ offset
);
323 delete[] sd
->temporaries
[iam
];
330 /** @brief PMWMS main call.
331 * @param begin Begin iterator of sequence.
332 * @param end End iterator of sequence.
333 * @param comp Comparator.
334 * @param n Length of sequence.
335 * @param num_threads Number of threads to use.
336 * @param stable Stable sorting.
338 template<typename RandomAccessIterator
, typename Comparator
>
340 parallel_sort_mwms(RandomAccessIterator begin
, RandomAccessIterator end
, Comparator comp
, typename
std::iterator_traits
<RandomAccessIterator
>::difference_type n
, int num_threads
, bool stable
)
344 typedef std::iterator_traits
<RandomAccessIterator
> traits_type
;
345 typedef typename
traits_type::value_type value_type
;
346 typedef typename
traits_type::difference_type difference_type
;
351 // At least one element per thread.
353 num_threads
= static_cast<thread_index_t
>(n
);
355 PMWMSSortingData
<RandomAccessIterator
> sd
;
358 sd
.temporaries
= new value_type
*[num_threads
];
360 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
361 sd
.sorting_places
= new RandomAccessIterator
[num_threads
];
362 sd
.merging_places
= new value_type
*[num_threads
];
364 sd
.sorting_places
= new value_type
*[num_threads
];
365 sd
.merging_places
= new RandomAccessIterator
[num_threads
];
368 if (Settings::sort_splitting
== Settings::SAMPLING
)
369 sd
.samples
= new value_type
[num_threads
* (Settings::sort_mwms_oversampling
* num_threads
- 1)];
373 sd
.offsets
= new difference_type
[num_threads
- 1];
374 sd
.pieces
= new std::vector
<Piece
<difference_type
> >[num_threads
];
375 for (int s
= 0; s
< num_threads
; s
++)
376 sd
.pieces
[s
].resize(num_threads
);
377 PMWMSSorterPU
<RandomAccessIterator
>* pus
= new PMWMSSorterPU
<RandomAccessIterator
>[num_threads
];
378 difference_type
* starts
= sd
.starts
= new difference_type
[num_threads
+ 1];
380 difference_type chunk_length
= n
/ num_threads
, split
= n
% num_threads
, start
= 0;
381 for (int i
= 0; i
< num_threads
; i
++)
384 start
+= (i
< split
) ? (chunk_length
+ 1) : chunk_length
;
385 pus
[i
].num_threads
= num_threads
;
388 pus
[i
].stable
= stable
;
390 starts
[num_threads
] = start
;
392 // Now sort in parallel.
393 #pragma omp parallel num_threads(num_threads)
394 parallel_sort_mwms_pu(&(pus
[omp_get_thread_num()]), comp
);
398 delete[] sd
.temporaries
;
399 delete[] sd
.sorting_places
;
400 delete[] sd
.merging_places
;
402 if (Settings::sort_splitting
== Settings::SAMPLING
)