3 // Copyright (C) 2007-2018 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/partial_sum.h
26 * @brief Parallel implementation of std::partial_sum(), i.e. prefix
28 * This file is a GNU parallel extension to the Standard C++ Library.
31 // Written by Johannes Singler.
33 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
34 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
38 #include <bits/stl_algobase.h>
39 #include <parallel/parallel.h>
40 #include <parallel/numericfwd.h>
42 namespace __gnu_parallel
44 // Problem: there is no 0-element given.
46 /** @brief Base case prefix sum routine.
47 * @param __begin Begin iterator of input sequence.
48 * @param __end End iterator of input sequence.
49 * @param __result Begin iterator of output sequence.
50 * @param __bin_op Associative binary function.
51 * @param __value Start value. Must be passed since the neutral
52 * element is unknown in general.
53 * @return End iterator of output sequence. */
54 template<typename _IIter
,
55 typename _OutputIterator
,
56 typename _BinaryOperation
>
58 __parallel_partial_sum_basecase(_IIter __begin
, _IIter __end
,
59 _OutputIterator __result
,
60 _BinaryOperation __bin_op
,
61 typename
std::iterator_traits
<_IIter
>::value_type __value
)
66 while (__begin
!= __end
)
68 __value
= __bin_op(__value
, *__begin
);
76 /** @brief Parallel partial sum implementation, two-phase approach,
78 * @param __begin Begin iterator of input sequence.
79 * @param __end End iterator of input sequence.
80 * @param __result Begin iterator of output sequence.
81 * @param __bin_op Associative binary function.
82 * @param __n Length of sequence.
83 * @return End iterator of output sequence.
85 template<typename _IIter
,
86 typename _OutputIterator
,
87 typename _BinaryOperation
>
89 __parallel_partial_sum_linear(_IIter __begin
, _IIter __end
,
90 _OutputIterator __result
,
91 _BinaryOperation __bin_op
,
92 typename
std::iterator_traits
<_IIter
>::difference_type __n
)
94 typedef std::iterator_traits
<_IIter
> _TraitsType
;
95 typedef typename
_TraitsType::value_type _ValueType
;
96 typedef typename
_TraitsType::difference_type _DifferenceType
;
101 _ThreadIndex __num_threads
=
102 std::min
<_DifferenceType
>(__get_max_threads(), __n
- 1);
104 if (__num_threads
< 2)
106 *__result
= *__begin
;
107 return __parallel_partial_sum_basecase(__begin
+ 1, __end
,
108 __result
+ 1, __bin_op
,
112 _DifferenceType
* __borders
;
115 const _Settings
& __s
= _Settings::get();
117 # pragma omp parallel num_threads(__num_threads)
121 __num_threads
= omp_get_num_threads();
123 __borders
= new _DifferenceType
[__num_threads
+ 2];
125 if (__s
.partial_sum_dilation
== 1.0f
)
126 __equally_split(__n
, __num_threads
+ 1, __borders
);
129 _DifferenceType __first_part_length
=
130 std::max
<_DifferenceType
>(1,
131 __n
/ (1.0f
+ __s
.partial_sum_dilation
* __num_threads
));
132 _DifferenceType __chunk_length
=
133 (__n
- __first_part_length
) / __num_threads
;
134 _DifferenceType __borderstart
=
135 __n
- __num_threads
* __chunk_length
;
137 for (_ThreadIndex __i
= 1; __i
< (__num_threads
+ 1); ++__i
)
139 __borders
[__i
] = __borderstart
;
140 __borderstart
+= __chunk_length
;
142 __borders
[__num_threads
+ 1] = __n
;
145 __sums
= static_cast<_ValueType
*>(::operator new(sizeof(_ValueType
)
147 _OutputIterator __target_end
;
150 _ThreadIndex __iam
= omp_get_thread_num();
153 *__result
= *__begin
;
154 __parallel_partial_sum_basecase(__begin
+ 1,
155 __begin
+ __borders
[1],
158 ::new(&(__sums
[__iam
])) _ValueType(*(__result
+ __borders
[1] - 1));
162 ::new(&(__sums
[__iam
]))
163 _ValueType(__gnu_parallel::accumulate(
164 __begin
+ __borders
[__iam
] + 1,
165 __begin
+ __borders
[__iam
+ 1],
166 *(__begin
+ __borders
[__iam
]),
168 __gnu_parallel::sequential_tag()));
174 __parallel_partial_sum_basecase(__sums
+ 1, __sums
+ __num_threads
,
175 __sums
+ 1, __bin_op
, __sums
[0]);
180 __parallel_partial_sum_basecase(__begin
+ __borders
[__iam
+ 1],
181 __begin
+ __borders
[__iam
+ 2],
182 __result
+ __borders
[__iam
+ 1],
183 __bin_op
, __sums
[__iam
]);
186 for (_ThreadIndex __i
= 0; __i
< __num_threads
; ++__i
)
187 __sums
[__i
].~_ValueType();
188 ::operator delete(__sums
);
192 return __result
+ __n
;
195 /** @brief Parallel partial sum front-__end.
196 * @param __begin Begin iterator of input sequence.
197 * @param __end End iterator of input sequence.
198 * @param __result Begin iterator of output sequence.
199 * @param __bin_op Associative binary function.
200 * @return End iterator of output sequence. */
201 template<typename _IIter
,
202 typename _OutputIterator
,
203 typename _BinaryOperation
>
205 __parallel_partial_sum(_IIter __begin
, _IIter __end
,
206 _OutputIterator __result
, _BinaryOperation __bin_op
)
208 _GLIBCXX_CALL(__begin
- __end
)
210 typedef std::iterator_traits
<_IIter
> _TraitsType
;
211 typedef typename
_TraitsType::value_type _ValueType
;
212 typedef typename
_TraitsType::difference_type _DifferenceType
;
214 _DifferenceType __n
= __end
- __begin
;
216 switch (_Settings::get().partial_sum_algorithm
)
219 // Need an initial offset.
220 return __parallel_partial_sum_linear(__begin
, __end
, __result
,
223 // Partial_sum algorithm not implemented.
224 _GLIBCXX_PARALLEL_ASSERT(0);
225 return __result
+ __n
;
230 #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */