2 //===-- unseq_backend_simd.h ----------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _PSTL_UNSEQ_BACKEND_SIMD_H
11 #define _PSTL_UNSEQ_BACKEND_SIMD_H
13 #include <type_traits>
17 // This header defines the minimum set of vector routines required
18 // to support parallel STL.
21 namespace __unseq_backend
24 // Expect vector width up to 64 (or 512 bit)
25 const std::size_t __lane_size
= 64;
27 template <class _Iterator
, class _DifferenceType
, class _Function
>
29 __simd_walk_1(_Iterator __first
, _DifferenceType __n
, _Function __f
) noexcept
32 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
38 template <class _Iterator1
, class _DifferenceType
, class _Iterator2
, class _Function
>
40 __simd_walk_2(_Iterator1 __first1
, _DifferenceType __n
, _Iterator2 __first2
, _Function __f
) noexcept
43 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
44 __f(__first1
[__i
], __first2
[__i
]);
45 return __first2
+ __n
;
48 template <class _Iterator1
, class _DifferenceType
, class _Iterator2
, class _Iterator3
, class _Function
>
50 __simd_walk_3(_Iterator1 __first1
, _DifferenceType __n
, _Iterator2 __first2
, _Iterator3 __first3
,
51 _Function __f
) noexcept
54 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
55 __f(__first1
[__i
], __first2
[__i
], __first3
[__i
]);
56 return __first3
+ __n
;
59 // TODO: check whether __simd_first() can be used here
60 template <class _Index
, class _DifferenceType
, class _Pred
>
62 __simd_or(_Index __first
, _DifferenceType __n
, _Pred __pred
) noexcept
64 #if defined(_PSTL_EARLYEXIT_PRESENT)
66 _PSTL_PRAGMA_VECTOR_UNALIGNED
67 _PSTL_PRAGMA_SIMD_EARLYEXIT
68 for (__i
= 0; __i
< __n
; ++__i
)
69 if (__pred(__first
[__i
]))
73 _DifferenceType __block_size
= 4 < __n
? 4 : __n
;
74 const _Index __last
= __first
+ __n
;
75 while (__last
!= __first
)
77 __INT32_TYPE__ __flag
= 1;
78 _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag
)
79 for (_DifferenceType __i
= 0; __i
< __block_size
; ++__i
)
80 if (__pred(*(__first
+ __i
)))
85 __first
+= __block_size
;
86 if (__last
- __first
>= __block_size
<< 1)
88 // Double the block _Size. Any unnecessary iterations can be amortized against work done so far.
93 __block_size
= __last
- __first
;
100 template <class _Index
, class _DifferenceType
, class _Compare
>
102 __simd_first(_Index __first
, _DifferenceType __begin
, _DifferenceType __end
, _Compare __comp
) noexcept
104 #if defined(_PSTL_EARLYEXIT_PRESENT)
105 _DifferenceType __i
= __begin
;
106 _PSTL_PRAGMA_VECTOR_UNALIGNED
// Do not generate peel loop part
107 _PSTL_PRAGMA_SIMD_EARLYEXIT
for (; __i
< __end
; ++__i
)
109 if (__comp(__first
, __i
))
114 return __first
+ __i
;
116 // Experiments show good block sizes like this
117 const _DifferenceType __block_size
= 8;
118 alignas(__lane_size
) _DifferenceType __lane
[__block_size
] = {0};
119 while (__end
- __begin
>= __block_size
)
121 _DifferenceType __found
= 0;
122 _PSTL_PRAGMA_VECTOR_UNALIGNED
// Do not generate peel loop part
123 _PSTL_PRAGMA_SIMD_REDUCTION(|
124 : __found
) for (_DifferenceType __i
= __begin
; __i
< __begin
+ __block_size
;
127 const _DifferenceType __t
= __comp(__first
, __i
);
128 __lane
[__i
- __begin
] = __t
;
134 // This will vectorize
135 for (__i
= 0; __i
< __block_size
; ++__i
)
142 return __first
+ __begin
+ __i
;
144 __begin
+= __block_size
;
147 //Keep remainder scalar
148 while (__begin
!= __end
)
150 if (__comp(__first
, __begin
))
152 return __first
+ __begin
;
156 return __first
+ __end
;
157 #endif //_PSTL_EARLYEXIT_PRESENT
160 template <class _Index1
, class _DifferenceType
, class _Index2
, class _Pred
>
161 std::pair
<_Index1
, _Index2
>
162 __simd_first(_Index1 __first1
, _DifferenceType __n
, _Index2 __first2
, _Pred __pred
) noexcept
164 #if defined(_PSTL_EARLYEXIT_PRESENT)
165 _DifferenceType __i
= 0;
166 _PSTL_PRAGMA_VECTOR_UNALIGNED
167 _PSTL_PRAGMA_SIMD_EARLYEXIT
168 for (; __i
< __n
; ++__i
)
169 if (__pred(__first1
[__i
], __first2
[__i
]))
171 return std::make_pair(__first1
+ __i
, __first2
+ __i
);
173 const _Index1 __last1
= __first1
+ __n
;
174 const _Index2 __last2
= __first2
+ __n
;
175 // Experiments show good block sizes like this
176 const _DifferenceType __block_size
= 8;
177 alignas(__lane_size
) _DifferenceType __lane
[__block_size
] = {0};
178 while (__last1
- __first1
>= __block_size
)
180 _DifferenceType __found
= 0;
182 _PSTL_PRAGMA_VECTOR_UNALIGNED
// Do not generate peel loop part
183 _PSTL_PRAGMA_SIMD_REDUCTION(|
184 : __found
) for (__i
= 0; __i
< __block_size
; ++__i
)
186 const _DifferenceType __t
= __pred(__first1
[__i
], __first2
[__i
]);
192 _DifferenceType __i2
;
193 // This will vectorize
194 for (__i2
= 0; __i2
< __block_size
; ++__i2
)
199 return std::make_pair(__first1
+ __i2
, __first2
+ __i2
);
201 __first1
+= __block_size
;
202 __first2
+= __block_size
;
205 //Keep remainder scalar
206 for (; __last1
!= __first1
; ++__first1
, ++__first2
)
207 if (__pred(*(__first1
), *(__first2
)))
208 return std::make_pair(__first1
, __first2
);
210 return std::make_pair(__last1
, __last2
);
211 #endif //_PSTL_EARLYEXIT_PRESENT
214 template <class _Index
, class _DifferenceType
, class _Pred
>
216 __simd_count(_Index __index
, _DifferenceType __n
, _Pred __pred
) noexcept
218 _DifferenceType __count
= 0;
219 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count
)
220 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
221 if (__pred(*(__index
+ __i
)))
227 template <class _InputIterator
, class _DifferenceType
, class _OutputIterator
, class _BinaryPredicate
>
229 __simd_unique_copy(_InputIterator __first
, _DifferenceType __n
, _OutputIterator __result
,
230 _BinaryPredicate __pred
) noexcept
235 _DifferenceType __cnt
= 1;
236 __result
[0] = __first
[0];
239 for (_DifferenceType __i
= 1; __i
< __n
; ++__i
)
241 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt
: 1)
242 if (!__pred(__first
[__i
], __first
[__i
- 1]))
244 __result
[__cnt
] = __first
[__i
];
248 return __result
+ __cnt
;
251 template <class _InputIterator
, class _DifferenceType
, class _OutputIterator
, class _Assigner
>
253 __simd_assign(_InputIterator __first
, _DifferenceType __n
, _OutputIterator __result
, _Assigner __assigner
) noexcept
255 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
257 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
258 __assigner(__first
+ __i
, __result
+ __i
);
259 return __result
+ __n
;
262 template <class _InputIterator
, class _DifferenceType
, class _OutputIterator
, class _UnaryPredicate
>
264 __simd_copy_if(_InputIterator __first
, _DifferenceType __n
, _OutputIterator __result
, _UnaryPredicate __pred
) noexcept
266 _DifferenceType __cnt
= 0;
269 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
271 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt
: 1)
272 if (__pred(__first
[__i
]))
274 __result
[__cnt
] = __first
[__i
];
278 return __result
+ __cnt
;
281 template <class _InputIterator
, class _DifferenceType
, class _BinaryPredicate
>
283 __simd_calc_mask_2(_InputIterator __first
, _DifferenceType __n
, bool* __mask
, _BinaryPredicate __pred
) noexcept
285 _DifferenceType __count
= 0;
287 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count
)
288 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
290 __mask
[__i
] = !__pred(__first
[__i
], __first
[__i
- 1]);
291 __count
+= __mask
[__i
];
296 template <class _InputIterator
, class _DifferenceType
, class _UnaryPredicate
>
298 __simd_calc_mask_1(_InputIterator __first
, _DifferenceType __n
, bool* __mask
, _UnaryPredicate __pred
) noexcept
300 _DifferenceType __count
= 0;
302 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count
)
303 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
305 __mask
[__i
] = __pred(__first
[__i
]);
306 __count
+= __mask
[__i
];
311 template <class _InputIterator
, class _DifferenceType
, class _OutputIterator
, class _Assigner
>
313 __simd_copy_by_mask(_InputIterator __first
, _DifferenceType __n
, _OutputIterator __result
, bool* __mask
,
314 _Assigner __assigner
) noexcept
316 _DifferenceType __cnt
= 0;
318 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
322 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt
: 1)
324 __assigner(__first
+ __i
, __result
+ __cnt
);
331 template <class _InputIterator
, class _DifferenceType
, class _OutputIterator1
, class _OutputIterator2
>
333 __simd_partition_by_mask(_InputIterator __first
, _DifferenceType __n
, _OutputIterator1 __out_true
,
334 _OutputIterator2 __out_false
, bool* __mask
) noexcept
336 _DifferenceType __cnt_true
= 0, __cnt_false
= 0;
338 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
340 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true
: 1, __cnt_false
: 1)
343 __out_true
[__cnt_true
] = __first
[__i
];
348 __out_false
[__cnt_false
] = __first
[__i
];
354 template <class _Index
, class _DifferenceType
, class _Tp
>
356 __simd_fill_n(_Index __first
, _DifferenceType __n
, const _Tp
& __value
) noexcept
358 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
360 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
361 __first
[__i
] = __value
;
362 return __first
+ __n
;
365 template <class _Index
, class _DifferenceType
, class _Generator
>
367 __simd_generate_n(_Index __first
, _DifferenceType __size
, _Generator __g
) noexcept
369 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
371 for (_DifferenceType __i
= 0; __i
< __size
; ++__i
)
372 __first
[__i
] = __g();
373 return __first
+ __size
;
376 template <class _Index
, class _BinaryPredicate
>
378 __simd_adjacent_find(_Index __first
, _Index __last
, _BinaryPredicate __pred
, bool __or_semantic
) noexcept
380 if (__last
- __first
< 2)
383 typedef typename
std::iterator_traits
<_Index
>::difference_type _DifferenceType
;
384 _DifferenceType __i
= 0;
386 #if defined(_PSTL_EARLYEXIT_PRESENT)
387 //Some compiler versions fail to compile the following loop when iterators are used. Indices are used instead
388 const _DifferenceType __n
= __last
- __first
- 1;
389 _PSTL_PRAGMA_VECTOR_UNALIGNED
390 _PSTL_PRAGMA_SIMD_EARLYEXIT
391 for (; __i
< __n
; ++__i
)
392 if (__pred(__first
[__i
], __first
[__i
+ 1]))
395 return __i
< __n
? __first
+ __i
: __last
;
397 // Experiments show good block sizes like this
398 //TODO: to consider tuning block_size for various data types
399 const _DifferenceType __block_size
= 8;
400 alignas(__lane_size
) _DifferenceType __lane
[__block_size
] = {0};
401 while (__last
- __first
>= __block_size
)
403 _DifferenceType __found
= 0;
404 _PSTL_PRAGMA_VECTOR_UNALIGNED
// Do not generate peel loop part
405 _PSTL_PRAGMA_SIMD_REDUCTION(|
406 : __found
) for (__i
= 0; __i
< __block_size
- 1; ++__i
)
408 //TODO: to improve SIMD vectorization
409 const _DifferenceType __t
= __pred(*(__first
+ __i
), *(__first
+ __i
+ 1));
414 //Process a pair of elements on a boundary of a data block
415 if (__first
+ __block_size
< __last
&& __pred(*(__first
+ __i
), *(__first
+ __i
+ 1)))
416 __lane
[__i
] = __found
= 1;
423 // This will vectorize
424 for (__i
= 0; __i
< __block_size
; ++__i
)
427 return __first
+ __i
; //As far as found is true a __result (__lane[__i] is true) is guaranteed
429 __first
+= __block_size
;
431 //Process the rest elements
432 for (; __last
- __first
> 1; ++__first
)
433 if (__pred(*__first
, *(__first
+ 1)))
440 // It was created to reduce the code inside std::enable_if
441 template <typename _Tp
, typename _BinaryOperation
>
442 using is_arithmetic_plus
= std::integral_constant
<bool, std::is_arithmetic
<_Tp
>::value
&&
443 std::is_same
<_BinaryOperation
, std::plus
<_Tp
>>::value
>;
445 template <typename _DifferenceType
, typename _Tp
, typename _BinaryOperation
, typename _UnaryOperation
>
446 typename
std::enable_if
<is_arithmetic_plus
<_Tp
, _BinaryOperation
>::value
, _Tp
>::type
447 __simd_transform_reduce(_DifferenceType __n
, _Tp __init
, _BinaryOperation
, _UnaryOperation __f
) noexcept
449 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init
)
450 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
455 template <typename _Size
, typename _Tp
, typename _BinaryOperation
, typename _UnaryOperation
>
456 typename
std::enable_if
<!is_arithmetic_plus
<_Tp
, _BinaryOperation
>::value
, _Tp
>::type
457 __simd_transform_reduce(_Size __n
, _Tp __init
, _BinaryOperation __binary_op
, _UnaryOperation __f
) noexcept
459 const _Size __block_size
= __lane_size
/ sizeof(_Tp
);
460 if (__n
> 2 * __block_size
&& __block_size
> 1)
462 alignas(__lane_size
) char __lane_
[__lane_size
];
463 _Tp
* __lane
= reinterpret_cast<_Tp
*>(__lane_
);
467 for (_Size __i
= 0; __i
< __block_size
; ++__i
)
469 ::new (__lane
+ __i
) _Tp(__binary_op(__f(__i
), __f(__block_size
+ __i
)));
472 _Size __i
= 2 * __block_size
;
473 const _Size last_iteration
= __block_size
* (__n
/ __block_size
);
474 for (; __i
< last_iteration
; __i
+= __block_size
)
477 for (_Size __j
= 0; __j
< __block_size
; ++__j
)
479 __lane
[__j
] = __binary_op(__lane
[__j
], __f(__i
+ __j
));
484 for (_Size __j
= 0; __j
< __n
- last_iteration
; ++__j
)
486 __lane
[__j
] = __binary_op(__lane
[__j
], __f(last_iteration
+ __j
));
489 for (_Size __j
= 0; __j
< __block_size
; ++__j
)
491 __init
= __binary_op(__init
, __lane
[__j
]);
495 for (_Size __j
= 0; __j
< __block_size
; ++__j
)
502 for (_Size __i
= 0; __i
< __n
; ++__i
)
504 __init
= __binary_op(__init
, __f(__i
));
510 // Exclusive scan for "+" and arithmetic types
511 template <class _InputIterator
, class _Size
, class _OutputIterator
, class _UnaryOperation
, class _Tp
,
512 class _BinaryOperation
>
513 typename
std::enable_if
<is_arithmetic_plus
<_Tp
, _BinaryOperation
>::value
, std::pair
<_OutputIterator
, _Tp
>>::type
514 __simd_scan(_InputIterator __first
, _Size __n
, _OutputIterator __result
, _UnaryOperation __unary_op
, _Tp __init
,
515 _BinaryOperation
, /*Inclusive*/ std::false_type
)
517 _PSTL_PRAGMA_SIMD_SCAN(+ : __init
)
518 for (_Size __i
= 0; __i
< __n
; ++__i
)
520 __result
[__i
] = __init
;
521 _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init
)
522 __init
+= __unary_op(__first
[__i
]);
524 return std::make_pair(__result
+ __n
, __init
);
527 // As soon as we cannot call __binary_op in "combiner" we create a wrapper over _Tp to encapsulate __binary_op
528 template <typename _Tp
, typename _BinaryOp
>
532 _BinaryOp
* __bin_op
; // Here is a pointer to function because of default ctor
534 _Combiner() : __value
{}, __bin_op(nullptr) {}
535 _Combiner(const _Tp
& __v
, const _BinaryOp
* __b
) : __value(__v
), __bin_op(const_cast<_BinaryOp
*>(__b
)) {}
536 _Combiner(const _Combiner
& __obj
) : __value
{}, __bin_op(__obj
.__bin_op
) {}
539 operator()(const _Combiner
& __obj
)
541 __value
= (*__bin_op
)(__value
, __obj
.__value
);
545 // Exclusive scan for other binary operations and types
546 template <class _InputIterator
, class _Size
, class _OutputIterator
, class _UnaryOperation
, class _Tp
,
547 class _BinaryOperation
>
548 typename
std::enable_if
<!is_arithmetic_plus
<_Tp
, _BinaryOperation
>::value
, std::pair
<_OutputIterator
, _Tp
>>::type
549 __simd_scan(_InputIterator __first
, _Size __n
, _OutputIterator __result
, _UnaryOperation __unary_op
, _Tp __init
,
550 _BinaryOperation __binary_op
, /*Inclusive*/ std::false_type
)
552 typedef _Combiner
<_Tp
, _BinaryOperation
> _CombinerType
;
553 _CombinerType __init_
{__init
, &__binary_op
};
555 _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op
, _CombinerType
)
557 _PSTL_PRAGMA_SIMD_SCAN(__bin_op
: __init_
)
558 for (_Size __i
= 0; __i
< __n
; ++__i
)
560 __result
[__i
] = __init_
.__value
;
561 _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init_
)
562 _PSTL_PRAGMA_FORCEINLINE
563 __init_
.__value
= __binary_op(__init_
.__value
, __unary_op(__first
[__i
]));
565 return std::make_pair(__result
+ __n
, __init_
.__value
);
568 // Inclusive scan for "+" and arithmetic types
569 template <class _InputIterator
, class _Size
, class _OutputIterator
, class _UnaryOperation
, class _Tp
,
570 class _BinaryOperation
>
571 typename
std::enable_if
<is_arithmetic_plus
<_Tp
, _BinaryOperation
>::value
, std::pair
<_OutputIterator
, _Tp
>>::type
572 __simd_scan(_InputIterator __first
, _Size __n
, _OutputIterator __result
, _UnaryOperation __unary_op
, _Tp __init
,
573 _BinaryOperation
, /*Inclusive*/ std::true_type
)
575 _PSTL_PRAGMA_SIMD_SCAN(+ : __init
)
576 for (_Size __i
= 0; __i
< __n
; ++__i
)
578 __init
+= __unary_op(__first
[__i
]);
579 _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init
)
580 __result
[__i
] = __init
;
582 return std::make_pair(__result
+ __n
, __init
);
585 // Inclusive scan for other binary operations and types
586 template <class _InputIterator
, class _Size
, class _OutputIterator
, class _UnaryOperation
, class _Tp
,
587 class _BinaryOperation
>
588 typename
std::enable_if
<!is_arithmetic_plus
<_Tp
, _BinaryOperation
>::value
, std::pair
<_OutputIterator
, _Tp
>>::type
589 __simd_scan(_InputIterator __first
, _Size __n
, _OutputIterator __result
, _UnaryOperation __unary_op
, _Tp __init
,
590 _BinaryOperation __binary_op
, std::true_type
)
592 typedef _Combiner
<_Tp
, _BinaryOperation
> _CombinerType
;
593 _CombinerType __init_
{__init
, &__binary_op
};
595 _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op
, _CombinerType
)
597 _PSTL_PRAGMA_SIMD_SCAN(__bin_op
: __init_
)
598 for (_Size __i
= 0; __i
< __n
; ++__i
)
600 _PSTL_PRAGMA_FORCEINLINE
601 __init_
.__value
= __binary_op(__init_
.__value
, __unary_op(__first
[__i
]));
602 _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init_
)
603 __result
[__i
] = __init_
.__value
;
605 return std::make_pair(__result
+ __n
, __init_
.__value
);
608 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
609 // complexity [violation] - We will have at most (__n-1 + number_of_lanes) comparisons instead of at most __n-1.
610 template <typename _ForwardIterator
, typename _Size
, typename _Compare
>
612 __simd_min_element(_ForwardIterator __first
, _Size __n
, _Compare __comp
) noexcept
619 typedef typename
std::iterator_traits
<_ForwardIterator
>::value_type _ValueType
;
622 _ValueType __min_val
;
624 _Compare
* __min_comp
;
626 _ComplexType() : __min_val
{}, __min_ind
{}, __min_comp(nullptr) {}
627 _ComplexType(const _ValueType
& val
, const _Compare
* comp
)
628 : __min_val(val
), __min_ind(0), __min_comp(const_cast<_Compare
*>(comp
))
631 _ComplexType(const _ComplexType
& __obj
)
632 : __min_val(__obj
.__min_val
), __min_ind(__obj
.__min_ind
), __min_comp(__obj
.__min_comp
)
636 _PSTL_PRAGMA_DECLARE_SIMD
638 operator()(const _ComplexType
& __obj
)
640 if (!(*__min_comp
)(__min_val
, __obj
.__min_val
) &&
641 ((*__min_comp
)(__obj
.__min_val
, __min_val
) || __obj
.__min_ind
- __min_ind
< 0))
643 __min_val
= __obj
.__min_val
;
644 __min_ind
= __obj
.__min_ind
;
649 _ComplexType __init
{*__first
, &__comp
};
651 _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func
, _ComplexType
)
653 _PSTL_PRAGMA_SIMD_REDUCTION(__min_func
: __init
)
654 for (_Size __i
= 1; __i
< __n
; ++__i
)
656 const _ValueType __min_val
= __init
.__min_val
;
657 const _ValueType __current
= __first
[__i
];
658 if (__comp(__current
, __min_val
))
660 __init
.__min_val
= __current
;
661 __init
.__min_ind
= __i
;
664 return __first
+ __init
.__min_ind
;
667 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
668 // complexity [violation] - We will have at most (2*(__n-1) + 4*number_of_lanes) comparisons instead of at most [1.5*(__n-1)].
669 template <typename _ForwardIterator
, typename _Size
, typename _Compare
>
670 std::pair
<_ForwardIterator
, _ForwardIterator
>
671 __simd_minmax_element(_ForwardIterator __first
, _Size __n
, _Compare __comp
) noexcept
675 return std::make_pair(__first
, __first
);
677 typedef typename
std::iterator_traits
<_ForwardIterator
>::value_type _ValueType
;
681 _ValueType __min_val
;
682 _ValueType __max_val
;
685 _Compare
* __minmax_comp
;
687 _ComplexType() : __min_val
{}, __max_val
{}, __min_ind
{}, __max_ind
{}, __minmax_comp(nullptr) {}
688 _ComplexType(const _ValueType
& __min
, const _ValueType
& __max
, const _Compare
* __comp
)
689 : __min_val(__min
), __max_val(__max
), __min_ind(0), __max_ind(0),
690 __minmax_comp(const_cast<_Compare
*>(__comp
))
693 _ComplexType(const _ComplexType
& __obj
)
694 : __min_val(__obj
.__min_val
), __max_val(__obj
.__max_val
), __min_ind(__obj
.__min_ind
),
695 __max_ind(__obj
.__max_ind
), __minmax_comp(__obj
.__minmax_comp
)
700 operator()(const _ComplexType
& __obj
)
703 if ((*__minmax_comp
)(__obj
.__min_val
, __min_val
))
705 __min_val
= __obj
.__min_val
;
706 __min_ind
= __obj
.__min_ind
;
708 else if (!(*__minmax_comp
)(__min_val
, __obj
.__min_val
))
710 __min_val
= __obj
.__min_val
;
711 __min_ind
= (__min_ind
- __obj
.__min_ind
< 0) ? __min_ind
: __obj
.__min_ind
;
715 if ((*__minmax_comp
)(__max_val
, __obj
.__max_val
))
717 __max_val
= __obj
.__max_val
;
718 __max_ind
= __obj
.__max_ind
;
720 else if (!(*__minmax_comp
)(__obj
.__max_val
, __max_val
))
722 __max_val
= __obj
.__max_val
;
723 __max_ind
= (__max_ind
- __obj
.__max_ind
< 0) ? __obj
.__max_ind
: __max_ind
;
728 _ComplexType __init
{*__first
, *__first
, &__comp
};
730 _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func
, _ComplexType
);
732 _PSTL_PRAGMA_SIMD_REDUCTION(__min_func
: __init
)
733 for (_Size __i
= 1; __i
< __n
; ++__i
)
735 auto __min_val
= __init
.__min_val
;
736 auto __max_val
= __init
.__max_val
;
737 auto __current
= __first
+ __i
;
738 if (__comp(*__current
, __min_val
))
740 __init
.__min_val
= *__current
;
741 __init
.__min_ind
= __i
;
743 else if (!__comp(*__current
, __max_val
))
745 __init
.__max_val
= *__current
;
746 __init
.__max_ind
= __i
;
749 return std::make_pair(__first
+ __init
.__min_ind
, __first
+ __init
.__max_ind
);
752 template <class _InputIterator
, class _DifferenceType
, class _OutputIterator1
, class _OutputIterator2
,
753 class _UnaryPredicate
>
754 std::pair
<_OutputIterator1
, _OutputIterator2
>
755 __simd_partition_copy(_InputIterator __first
, _DifferenceType __n
, _OutputIterator1 __out_true
,
756 _OutputIterator2 __out_false
, _UnaryPredicate __pred
) noexcept
758 _DifferenceType __cnt_true
= 0, __cnt_false
= 0;
761 for (_DifferenceType __i
= 0; __i
< __n
; ++__i
)
763 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true
: 1, __cnt_false
: 1)
764 if (__pred(__first
[__i
]))
766 __out_true
[__cnt_true
] = __first
[__i
];
771 __out_false
[__cnt_false
] = __first
[__i
];
775 return std::make_pair(__out_true
+ __cnt_true
, __out_false
+ __cnt_false
);
778 template <class _ForwardIterator1
, class _ForwardIterator2
, class _BinaryPredicate
>
780 __simd_find_first_of(_ForwardIterator1 __first
, _ForwardIterator1 __last
, _ForwardIterator2 __s_first
,
781 _ForwardIterator2 __s_last
, _BinaryPredicate __pred
) noexcept
783 typedef typename
std::iterator_traits
<_ForwardIterator1
>::difference_type _DifferencType
;
785 const _DifferencType __n1
= __last
- __first
;
786 const _DifferencType __n2
= __s_last
- __s_first
;
787 if (__n1
== 0 || __n2
== 0)
789 return __last
; // according to the standard
793 // If first sequence larger than second then we'll run simd_first with parameters of first sequence.
794 // Otherwise, vice versa.
797 for (; __first
!= __last
; ++__first
)
799 if (__unseq_backend::__simd_or(
801 __internal::__equal_value_by_pred
<decltype(*__first
), _BinaryPredicate
>(*__first
, __pred
)))
809 for (; __s_first
!= __s_last
; ++__s_first
)
811 const auto __result
= __unseq_backend::__simd_first(
812 __first
, _DifferencType(0), __n1
, [__s_first
, &__pred
](_ForwardIterator1 __it
, _DifferencType __i
) {
813 return __pred(__it
[__i
], *__s_first
);
815 if (__result
!= __last
)
824 template <class _RandomAccessIterator
, class _DifferenceType
, class _UnaryPredicate
>
825 _RandomAccessIterator
826 __simd_remove_if(_RandomAccessIterator __first
, _DifferenceType __n
, _UnaryPredicate __pred
) noexcept
828 // find first element we need to remove
829 auto __current
= __unseq_backend::__simd_first(
830 __first
, _DifferenceType(0), __n
,
831 [&__pred
](_RandomAccessIterator __it
, _DifferenceType __i
) { return __pred(__it
[__i
]); });
832 __n
-= __current
- __first
;
834 // if we have in sequence only one element that pred(__current[1]) != false we can exit the function
840 _DifferenceType __cnt
= 0;
842 for (_DifferenceType __i
= 1; __i
< __n
; ++__i
)
844 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt
: 1)
845 if (!__pred(__current
[__i
]))
847 __current
[__cnt
] = std::move(__current
[__i
]);
851 return __current
+ __cnt
;
853 } // namespace __unseq_backend
854 } // namespace __pstl
856 #endif /* _PSTL_UNSEQ_BACKEND_SIMD_H */