1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
11 #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
12 #define BOOST_COMPUTE_ALGORITHM_FILL_HPP
16 #include <boost/mpl/int.hpp>
17 #include <boost/mpl/vector.hpp>
18 #include <boost/mpl/contains.hpp>
19 #include <boost/utility/enable_if.hpp>
21 #include <boost/compute/cl.hpp>
22 #include <boost/compute/system.hpp>
23 #include <boost/compute/command_queue.hpp>
24 #include <boost/compute/algorithm/copy.hpp>
25 #include <boost/compute/async/future.hpp>
26 #include <boost/compute/iterator/constant_iterator.hpp>
27 #include <boost/compute/iterator/discard_iterator.hpp>
28 #include <boost/compute/detail/is_buffer_iterator.hpp>
29 #include <boost/compute/detail/iterator_range_size.hpp>
35 namespace mpl
= boost::mpl
;
37 // fills the range [first, first + count) with value using copy()
38 template<class BufferIterator
, class T
>
39 inline void fill_with_copy(BufferIterator first
,
44 ::boost::compute::copy(
45 ::boost::compute::make_constant_iterator(value
, 0),
46 ::boost::compute::make_constant_iterator(value
, count
),
52 // fills the range [first, first + count) with value using copy_async()
53 template<class BufferIterator
, class T
>
54 inline future
<void> fill_async_with_copy(BufferIterator first
,
59 return ::boost::compute::copy_async(
60 ::boost::compute::make_constant_iterator(value
, 0),
61 ::boost::compute::make_constant_iterator(value
, count
),
67 #if defined(CL_VERSION_1_2)
69 // meta-function returning true if Iterator points to a range of values
70 // that can be filled using clEnqueueFillBuffer(). to meet these criteria
71 // it must have a buffer accessible through iter.get_buffer() and the
72 // size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}.
73 template<class Iterator
>
74 struct is_valid_fill_buffer_iterator
:
76 is_buffer_iterator
<Iterator
>,
89 sizeof(typename
std::iterator_traits
<Iterator
>::value_type
)
95 struct is_valid_fill_buffer_iterator
<discard_iterator
> : public boost::false_type
{};
97 // specialization which uses clEnqueueFillBuffer for buffer iterators
98 template<class BufferIterator
, class T
>
100 dispatch_fill(BufferIterator first
,
103 command_queue
&queue
,
104 typename
boost::enable_if
<
105 is_valid_fill_buffer_iterator
<BufferIterator
>
108 typedef typename
std::iterator_traits
<BufferIterator
>::value_type value_type
;
115 // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
116 if(!queue
.check_device_version(1, 2)){
117 return fill_with_copy(first
, count
, value
, queue
);
120 value_type pattern
= static_cast<value_type
>(value
);
121 size_t offset
= static_cast<size_t>(first
.get_index());
124 // use clEnqueueWriteBuffer() directly when writing a single value
125 // to the device buffer. this is potentially more efficient and also
126 // works around a bug in the Intel OpenCL driver.
127 queue
.enqueue_write_buffer(
129 offset
* sizeof(value_type
),
135 queue
.enqueue_fill_buffer(
139 offset
* sizeof(value_type
),
140 count
* sizeof(value_type
)
145 template<class BufferIterator
, class T
>
147 dispatch_fill_async(BufferIterator first
,
150 command_queue
&queue
,
151 typename
boost::enable_if
<
152 is_valid_fill_buffer_iterator
<BufferIterator
>
155 typedef typename
std::iterator_traits
<BufferIterator
>::value_type value_type
;
157 // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
158 if(!queue
.check_device_version(1, 2)){
159 return fill_async_with_copy(first
, count
, value
, queue
);
162 value_type pattern
= static_cast<value_type
>(value
);
163 size_t offset
= static_cast<size_t>(first
.get_index());
166 queue
.enqueue_fill_buffer(first
.get_buffer(),
169 offset
* sizeof(value_type
),
170 count
* sizeof(value_type
));
172 return future
<void>(event_
);
175 #ifdef CL_VERSION_2_0
176 // specializations for svm_ptr<T>
178 inline void dispatch_fill(svm_ptr
<T
> first
,
181 command_queue
&queue
)
187 queue
.enqueue_svm_fill(
188 first
.get(), &value
, sizeof(T
), count
* sizeof(T
)
193 inline future
<void> dispatch_fill_async(svm_ptr
<T
> first
,
196 command_queue
&queue
)
199 return future
<void>();
202 event event_
= queue
.enqueue_svm_fill(
203 first
.get(), &value
, sizeof(T
), count
* sizeof(T
)
206 return future
<void>(event_
);
208 #endif // CL_VERSION_2_0
210 // default implementations
211 template<class BufferIterator
, class T
>
213 dispatch_fill(BufferIterator first
,
216 command_queue
&queue
,
217 typename
boost::disable_if
<
218 is_valid_fill_buffer_iterator
<BufferIterator
>
221 fill_with_copy(first
, count
, value
, queue
);
224 template<class BufferIterator
, class T
>
226 dispatch_fill_async(BufferIterator first
,
229 command_queue
&queue
,
230 typename
boost::disable_if
<
231 is_valid_fill_buffer_iterator
<BufferIterator
>
234 return fill_async_with_copy(first
, count
, value
, queue
);
237 template<class BufferIterator
, class T
>
238 inline void dispatch_fill(BufferIterator first
,
241 command_queue
&queue
)
243 fill_with_copy(first
, count
, value
, queue
);
246 template<class BufferIterator
, class T
>
247 inline future
<void> dispatch_fill_async(BufferIterator first
,
250 command_queue
&queue
)
252 return fill_async_with_copy(first
, count
, value
, queue
);
254 #endif // !defined(CL_VERSION_1_2)
256 } // end detail namespace
258 /// Fills the range [\p first, \p last) with \p value.
260 /// \param first first element in the range to fill
261 /// \param last one past the last element in the range to fill
262 /// \param value value to copy to each element
263 /// \param queue command queue to perform the operation
265 /// For example, to fill a vector on the device with sevens:
267 /// // vector on the device
268 /// boost::compute::vector<int> vec(10, context);
270 /// // fill vector with sevens
271 /// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
274 /// \see boost::compute::fill_n()
275 template<class BufferIterator
, class T
>
276 inline void fill(BufferIterator first
,
279 command_queue
&queue
= system::default_queue())
281 size_t count
= detail::iterator_range_size(first
, last
);
286 detail::dispatch_fill(first
, count
, value
, queue
);
289 template<class BufferIterator
, class T
>
290 inline future
<void> fill_async(BufferIterator first
,
293 command_queue
&queue
= system::default_queue())
295 size_t count
= detail::iterator_range_size(first
, last
);
297 return future
<void>();
300 return detail::dispatch_fill_async(first
, count
, value
, queue
);
303 } // end compute namespace
304 } // end boost namespace
306 #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP