1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
11 #ifndef BOOST_COMPUTE_KERNEL_HPP
12 #define BOOST_COMPUTE_KERNEL_HPP
16 #include <boost/assert.hpp>
17 #include <boost/utility/enable_if.hpp>
19 #include <boost/compute/config.hpp>
20 #include <boost/compute/program.hpp>
21 #include <boost/compute/exception.hpp>
22 #include <boost/compute/type_traits/is_fundamental.hpp>
23 #include <boost/compute/detail/get_object_info.hpp>
24 #include <boost/compute/detail/assert_cl_success.hpp>
30 template<class T
> struct set_kernel_arg
;
32 } // end detail namespace
35 /// \brief A compute kernel.
37 /// \see command_queue, program
41 /// Creates a null kernel object.
47 /// Creates a new kernel object for \p kernel. If \p retain is
48 /// \c true, the reference count for \p kernel will be incremented.
49 explicit kernel(cl_kernel kernel
, bool retain
= true)
52 if(m_kernel
&& retain
){
53 clRetainKernel(m_kernel
);
57 /// Creates a new kernel object with \p name from \p program.
58 kernel(const program
&program
, const std::string
&name
)
61 m_kernel
= clCreateKernel(program
.get(), name
.c_str(), &error
);
64 BOOST_THROW_EXCEPTION(opencl_error(error
));
68 /// Creates a new kernel object as a copy of \p other.
69 kernel(const kernel
&other
)
70 : m_kernel(other
.m_kernel
)
73 clRetainKernel(m_kernel
);
77 /// Copies the kernel object from \p other to \c *this.
78 kernel
& operator=(const kernel
&other
)
82 clReleaseKernel(m_kernel
);
85 m_kernel
= other
.m_kernel
;
88 clRetainKernel(m_kernel
);
95 #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
96 /// Move-constructs a new kernel object from \p other.
97 kernel(kernel
&& other
) BOOST_NOEXCEPT
98 : m_kernel(other
.m_kernel
)
103 /// Move-assigns the kernel from \p other to \c *this.
104 kernel
& operator=(kernel
&& other
) BOOST_NOEXCEPT
107 clReleaseKernel(m_kernel
);
110 m_kernel
= other
.m_kernel
;
115 #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
117 /// Destroys the kernel object.
121 BOOST_COMPUTE_ASSERT_CL_SUCCESS(
122 clReleaseKernel(m_kernel
)
127 /// Returns a reference to the underlying OpenCL kernel object.
128 cl_kernel
& get() const
130 return const_cast<cl_kernel
&>(m_kernel
);
133 /// Returns the function name for the kernel.
134 std::string
name() const
136 return get_info
<std::string
>(CL_KERNEL_FUNCTION_NAME
);
139 /// Returns the number of arguments for the kernel.
142 return get_info
<cl_uint
>(CL_KERNEL_NUM_ARGS
);
145 /// Returns the program for the kernel.
146 program
get_program() const
148 return program(get_info
<cl_program
>(CL_KERNEL_PROGRAM
));
151 /// Returns the context for the kernel.
152 context
get_context() const
154 return context(get_info
<cl_context
>(CL_KERNEL_CONTEXT
));
157 /// Returns information about the kernel.
159 /// \see_opencl_ref{clGetKernelInfo}
161 T
get_info(cl_kernel_info info
) const
163 return detail::get_object_info
<T
>(clGetKernelInfo
, m_kernel
, info
);
168 typename
detail::get_object_info_type
<kernel
, Enum
>::type
171 #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
172 /// Returns information about the argument at \p index.
174 /// For example, to get the name of the first argument:
176 /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
179 /// Note, this function requires that the program be compiled with the
180 /// \c "-cl-kernel-arg-info" flag. For example:
182 /// program.build("-cl-kernel-arg-info");
185 /// \opencl_version_warning{1,2}
187 /// \see_opencl_ref{clGetKernelArgInfo}
189 T
get_arg_info(size_t index
, cl_kernel_arg_info info
) const
191 return detail::get_object_info
<T
>(
192 clGetKernelArgInfo
, m_kernel
, info
, static_cast<cl_uint
>(index
)
198 typename
detail::get_object_info_type
<kernel
, Enum
>::type
199 get_arg_info(size_t index
) const;
200 #endif // CL_VERSION_1_2
202 /// Returns work-group information for the kernel with \p device.
204 /// \see_opencl_ref{clGetKernelWorkGroupInfo}
206 T
get_work_group_info(const device
&device
, cl_kernel_work_group_info info
) const
208 return detail::get_object_info
<T
>(clGetKernelWorkGroupInfo
, m_kernel
, info
, device
.id());
211 /// Sets the argument at \p index to \p value with \p size.
213 /// \see_opencl_ref{clSetKernelArg}
214 void set_arg(size_t index
, size_t size
, const void *value
)
216 BOOST_ASSERT(index
< arity());
218 cl_int ret
= clSetKernelArg(m_kernel
,
219 static_cast<cl_uint
>(index
),
222 if(ret
!= CL_SUCCESS
){
223 BOOST_THROW_EXCEPTION(opencl_error(ret
));
227 /// Sets the argument at \p index to \p value.
229 /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
230 /// calling set_arg(index, sizeof(type), &value).
232 /// Additionally, this method is specialized for device memory objects
233 /// such as buffer and image2d. This allows for them to be passed directly
234 /// without having to extract their underlying cl_mem object.
236 /// This method is also specialized for device container types such as
237 /// vector<T> and array<T, N>. This allows for them to be passed directly
238 /// as kernel arguments without having to extract their underlying buffer.
240 /// For setting local memory arguments (e.g. "__local float *buf"), the
241 /// local_buffer<T> class may be used:
243 /// // set argument to a local buffer with storage for 32 float's
244 /// kernel.set_arg(0, local_buffer<float>(32));
247 void set_arg(size_t index
, const T
&value
)
249 // if you get a compilation error pointing here it means you
250 // attempted to set a kernel argument from an invalid type.
251 detail::set_kernel_arg
<T
>()(*this, index
, value
);
255 void set_arg(size_t index
, const cl_mem mem
)
257 set_arg(index
, sizeof(cl_mem
), static_cast<const void *>(&mem
));
261 void set_arg(size_t index
, const cl_sampler sampler
)
263 set_arg(index
, sizeof(cl_sampler
), static_cast<const void *>(&sampler
));
267 void set_arg_svm_ptr(size_t index
, void* ptr
)
269 #ifdef CL_VERSION_2_0
270 cl_int ret
= clSetKernelArgSVMPointer(m_kernel
, static_cast<cl_uint
>(index
), ptr
);
271 if(ret
!= CL_SUCCESS
){
272 BOOST_THROW_EXCEPTION(opencl_error(ret
));
277 BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE
));
281 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
282 /// Sets the arguments for the kernel to \p args.
284 void set_args(T
&&... args
)
286 BOOST_ASSERT(sizeof...(T
) <= arity());
288 _set_args
<0>(args
...);
290 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
292 #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
293 /// Sets additional execution information for the kernel.
295 /// \opencl_version_warning{2,0}
297 /// \see_opencl2_ref{clSetKernelExecInfo}
298 void set_exec_info(cl_kernel_exec_info info
, size_t size
, const void *value
)
300 cl_int ret
= clSetKernelExecInfo(m_kernel
, info
, size
, value
);
301 if(ret
!= CL_SUCCESS
){
302 BOOST_THROW_EXCEPTION(opencl_error(ret
));
305 #endif // CL_VERSION_2_0
307 /// Returns \c true if the kernel is the same at \p other.
308 bool operator==(const kernel
&other
) const
310 return m_kernel
== other
.m_kernel
;
313 /// Returns \c true if the kernel is different from \p other.
314 bool operator!=(const kernel
&other
) const
316 return m_kernel
!= other
.m_kernel
;
320 operator cl_kernel() const
326 static kernel
create_with_source(const std::string
&source
,
327 const std::string
&name
,
328 const context
&context
)
330 return program::build_with_source(source
, context
).create_kernel(name
);
334 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
// Terminates the recursion below once every argument has been set
// (also selected when set_args() is called with no arguments).
template<size_t N>
void _set_args()
{
}

// Sets argument N to arg and recurses for the remaining arguments,
// which are assigned to the indices N+1, N+2, ...
template<size_t N, class T, class... Args>
void _set_args(T&& arg, Args&&... rest)
{
    set_arg(N, arg);
    _set_args<N+1>(rest...);
}
348 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
354 inline kernel
program::create_kernel(const std::string
&name
) const
356 return kernel(*this, name
);
359 /// \internal_ define get_info() specializations for kernel
360 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel
,
361 ((std::string
, CL_KERNEL_FUNCTION_NAME
))
362 ((cl_uint
, CL_KERNEL_NUM_ARGS
))
363 ((cl_uint
, CL_KERNEL_REFERENCE_COUNT
))
364 ((cl_context
, CL_KERNEL_CONTEXT
))
365 ((cl_program
, CL_KERNEL_PROGRAM
))
368 #ifdef CL_VERSION_1_2
369 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel
,
370 ((std::string
, CL_KERNEL_ATTRIBUTES
))
372 #endif // CL_VERSION_1_2
/// \internal_ define get_arg_info() specializations for kernel
#ifdef CL_VERSION_1_2
// For one (result_type, value) pair this defines:
//  - detail::get_object_info_type<kernel, value>::type as result_type
//  - kernel::get_arg_info<value>(index), which forwards to the typed
//    overload get_arg_info<result_type>(index, value)
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(result_type, value) \
    namespace detail { \
        template<> struct get_object_info_type<kernel, value> { typedef result_type type; }; \
    } \
    template<> inline result_type kernel::get_arg_info<value>(size_t index) const { \
        return get_arg_info<result_type>(index, value); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)
#endif // CL_VERSION_1_2
393 // set_kernel_arg implementation for built-in types
395 struct set_kernel_arg
397 typename
boost::enable_if
<is_fundamental
<T
> >::type
398 operator()(kernel
&kernel_
, size_t index
, const T
&value
)
400 kernel_
.set_arg(index
, sizeof(T
), &value
);
404 // set_kernel_arg specialization for char (different from built-in cl_char)
406 struct set_kernel_arg
<char>
408 void operator()(kernel
&kernel_
, size_t index
, const char c
)
410 kernel_
.set_arg(index
, sizeof(char), &c
);
414 } // end detail namespace
415 } // end namespace compute
416 } // end namespace boost
418 #endif // BOOST_COMPUTE_KERNEL_HPP