1 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
55 template<
typename DestNumericT,
typename SrcNumericT>
58 assert(viennacl::traits::opencl_handle(dest).
context() == viennacl::traits::opencl_handle(src).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
60 std::string kernel_name(
"convert_");
75 template <
typename T,
typename ScalarType1>
77 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha)
79 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
107 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
109 viennacl::traits::opencl_handle(vec2),
115 template <
typename T,
typename ScalarType1,
typename ScalarType2>
117 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
118 vector_base<T> const & vec3, ScalarType2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
120 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
121 assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(vec3).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
126 std::string kernel_name;
128 kernel_name =
"avbv_cpu_cpu";
130 kernel_name =
"avbv_cpu_gpu";
132 kernel_name =
"avbv_gpu_cpu";
134 kernel_name =
"avbv_gpu_gpu";
164 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
166 viennacl::traits::opencl_handle(vec2),
169 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)),
171 viennacl::traits::opencl_handle(vec3),
177 template <
typename T,
typename ScalarType1,
typename ScalarType2>
179 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
180 vector_base<T> const & vec3, ScalarType2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
182 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
183 assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(vec3).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
188 std::string kernel_name;
190 kernel_name =
"avbv_v_cpu_cpu";
192 kernel_name =
"avbv_v_cpu_gpu";
194 kernel_name =
"avbv_v_gpu_cpu";
196 kernel_name =
"avbv_v_gpu_gpu";
226 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
228 viennacl::traits::opencl_handle(vec2),
231 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)),
233 viennacl::traits::opencl_handle(vec3),
245 template <
typename T>
261 viennacl::traits::opencl_handle(T(alpha)) )
271 template <
typename T>
274 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
285 viennacl::traits::opencl_handle(vec2),
299 template <
typename T,
typename OP>
303 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
304 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
309 std::string kernel_name =
"element_pow";
314 kernel_name =
"element_div";
319 kernel_name =
"element_prod";
329 viennacl::traits::opencl_handle(proxy.lhs()),
333 viennacl::traits::opencl_handle(proxy.rhs()),
348 template <
typename T,
typename OP>
352 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
353 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
374 viennacl::traits::opencl_handle(proxy.lhs()),
387 template <
typename T>
392 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
393 assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(partial_result).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
399 &&
bool(
"Incompatible vector sizes in inner_prod_impl()!"));
419 viennacl::traits::opencl_handle(vec2),
422 viennacl::traits::opencl_handle(partial_result)
436 template <
typename T>
441 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
442 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
448 temp.
resize(work_groups, ctx);
463 viennacl::traits::opencl_handle(result) )
469 template<
typename NumericT>
487 template <
typename NumericT>
492 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
511 while (current_index < vec_tuple.
const_size())
513 switch (vec_tuple.
const_size() - current_index)
530 viennacl::traits::opencl_handle(temp)
535 cl_uint(work_groups),
537 viennacl::traits::opencl_handle(result),
556 viennacl::traits::opencl_handle(temp)
561 cl_uint(work_groups),
563 viennacl::traits::opencl_handle(result),
580 viennacl::traits::opencl_handle(temp)
585 cl_uint(work_groups),
587 viennacl::traits::opencl_handle(result),
602 viennacl::traits::opencl_handle(temp)
607 cl_uint(work_groups),
609 viennacl::traits::opencl_handle(result),
638 viennacl::traits::opencl_handle(temp)
643 cl_uint(work_groups),
645 viennacl::traits::opencl_handle(result),
668 template <
typename T>
673 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
679 temp.
resize(work_groups, ctx);
687 std::vector<T> temp_cpu(work_groups);
691 for (
typename std::vector<T>::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
704 template <
typename T>
709 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(partial_result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
724 viennacl::traits::opencl_handle(partial_result) )
736 template <
typename T>
740 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
769 template <
typename T>
780 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType;
782 CPUVectorType temp_cpu(work_groups);
786 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
787 result += static_cast<T>(*it);
800 template <
typename T>
804 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
833 template <
typename T>
844 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType;
846 CPUVectorType temp_cpu(work_groups);
850 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
851 result += static_cast<T>(*it);
852 result = std::sqrt(result);
864 template <
typename T>
868 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
897 template <
typename T>
908 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType;
910 CPUVectorType temp_cpu(work_groups);
914 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
915 result =
std::max(result, static_cast<T>(*it));
929 template <
typename T>
952 cl_int err = clEnqueueReadBuffer(ctx.
get_queue().
handle().
get(), h.
get(), CL_TRUE, 0,
sizeof(cl_uint), &result, 0, NULL, NULL);
965 template<
typename NumericT>
969 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
985 viennacl::traits::opencl_handle(temp)
994 viennacl::traits::opencl_handle(result)
1003 template<
typename NumericT>
1021 viennacl::traits::opencl_handle(temp)
1025 typedef std::vector<typename viennacl::result_of::cl_type<NumericT>::type> CPUVectorType;
1027 CPUVectorType temp_cpu(work_groups);
1030 result =
static_cast<NumericT>(temp_cpu[0]);
1031 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
1032 result =
std::max(result, static_cast<NumericT>(*it));
1044 template<
typename NumericT>
1048 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
1064 viennacl::traits::opencl_handle(temp)
1073 viennacl::traits::opencl_handle(result)
1082 template<
typename NumericT>
1100 viennacl::traits::opencl_handle(temp)
1104 typedef std::vector<typename viennacl::result_of::cl_type<NumericT>::type> CPUVectorType;
1106 CPUVectorType temp_cpu(work_groups);
1109 result =
static_cast<NumericT>(temp_cpu[0]);
1110 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
1111 result =
std::min(result, static_cast<NumericT>(*it));
1121 template<
typename NumericT>
1125 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
1136 template<
typename NumericT>
1155 template <
typename T>
1160 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
1172 viennacl::traits::opencl_handle(vec2),
1176 viennacl::traits::opencl_handle(alpha),
1177 viennacl::traits::opencl_handle(beta))
1192 template<
typename NumericT>
1213 output, cl_uint(output.
start()), cl_uint(output.
stride()),
1214 cl_uint(is_inclusive ? 0 : 1), opencl_carries.opencl_handle())
1226 opencl_carries.opencl_handle())
1237 template<
typename NumericT>
1250 template<
typename NumericT>
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector, where the result is stored on a CPU scalar.
cl_uint stride
Increment between integers.
vcl_size_t const_size() const
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
Represents an OpenCL device within ViennaCL.
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on CPU.
Generic size and resize functionality for different vector and matrix types.
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
viennacl::ocl::command_queue & get_queue()
Represents an OpenCL kernel within ViennaCL.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
cl_uint start
Starting value of the integer stride.
static std::string program_name()
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
void norm_reduction_impl(vector_base< T > const &vec, vector_base< T > &partial_result, cl_uint norm_id)
Computes the partial work group results for vector norms.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Main kernel class for generating OpenCL kernels for multiple inner products on/with viennacl::vector<...
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector, where the result is stored in an OpenCL buffer. ...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
T max(const T &lhs, const T &rhs)
Maximum.
An expression template class that represents a binary operation that yields a vector.
static void init(viennacl::ocl::context &ctx)
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Common implementations shared by OpenCL-based operations.
viennacl::ocl::handle< cl_command_queue > const & handle() const
size_type stride() const
Returns the stride within the buffer (in multiples of sizeof(NumericT))
#define VIENNACL_ERR_CHECK(err)
const OCL_TYPE & get() const
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, vector_base< T > &partial_result)
Computes the partial inner product of two vectors - implementation. Library users should call inner_p...
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector.
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum value of a vector, where the result is stored in an OpenCL buffer.
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum value of a vector, where the value is stored in a host value.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on CPU.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
OpenCL kernel file for vector operations.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
static void init(viennacl::ocl::context &ctx)
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector.
void resize(size_type new_size, bool preserve=true)
Resizes the allocated memory for the vector. Pads the memory to be a multiple of 'AlignmentV'.
Common base class for dense vectors, vector ranges, and vector slices.
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using OpenCL.
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
OpenCL kernel file for scan operations. To be merged back to vector operations.
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
static void init(viennacl::ocl::context &ctx)
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and su...
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using OpenCL.
cl_uint index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
size_type size() const
Returns the length of the vector (cf. std::vector)
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
VectorType const & const_at(vcl_size_t i) const
viennacl::ocl::packed_cl_uint make_layout(vector_base< NumericT > const &vec)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
OpenCL kernel file for element-wise vector operations.
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Forward declarations of the implicit_vector_base, vector_base class.
T min(const T &lhs, const T &rhs)
Minimum.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
size_type start() const
Returns the offset within the buffer.
std::string op_to_string(op_abs)
Helper class for converting a type to its string representation.
void scan_impl(vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive)
Worker routine for scan routines using OpenCL.
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied.
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
Implementation of the ViennaCL scalar class.
static void init(viennacl::ocl::context &ctx)
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector.
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Simple enable-if variant that uses the SFINAE pattern.
cl_uint size
Number of values in the stride.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
static void init(viennacl::ocl::context &ctx)
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.