1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
59 template<
typename NumericT>
67 KernelClass::init(ctx);
68 program = &ctx.
get_program(KernelClass::program_name());
73 KernelClass::init(ctx);
74 program = &ctx.
get_program(KernelClass::program_name());
79 template<
typename NumericT>
87 KernelClass::init(ctx);
88 program = &ctx.
get_program(KernelClass::program_name());
93 KernelClass::init(ctx);
94 program = &ctx.
get_program(KernelClass::program_name());
99 template<
typename NumericT>
107 KernelClass::init(ctx);
108 program = &ctx.
get_program(KernelClass::program_name());
113 KernelClass::init(ctx);
114 program = &ctx.
get_program(KernelClass::program_name());
133 template<
typename DestNumericT,
typename SrcNumericT>
136 assert(dest.
row_major() == src.
row_major() && bool(
"Addition/subtraction on mixed matrix layouts not supported yet!"));
138 assert(viennacl::traits::opencl_handle(dest).
context() == viennacl::traits::opencl_handle(src).
context() &&
bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
140 std::string kernel_name(
"convert_");
141 kernel_name += dest.
row_major() ?
"row_" :
"col_";
174 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
176 viennacl::traits::opencl_handle(mat2),
186 typename ScalarT1,
typename ScalarT2>
191 std::string kernel_name;
193 kernel_name =
"ambm_cpu_cpu";
195 kernel_name =
"ambm_cpu_gpu";
197 kernel_name =
"ambm_gpu_cpu";
199 kernel_name =
"ambm_gpu_gpu";
212 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
214 viennacl::traits::opencl_handle(mat2),
219 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(beta)),
221 viennacl::traits::opencl_handle(mat3),
231 typename ScalarT1,
typename ScalarT2>
236 std::string kernel_name;
238 kernel_name =
"ambm_m_cpu_cpu";
240 kernel_name =
"ambm_m_cpu_gpu";
242 kernel_name =
"ambm_m_gpu_cpu";
244 kernel_name =
"ambm_m_gpu_gpu";
257 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
259 viennacl::traits::opencl_handle(mat2),
264 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(beta)),
266 viennacl::traits::opencl_handle(mat3),
275 typename SizeT,
typename DistanceT>
279 std::string kernel_name(
"trans_kernel");
282 static_cast<cl_uint
>(proxy.lhs().start1()), static_cast<cl_uint>(proxy.lhs().start2()),
283 static_cast<cl_uint>(proxy.lhs().internal_size1()), static_cast<cl_uint>(proxy.lhs().internal_size2()),
284 static_cast<cl_uint>(proxy.lhs().size1()), static_cast<cl_uint>(proxy.lhs().size2()),
285 static_cast<cl_uint>(proxy.lhs().stride1()), static_cast<cl_uint>(proxy.lhs().stride2()),
288 static_cast<cl_uint>(temp_trans.
start1()), static_cast<cl_uint>(temp_trans.
start2()),
290 static_cast<cl_uint>(temp_trans.
stride1()), static_cast<cl_uint>(temp_trans.
stride2())));
293 template <
typename NumericT>
305 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(s))
310 template <
typename NumericT>
319 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(s))
324 template <
typename NumericT>
335 KernelClass::init(ctx);
337 cl_uint options_alpha = 0;
378 viennacl::traits::opencl_handle(
NumericT(1)),
380 viennacl::traits::opencl_handle(vec),
385 template <
typename NumericT>
391 KernelClass::init(ctx);
393 cl_uint options_alpha = 0;
435 viennacl::traits::opencl_handle(
NumericT(1)),
437 viennacl::traits::opencl_handle(mat),
442 template <
typename NumericT>
448 KernelClass::init(ctx);
450 cl_uint options_alpha = 0;
478 viennacl::traits::opencl_handle(
NumericT(1)),
480 viennacl::traits::opencl_handle(mat),
485 template <
typename NumericT>
491 KernelClass::init(ctx);
493 cl_uint options_alpha = 0;
521 viennacl::traits::opencl_handle(
NumericT(1)),
523 viennacl::traits::opencl_handle(mat),
539 template <
typename T,
typename OP>
543 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
544 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
560 viennacl::traits::opencl_handle(proxy.lhs()),
565 viennacl::traits::opencl_handle(proxy.rhs()),
582 template <
typename T,
typename OP>
586 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
587 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
597 viennacl::traits::opencl_handle(proxy.lhs()),
619 template <
typename NumericT>
634 viennacl::traits::opencl_handle(vec),
639 viennacl::traits::opencl_handle(result),
657 template<
typename NumericT,
typename ScalarType >
664 bool effective_A_trans = A_trans ^ A.
row_major();
665 bool effective_B_trans = B_trans ^ B.
row_major();
667 char cAt = effective_A_trans ?
'T' :
'N';
668 char cBt = effective_B_trans ?
'T' :
'N';
670 std::string kernel_prefix(
"prod_");
695 template<
typename NumericT,
typename ScalarT1>
697 ScalarT1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
714 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
717 viennacl::traits::opencl_handle(vec1),
722 viennacl::traits::opencl_handle(vec2),
731 template <
typename SCALARTYPE,
typename VectorType>
747 static_cast<cl_uint>(A.
size1()),
748 static_cast<cl_uint>(A.
size2()),
757 template <
typename NumericT>
796 template <
typename NumericT>
810 static_cast<cl_uint>(start + 1),
811 static_cast<cl_uint>(start),
825 static_cast<cl_uint>(start + 1),
826 static_cast<cl_uint>(start),
839 template <
typename NumericT>
853 static_cast<cl_uint>(0),
854 static_cast<cl_uint>(0),
869 static_cast<cl_uint>(0),
870 static_cast<cl_uint>(0),
883 template <
typename NumericT>
921 template<
typename NumericT>
944 static_cast<cl_uint>(l),
945 static_cast<cl_uint>(m - 1)
961 static_cast<cl_uint>(l),
962 static_cast<cl_uint>(m - 1)
969 template <
typename NumericT>
988 static_cast<cl_uint>(row_start),
989 static_cast<cl_uint>(col_start),
1003 static_cast<cl_uint>(row_start),
1004 static_cast<cl_uint>(col_start),
cl_uint stride
Increment between integers.
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
void matrix_diag_to_vector(const matrix_base< NumericT > &mat, int k, vector_base< NumericT > &vec)
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void prod_impl(const matrix_base< NumericT > &mat, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
Generic size and resize functionality for different vector and matrix types.
Represents an OpenCL kernel within ViennaCL.
viennacl::ocl::program & get_program(std::string const &name)
Returns the program with the provided name.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
cl_uint start
Starting value of the integer stride.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
const std::string SVD_BIDIAG_PACK_KERNEL
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
size_type stride2() const
Returns the number of columns.
const std::string SVD_GIVENS_NEXT_KERNEL
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
static void init(viennacl::ocl::context &ctx)
Determines row and column increments for matrices and matrix proxies.
void bidiag_pack(matrix_base< NumericT > &A, viennacl::vector< NumericT > &dh, viennacl::vector< NumericT > &sh)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
viennacl::ocl::kernel & element_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
void scaled_rank_1_update(matrix_base< NumericT > &A, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
OpenCL kernel file for singular value decomposition.
const std::string SVD_COPY_ROW_KERNEL
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Common implementations shared by OpenCL-based operations.
void copy_vec(matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
Main kernel class for generating OpenCL kernels for elementwise-operations such as element_sin() on/w...
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
viennacl::ocl::kernel & kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
OpenCL kernel file for vector operations.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type stride1() const
Returns the number of rows.
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
size_type size2() const
Returns the number of columns.
void bidiag_pack_svd(viennacl::matrix< SCALARTYPE > &A, VectorType &dh, VectorType &sh)
Wrapper class for an OpenCL program.
void execute(template_base const &T, statements_container const &statements, viennacl::ocl::context &ctx=viennacl::ocl::current_context(), bool force_compilation=false)
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
size_type size1() const
Returns the number of rows.
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
statement mat_mat_prod(NumericT alpha, viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::matrix_base< NumericT > const *B, bool B_trans, NumericT beta, viennacl::matrix_base< NumericT > const *C)
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
viennacl::ocl::kernel & legacy_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void givens_next(matrix_base< NumericT > &matrix, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
A tag class representing transposed matrices.
size_type start2() const
Returns the number of columns.
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
const std::string SVD_COPY_COL_KERNEL
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::ocl::kernel & get_kernel(std::string const &name)
Returns the kernel with the provided name.
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
std::string op_to_string(op_abs)
static void init(viennacl::ocl::context &ctx)
Helper class for converting a type to its string representation.
OpenCL kernel file for element-wise matrix operations.
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
static std::string program_name()
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the number of rows.
cl_uint size
Number of values in the stride.
Runtime generation of OpenCL kernels for matrix operations.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
void matrix_row(matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)