1 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
55 template<
typename NumericT,
unsigned int AlignmentV>
65 viennacl::traits::opencl_handle(x),
67 cl_uint(info_selector)
81 template<
typename NumericT,
unsigned int AlignmentV>
91 bool with_alpha_beta = (alpha < NumericT(1) || alpha >
NumericT(1)) || (beta < 0 || beta > 0);
96 unsigned int alignment = AlignmentV;
97 if (use_nvidia_specific)
124 if (alignment == 4 || alignment == 8)
144 if (use_nvidia_specific)
190 template<
typename NumericT,
unsigned int AlignmentV>
201 viennacl::traits::opencl_handle(d_A),
206 viennacl::traits::opencl_handle(y),
222 template<
typename NumericT,
unsigned int AlignmentV>
235 viennacl::traits::opencl_handle(d_A.lhs()),
240 viennacl::traits::opencl_handle(y),
256 template<
typename NumericT,
unsigned int AlignmentV>
274 viennacl::traits::opencl_handle(upper_bound_nonzeros_per_row_A)
278 unsigned int * upper_bound_nonzeros_per_row_A_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(upper_bound_nonzeros_per_row_A.
handle());
280 unsigned int max_nnz_per_row_A = 0;
281 for (std::size_t i=0; i<upper_bound_nonzeros_per_row_A.
size(); ++i)
282 max_nnz_per_row_A =
std::max(max_nnz_per_row_A, upper_bound_nonzeros_per_row_A_ptr[i]);
284 if (max_nnz_per_row_A > 32)
287 unsigned int max_entries_in_G = 32;
288 if (max_nnz_per_row_A <= 256)
289 max_entries_in_G = 16;
290 if (max_nnz_per_row_A <= 64)
291 max_entries_in_G = 8;
296 cl_uint(max_entries_in_G),
297 viennacl::traits::opencl_handle(exclusive_scan_helper)
302 unsigned int augmented_size = exclusive_scan_helper[A.
size1()];
310 viennacl::ocl::enqueue(k_fill_A2(A2.handle1().opencl_handle(), A2.handle2().opencl_handle(), A2.handle().opencl_handle(), cl_uint(A2.size1()),
311 viennacl::traits::opencl_handle(exclusive_scan_helper)
316 viennacl::ocl::enqueue(k_fill_G1(G1.handle1().opencl_handle(), G1.handle2().opencl_handle(), G1.handle().opencl_handle(), cl_uint(G1.size1()),
318 cl_uint(max_entries_in_G),
319 viennacl::traits::opencl_handle(exclusive_scan_helper)
347 unsigned int current_offset = 0;
348 for (std::size_t i=0; i<C.
size1(); ++i)
350 unsigned int tmp = row_buffer[i];
351 row_buffer.set(i, current_offset);
352 current_offset += tmp;
354 row_buffer.
set(C.
size1(), current_offset);
362 C.
reserve(current_offset,
false);
381 template<
typename NumericT,
unsigned int MAT_AlignmentV>
393 viennacl::traits::opencl_handle(x),
404 template<
typename NumericT,
unsigned int AlignmentV>
417 viennacl::traits::opencl_handle(x),
429 template<
typename NumericT,
unsigned int AlignmentV>
441 viennacl::traits::opencl_handle(x),
452 template<
typename NumericT,
unsigned int AlignmentV>
465 viennacl::traits::opencl_handle(x),
482 template<
typename NumericT,
unsigned int AlignmentV>
497 L.lhs().handle2().opencl_handle(),
498 L.lhs().handle().opencl_handle(),
499 block_indices.opencl_handle(),
501 static_cast<cl_uint
>(x.
size())));
505 template<
typename NumericT,
unsigned int AlignmentV>
520 U.lhs().handle2().opencl_handle(),
521 U.lhs().handle().opencl_handle(),
523 block_indices.opencl_handle(),
525 static_cast<cl_uint
>(x.
size())));
537 template<
typename NumericT,
unsigned int AlignmentV>
550 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
551 viennacl::traits::opencl_handle(x),
552 cl_uint(proxy_L.lhs().size1())
563 template<
typename NumericT,
unsigned int AlignmentV>
578 k.local_work_size(0, 128);
579 k.global_work_size(0, k.local_work_size());
580 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
581 viennacl::traits::opencl_handle(diagonal),
582 viennacl::traits::opencl_handle(x),
583 cl_uint(proxy_L.lhs().size1())
593 template<
typename NumericT,
unsigned int AlignmentV>
606 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
607 viennacl::traits::opencl_handle(x),
608 cl_uint(proxy_U.lhs().size1())
619 template<
typename NumericT,
unsigned int AlignmentV>
634 k.local_work_size(0, 128);
635 k.global_work_size(0, k.local_work_size());
636 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
637 viennacl::traits::opencl_handle(diagonal),
638 viennacl::traits::opencl_handle(x),
639 cl_uint(proxy_U.lhs().size1())
657 template<
typename NumericT>
668 if (beta < 0 || beta > 0)
700 template<
typename NumericT,
unsigned int AlignmentV>
708 unsigned int thread_num = 128;
714 viennacl::traits::opencl_handle(x),
715 cl_uint(info_selector),
729 template<
typename NumericT,
unsigned int AlignmentV>
739 if (beta < 0 || beta > 0)
759 unsigned int thread_num = 128;
766 viennacl::traits::opencl_handle(x),
769 viennacl::traits::opencl_handle(y),
786 template<
typename NumericT,
unsigned int AlignmentV>
799 unsigned int thread_num = 128;
804 viennacl::traits::opencl_handle(d_A),
809 viennacl::traits::opencl_handle(y),
827 template<
typename NumericT,
unsigned int AlignmentV>
842 unsigned int thread_num = 128;
847 viennacl::traits::opencl_handle(d_A),
852 viennacl::traits::opencl_handle(y),
867 template<
typename NumericT,
unsigned int AlignmentV>
880 bool with_alpha_beta = (alpha < NumericT(1) || alpha >
NumericT(1)) || (beta < 0 || beta > 0);
894 std::stringstream ss;
895 ss <<
"vec_mul_" << 1;
898 unsigned int thread_num = 128;
899 unsigned int group_num = 256;
906 A.
handle().opencl_handle(),
907 viennacl::traits::opencl_handle(x),
910 viennacl::traits::opencl_handle(y),
922 A.
handle().opencl_handle(),
923 viennacl::traits::opencl_handle(x),
925 viennacl::traits::opencl_handle(y),
947 template<
typename NumericT,
unsigned int AlignmentV>
964 cl_uint(sp_A.
size1()),
965 cl_uint(sp_A.
size2()),
969 viennacl::traits::opencl_handle(d_A),
974 viennacl::traits::opencl_handle(y),
992 template<
typename NumericT,
unsigned int AlignmentV>
1011 cl_uint(sp_A.
size1()),
1012 cl_uint(sp_A.
size2()),
1016 viennacl::traits::opencl_handle(d_A.lhs()),
1021 viennacl::traits::opencl_handle(y),
1034 template<
typename ScalarT,
typename IndexT>
1047 bool with_alpha_beta = (alpha < ScalarT(1) || alpha > ScalarT(1)) || (beta < 0 || beta > 0);
1061 std::stringstream ss;
1062 ss <<
"vec_mul_" << 1;
1066 unsigned int group_num = 256;
1074 if (with_alpha_beta)
1078 A.
handle().opencl_handle(),
1079 viennacl::traits::opencl_handle(x),
1082 viennacl::traits::opencl_handle(y),
1091 A.
handle().opencl_handle(),
1092 viennacl::traits::opencl_handle(x),
1094 viennacl::traits::opencl_handle(y),
1105 template<
typename NumericT,
unsigned int AlignmentV>
1118 bool with_alpha_beta = (alpha < NumericT(1) || alpha >
NumericT(1)) || (beta < 0 || beta > 0);
1134 if (with_alpha_beta)
1136 A.
handle().opencl_handle(),
1140 viennacl::traits::opencl_handle(x),
1143 viennacl::traits::opencl_handle(y),
1154 A.
handle().opencl_handle(),
1158 viennacl::traits::opencl_handle(x),
1160 viennacl::traits::opencl_handle(y),
1170 template<
typename NumericT,
unsigned int AlignmentV>
1181 A.
handle().opencl_handle(),
1189 viennacl::traits::opencl_handle(d_A),
1194 viennacl::traits::opencl_handle(y),
1203 template<
typename NumericT,
unsigned int AlignmentV>
1216 A.
handle().opencl_handle(),
1224 viennacl::traits::opencl_handle(d_A.lhs()),
1229 viennacl::traits::opencl_handle(y),
const vcl_size_t & size2() const
Returns the number of columns.
vcl_size_t internal_ellnnz() const
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
cl_uint stride
Increment between integers.
static void init(viennacl::ocl::context &ctx)
viennacl::ocl::device const & current_device() const
Returns the current device.
Helper class implementing an array on the host. Default case: No conversion necessary.
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Implementations of NMF operations using OpenCL.
void prod_impl(const matrix_base< NumericT > &mat, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
const handle_type & handle3() const
const vcl_size_t & size1() const
Returns the number of rows.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Represents an OpenCL kernel within ViennaCL.
cl_uint start
Starting value of the integer stride.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
const handle_type & handle() const
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
A tag class representing a lower triangular matrix.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Main kernel class for generating OpenCL kernels for coordinate_matrix.
vcl_size_t internal_size1() const
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
const handle_type & handle4() const
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
T max(const T &lhs, const T &rhs)
Maximum.
vcl_size_t rows_per_block() const
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
Main kernel class for generating OpenCL kernels for ell_matrix.
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
const handle_type & handle2() const
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Main kernel class for generating OpenCL kernels for compressed_matrix (except solvers).
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for compressed_matrix operations.
A tag class representing an upper triangular matrix.
OpenCL kernel file for ell_matrix operations.
Sparse matrix class using the sliced ELLPACK with parameters C, .
void clear()
Resets all entries to zero.
const handle_type & handle3() const
Returns the OpenCL handle to the row index array.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows c...
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
static void init(viennacl::ocl::context &ctx)
Main kernel class for triangular solver OpenCL kernels for compressed_matrix.
Main kernel class for generating OpenCL kernels for ell_matrix.
Common routines for single-threaded or OpenMP-enabled execution on CPU.
OpenCL kernel file for sliced_ell_matrix operations.
vcl_size_t maxnnz() const
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
OpenCL kernel file for hyb_matrix operations.
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for vector operations.
void set(vcl_size_t index, U value)
size_type size() const
Returns the length of the vector (cf. std::vector)
const vcl_size_t & nnz1() const
Returns the number of nonzero entries.
vcl_size_t ell_nnz() const
A tag class representing a lower triangular matrix with unit diagonal.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
OpenCL kernel file for coordinate_matrix operations.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
A tag class representing transposed matrices.
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
A sparse square matrix in compressed sparse rows format.
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
const handle_type & handle5() const
void block_inplace_solve(const matrix_expression< const compressed_matrix< NumericT, AlignmentV >, const compressed_matrix< NumericT, AlignmentV >, op_trans > &L, viennacl::backend::mem_handle const &block_indices, vcl_size_t num_blocks, vector_base< NumericT > const &, vector_base< NumericT > &x, viennacl::linalg::unit_lower_tag)
static void init(viennacl::ocl::context &ctx)
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve=true)
Resize the matrix.
const handle_type & handle() const
Returns the memory handle.
static void init(viennacl::ocl::context &ctx)
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
A tag class representing an upper triangular matrix with unit diagonal.
Main kernel class for generating OpenCL kernels for compressed_compressed_matrix. ...
cl_uint size
Number of values in the stride.
Main kernel class for generating OpenCL kernels for hyb_matrix.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void switch_memory_context(viennacl::context new_ctx)
void row_info(compressed_matrix< NumericT, AlignmentV > const &A, vector_base< NumericT > &x, viennacl::linalg::detail::row_info_types info_selector)