1 #ifndef VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
50 template<
typename NumericT>
77 template<
typename NumericT>
91 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
102 if (use_nvidia_blocked)
109 buffer_size_per_vector,
121 buffer_size_per_vector,
130 template<
typename NumericT>
140 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
145 unsigned int thread_num = 256;
158 buffer_size_per_vector,
164 template<
typename NumericT>
174 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
178 unsigned int thread_num = 128;
179 unsigned int group_num = 256;
191 A.
handle().opencl_handle(),
195 viennacl::traits::opencl_handle(p),
196 viennacl::traits::opencl_handle(Ap),
199 buffer_size_per_vector,
206 template<
typename NumericT>
216 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
221 unsigned int group_num = 256;
232 A.
handle().opencl_handle(),
233 viennacl::traits::opencl_handle(p),
234 viennacl::traits::opencl_handle(Ap),
238 buffer_size_per_vector,
246 template<
typename NumericT>
256 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
260 unsigned int thread_num = 128;
261 unsigned int group_num = 128;
273 A.
handle().opencl_handle(),
280 viennacl::traits::opencl_handle(p),
281 viennacl::traits::opencl_handle(Ap),
284 buffer_size_per_vector,
294 template<
typename NumericT>
317 cl_uint chunk_size = cl_uint(buffer_chunk_size);
318 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
320 inner_prod_buffer, chunk_size, chunk_offset, vec_size,
325 template<
typename NumericT>
332 (void)buffer_chunk_size;
359 template<
typename NumericT>
376 cl_uint chunk_size = cl_uint(buffer_chunk_size);
377 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
388 if (use_nvidia_blocked)
395 inner_prod_buffer, chunk_size, chunk_offset,
408 inner_prod_buffer, chunk_size, chunk_offset,
418 template<
typename NumericT>
431 cl_uint chunk_size = cl_uint(buffer_chunk_size);
432 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
437 unsigned int thread_num = 256;
450 inner_prod_buffer, chunk_size, chunk_offset,
457 template<
typename NumericT>
470 cl_uint chunk_size = cl_uint(buffer_chunk_size);
471 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
475 unsigned int thread_num = 128;
476 unsigned int group_num = 128;
488 A.
handle().opencl_handle(),
492 viennacl::traits::opencl_handle(p),
493 viennacl::traits::opencl_handle(Ap),
496 inner_prod_buffer, chunk_size, chunk_offset,
504 template<
typename NumericT>
517 cl_uint chunk_size = cl_uint(buffer_chunk_size);
518 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
523 unsigned int group_num = 256;
534 A.
handle().opencl_handle(),
535 viennacl::traits::opencl_handle(p),
536 viennacl::traits::opencl_handle(Ap),
540 inner_prod_buffer, chunk_size, chunk_offset,
549 template<
typename NumericT>
562 cl_uint chunk_size = cl_uint(buffer_chunk_size);
563 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
567 unsigned int thread_num = 256;
568 unsigned int group_num = 128;
580 A.
handle().opencl_handle(),
587 viennacl::traits::opencl_handle(p),
588 viennacl::traits::opencl_handle(Ap),
591 inner_prod_buffer, chunk_size, chunk_offset,
608 template <
typename T>
626 cl_uint size_vk = cl_uint(v_k.
size());
628 cl_uint R_offset = cl_uint(offset_in_R);
629 cl_uint chunk_size = cl_uint(buffer_chunk_size);
630 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
634 inner_prod_buffer, chunk_size,
635 r_dot_vk_buffer, chunk_offset,
641 template <
typename T>
657 cl_uint size_vk = cl_uint(v_k_size);
658 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
659 cl_uint ocl_k = cl_uint(param_k);
660 cl_uint chunk_size = cl_uint(buffer_chunk_size);
662 vi_in_vk_buffer, chunk_size
666 template <
typename T>
685 cl_uint size_vk = cl_uint(v_k_size);
686 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
687 cl_uint ocl_k = cl_uint(param_k);
688 cl_uint chunk_size = cl_uint(buffer_chunk_size);
689 cl_uint ocl_krylov_dim = cl_uint(krylov_dim);
691 vi_in_vk_buffer, chunk_size,
692 R_buffer, ocl_krylov_dim,
698 template <
typename T>
715 cl_uint size_vk = cl_uint(v_k_size);
716 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
717 cl_uint ocl_k = cl_uint(param_k);
720 krylov_basis, size_vk, internal_size_vk,
726 template <
typename T>
740 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
753 if (use_nvidia_blocked)
760 buffer_size_per_vector,
772 buffer_size_per_vector,
780 template <
typename T>
790 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
795 inner_prod_buffer.
clear();
798 unsigned int thread_num = 128;
811 buffer_size_per_vector,
817 template <
typename T>
827 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
834 unsigned int group_num = 128;
840 A.
handle().opencl_handle(),
844 viennacl::traits::opencl_handle(p), start_p,
845 viennacl::traits::opencl_handle(Ap), start_Ap,
848 buffer_size_per_vector,
855 template <
typename T>
865 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
872 unsigned int group_num = 128;
883 A.
handle().opencl_handle(),
884 viennacl::traits::opencl_handle(p), start_p,
885 viennacl::traits::opencl_handle(Ap), start_Ap,
889 buffer_size_per_vector,
897 template <
typename T>
907 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
914 unsigned int group_num = 128;
921 A.
handle().opencl_handle(),
928 viennacl::traits::opencl_handle(p), start_p,
929 viennacl::traits::opencl_handle(Ap), start_Ap,
932 buffer_size_per_vector,
vcl_size_t internal_ellnnz() const
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros.
viennacl::ocl::device const & current_device() const
Returns the current device.
Main kernel class for generating specialized OpenCL kernels for fast iterative solvers.
Represents an OpenCL device within ViennaCL.
void pipelined_bicgstab_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Generic size and resize functionality for different vector and matrix types.
const handle_type & handle3() const
const vcl_size_t & size1() const
Returns the number of rows.
Represents an OpenCL kernel within ViennaCL.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression, etc.
static void init(viennacl::ocl::context &ctx)
const handle_type & handle() const
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc.
vcl_size_t internal_size1() const
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
const handle_type & handle4() const
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID.
T max(const T &lhs, const T &rhs)
Maximum.
vcl_size_t rows_per_block() const
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
const handle_type & handle2() const
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
OpenCL kernel file for specialized iterative solver kernels.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Sparse matrix class using the sliced ELLPACK format with parameters $C$, $\sigma$.
Implementation of a smart-pointer-like class for handling OpenCL handles.
void pipelined_cg_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
result_of::size_type< T >::type start(T const &obj)
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
vcl_size_t maxnnz() const
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
size_type size() const
Returns the length of the vector (cf. std::vector)
vcl_size_t ell_nnz() const
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void pipelined_cg_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
Forward declarations of the implicit_vector_base, vector_base class.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression, etc.
const handle_type & handle5() const
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
void pipelined_gmres_prod(compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Simple enable-if variant that uses the SFINAE pattern.
A sparse square matrix, where entries are stored as triplets (i, j, val), where i and j are the row and column indices and val denotes the entry.