#ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
#define VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP

/** @file viennacl/linalg/detail/spai/spai.hpp
    @brief Main implementation of the sparse approximate inverse (SPAI) preconditioner. Experimental.
*/
#include <utility>
#include <map>
#include <vector>
#include <string>
#include <cmath>
#include <iostream>

#include "boost/numeric/ublas/vector.hpp"
#include "boost/numeric/ublas/matrix.hpp"
#include "boost/numeric/ublas/matrix_proxy.hpp"
#include "boost/numeric/ublas/vector_proxy.hpp"
#include "boost/numeric/ublas/storage.hpp"
#include "boost/numeric/ublas/io.hpp"
#include "boost/numeric/ublas/lu.hpp"
#include "boost/numeric/ublas/triangular.hpp"
#include "boost/numeric/ublas/matrix_expression.hpp"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/linalg/prod.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/traits/context.hpp"
#include "viennacl/linalg/opencl/kernels/spai.hpp"

// SPAI-specific helpers: configuration tag, static/dynamic pattern updates, block QR, block containers
#include "viennacl/linalg/detail/spai/spai_tag.hpp"
#include "viennacl/linalg/detail/spai/spai-static.hpp"
#include "viennacl/linalg/detail/spai/spai-dynamic.hpp"
#include "viennacl/linalg/detail/spai/qr.hpp"
#include "viennacl/linalg/detail/spai/block_matrix.hpp"
#include "viennacl/linalg/detail/spai/block_vector.hpp"
#include "viennacl/linalg/detail/spai/sparse_vector.hpp"

#define VIENNACL_SPAI_K_b 20

namespace viennacl
{
namespace linalg
{
namespace detail
{
namespace spai
{
/** @brief Prints a sparse vector (index/value pairs) to std::cout. */
template<typename SparseVectorT>
void print_sparse_vector(SparseVectorT const & v)
{
  for (typename SparseVectorT::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it)
    std::cout << "[ " << vec_it->first << " ]:" << vec_it->second << std::endl;
}
/** @brief Prints a dense matrix to std::cout (one column per line). */
template<typename DenseMatrixT>
void print_matrix(DenseMatrixT & m)
{
  for (int i = 0; i < m.size2(); ++i)
  {
    for (int j = 0; j < m.size1(); ++j)
      std::cout << m(j, i) << " ";
    std::cout << std::endl;
  }
}
/** @brief Adds a scaled sparse vector to a sparse vector: res_v += b*v. */
template<typename SparseVectorT, typename NumericT>
void add_sparse_vectors(SparseVectorT const & v, NumericT b, SparseVectorT & res_v)
{
  for (typename SparseVectorT::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it)
    res_v[v_it->first] += b * v_it->second;
}
/** @brief Computes the residual of one column: res = A*v - e_ind, with A given column-wise in A_v_c. */
template<typename SparseVectorT, typename NumericT>
void compute_spai_residual(std::vector<SparseVectorT> const & A_v_c,
                           SparseVectorT const & v,
                           unsigned int ind,
                           SparseVectorT & res)
{
  for (typename SparseVectorT::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it)
    add_sparse_vectors(A_v_c[v_it->first], v_it->second, res);   // accumulate A*v column by column
  res[ind] -= NumericT(1);                                       // subtract the unit vector e_ind
}
/** @brief Sets up the column index set J and row index set I for a particular column of the preconditioner. */
template<typename SparseVectorT>
void build_index_set(std::vector<SparseVectorT> const & A_v_c,
                     SparseVectorT const & v,
                     std::vector<unsigned int> & J,
                     std::vector<unsigned int> & I)
{
  buildColumnIndexSet(v, J);      // columns J allowed by the sparsity pattern of v
  projectRows(A_v_c, J, I);       // non-zero rows I of A(:,J)
}
/** @brief Initializes the dense block A_out = A_in(I, J) from the sparse matrix A_in. */
template<typename SparseMatrixT, typename DenseMatrixT>
void initProjectSubMatrix(SparseMatrixT const & A_in,
                          std::vector<unsigned int> const & J,
                          std::vector<unsigned int> & I,
                          DenseMatrixT & A_out)
{
  A_out.resize(I.size(), J.size(), false);
  for (vcl_size_t j = 0; j < J.size(); ++j)
    for (vcl_size_t i = 0; i < I.size(); ++i)
      A_out(i,j) = A_in(I[i], J[j]);
}
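// For each column k of the preconditioner, SPAI restricts the problem to the index sets
// J (allowed nonzeros of column k) and I (nonzero rows of A(:,J)) and solves the small
// dense least-squares problem
//
//    min_{m_k} || A(I,J) * m_k(J) - e_k(I) ||_2
//
// via a QR factorization of the block A(I,J). The helpers below set up these blocks for a
// whole group of columns at once, either on the CPU or on the GPU.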
/** @brief Sets up the blocks A(I,J) for all columns and QR-factorizes them on the CPU. */
template<typename SparseMatrixT, typename DenseMatrixT, typename SparseVectorT, typename VectorT>
void block_set_up(SparseMatrixT const & A,
                  std::vector<SparseVectorT> const & A_v_c,
                  std::vector<SparseVectorT> const & M_v,
                  std::vector<std::vector<unsigned int> > & g_I,
                  std::vector<std::vector<unsigned int> > & g_J,
                  std::vector<DenseMatrixT> & g_A_I_J,
                  std::vector<VectorT> & g_b_v)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i2 = 0; i2 < static_cast<long>(M_v.size()); ++i2)
  {
    vcl_size_t i = static_cast<vcl_size_t>(i2);
    build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
    initProjectSubMatrix(A, g_J[i], g_I[i], g_A_I_J[i]);
    single_qr(g_A_I_J[i], g_b_v[i]);   // in-place Householder QR of the block
  }
}
/** @brief Sets up the row and column index sets for all columns of the preconditioner. */
template<typename SparseVectorT>
void index_set_up(std::vector<SparseVectorT> const & A_v_c,
                  std::vector<SparseVectorT> const & M_v,
                  std::vector<std::vector<unsigned int> > & g_J,
                  std::vector<std::vector<unsigned int> > & g_I)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i2 = 0; i2 < static_cast<long>(M_v.size()); ++i2)
  {
    vcl_size_t i = static_cast<vcl_size_t>(i2);
    build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
  }
}
/** @brief Sets up the blocks for all columns and QR-factorizes them on the GPU (OpenCL). */
template<typename NumericT, unsigned int AlignmentV, typename SparseVectorT>
void block_set_up(viennacl::compressed_matrix<NumericT, AlignmentV> const & A,
                  std::vector<SparseVectorT> const & A_v_c,
                  std::vector<SparseVectorT> const & M_v,
                  std::vector<cl_uint> g_is_update,
                  std::vector<std::vector<unsigned int> > & g_I,
                  std::vector<std::vector<unsigned int> > & g_J,
                  block_matrix & g_A_I_J,
                  block_vector & g_bv)
{
  viennacl::context ctx = viennacl::traits::context(A);
  bool is_empty_block;

  // build the index sets on the host, then assemble and QR-factorize all blocks on the device
  index_set_up(A_v_c, M_v, g_J, g_I);
  block_assembly(A, g_J, g_I, g_A_I_J, g_is_update, is_empty_block);
  block_qr<NumericT>(g_I, g_J, g_A_I_J, g_bv, g_is_update, ctx);
}
/** @brief Extracts the sparse column m from m_in, the contiguous solution vector of all columns. */
template<typename NumericT, typename SparseVectorT>
void custom_fan_out(std::vector<NumericT> const & m_in,
                    unsigned int start_m_ind,
                    std::vector<unsigned int> const & J,
                    SparseVectorT & m)
{
  unsigned int cnt = 0;
  for (vcl_size_t i = 0; i < J.size(); ++i)
    m[J[i]] = m_in[start_m_ind + cnt++];
}
/** @brief Solves the least-squares problems for all blocks on the GPU and updates the preconditioner columns and residuals. */
template<typename SparseVectorT, typename NumericT>
void least_square_solve(std::vector<SparseVectorT> & A_v_c,
                        std::vector<SparseVectorT> & M_v,
                        std::vector<std::vector<unsigned int> > & g_I,
                        std::vector<std::vector<unsigned int> > & g_J,
                        block_matrix & g_A_I_J_vcl,
                        block_vector & g_bv_vcl,
                        std::vector<SparseVectorT> & g_res,
                        std::vector<cl_uint> & g_is_update,
                        spai_tag const & tag,
                        viennacl::context ctx)
{
  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
  unsigned int y_sz, m_sz;
  std::vector<cl_uint> y_inds(M_v.size() + 1, static_cast<cl_uint>(0));
  std::vector<cl_uint> m_inds(M_v.size() + 1, static_cast<cl_uint>(0));

  get_size(g_I, y_sz);
  get_size(g_J, m_sz);
  init_start_inds(g_I, y_inds);
  init_start_inds(g_J, m_inds);

  // right-hand sides: unit vectors e_i restricted to the row index sets g_I, stored contiguously
  std::vector<NumericT> y_v(y_sz, NumericT(0));
  for (vcl_size_t i = 0; i < M_v.size(); ++i)
    for (vcl_size_t j = 0; j < g_I[i].size(); ++j)
      if (g_I[i][j] == static_cast<unsigned int>(i))
        y_v[y_inds[i] + j] = NumericT(1.0);

  // contiguous storage for the solutions of all columns
  std::vector<NumericT> m_v(m_sz, static_cast<cl_uint>(0));

  // device buffers for the right-hand sides, the solutions, the start indices and the update flags
  viennacl::ocl::handle<cl_mem> y_v_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                   static_cast<unsigned int>(sizeof(NumericT)*y_v.size()), &(y_v[0]));
  viennacl::ocl::handle<cl_mem> m_v_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                   static_cast<unsigned int>(sizeof(NumericT)*m_v.size()), &(m_v[0]));
  viennacl::ocl::handle<cl_mem> y_inds_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                      static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)), &(y_inds[0]));
  viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                           static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())), &(g_is_update[0]));

  // solve R*m = Q^T*e_i for every block via the SPAI least-squares kernel
  viennacl::ocl::kernel & ls_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(), "block_least_squares");
  viennacl::ocl::enqueue(ls_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_bv_vcl.handle(), g_bv_vcl.handle1(),
                                   y_v_vcl, y_inds_vcl, m_v_vcl,
                                   g_A_I_J_vcl.handle1(), g_is_update_vcl,
                                   static_cast<unsigned int>(M_v.size())));

  // read the concatenated solutions back to the host
  cl_int err = clEnqueueReadBuffer(opencl_ctx.get_queue().handle().get(), m_v_vcl.get(), CL_TRUE, 0,
                                   sizeof(NumericT)*m_v.size(), &(m_v[0]), 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);

  // fan the solutions out into the sparse columns and recompute the residuals res = A*m_i - e_i
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(M_v.size()); ++i)
  {
    if (g_is_update[static_cast<vcl_size_t>(i)])
    {
      custom_fan_out(m_v, m_inds[static_cast<vcl_size_t>(i)], g_J[static_cast<vcl_size_t>(i)], M_v[static_cast<vcl_size_t>(i)]);
      g_res[static_cast<vcl_size_t>(i)] = SparseVectorT();   // reset the residual before recomputing it
      compute_spai_residual<SparseVectorT, NumericT>(A_v_c, M_v[static_cast<vcl_size_t>(i)], static_cast<unsigned int>(i), g_res[static_cast<vcl_size_t>(i)]);

      NumericT res_norm = 0;
      sparse_norm_2(g_res[static_cast<vcl_size_t>(i)], res_norm);
      g_is_update[static_cast<vcl_size_t>(i)] = (res_norm > static_cast<NumericT>(tag.getResidualNormThreshold())) && !tag.getIsStatic();
    }
  }
}
/** @brief Solves the least-squares problems for all blocks on the CPU and updates the preconditioner columns and residuals. */
template<typename SparseVectorT, typename DenseMatrixT, typename VectorT>
void least_square_solve(std::vector<SparseVectorT> const & A_v_c,
                        std::vector<DenseMatrixT> & g_R,
                        std::vector<VectorT> & g_b_v,
                        std::vector<std::vector<unsigned int> > & g_I,
                        std::vector<std::vector<unsigned int> > & g_J,
                        std::vector<SparseVectorT> & g_res,
                        std::vector<bool> & g_is_update,
                        std::vector<SparseVectorT> & M_v,
                        spai_tag const & tag)
{
  typedef typename DenseMatrixT::value_type NumericType;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i2 = 0; i2 < static_cast<long>(M_v.size()); ++i2)
  {
    vcl_size_t i = static_cast<vcl_size_t>(i2);
    if (g_is_update[i])
    {
      // right-hand side: unit vector e_k restricted to the row index set g_I[i]
      VectorT y = boost::numeric::ublas::zero_vector<NumericType>(g_I[i].size());
      projectI<VectorT, NumericType>(g_I[i], y, static_cast<unsigned int>(tag.getBegInd() + long(i)));

      // y <- Q^T*y, then solve R*m_new = y by backward substitution
      apply_q_trans_vec(g_R[i], g_b_v[i], y);
      VectorT m_new = boost::numeric::ublas::zero_vector<NumericType>(g_R[i].size2());
      backwardSolve(g_R[i], y, m_new);

      // scatter the solution into the sparse column and recompute its residual
      fanOutVector(m_new, g_J[i], M_v[i]);
      g_res[i] = SparseVectorT();
      compute_spai_residual<SparseVectorT, NumericType>(A_v_c, M_v[i], static_cast<unsigned int>(tag.getBegInd() + long(i)), g_res[i]);

      NumericType res_norm = 0;
      sparse_norm_2(g_res[i], res_norm);
      g_is_update[i] = (res_norm > static_cast<NumericType>(tag.getResidualNormThreshold())) && !tag.getIsStatic();
    }
  }
}
/** @brief Returns true as long as at least one column is still flagged for updating. */
template<typename VectorType>
bool is_all_update(VectorType & parallel_is_update)
{
  for (unsigned int i = 0; i < parallel_is_update.size(); ++i)
  {
    if (parallel_is_update[i])
      return true;
  }
  return false;
}
/** @brief Stores the columns of a sparse matrix as a vector of sparse vectors. */
template<typename SparseMatrixT, typename SparseVectorT>
void vectorize_column_matrix(SparseMatrixT const & M_in, std::vector<SparseVectorT> & M_v)
{
  for (typename SparseMatrixT::const_iterator1 row_it = M_in.begin1(); row_it != M_in.end1(); ++row_it)
    for (typename SparseMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      M_v[static_cast<unsigned int>(col_it.index2())][static_cast<unsigned int>(col_it.index1())] = *col_it;
}
/** @brief Stores the rows of a sparse matrix as a vector of sparse vectors. */
template<typename SparseMatrixT, typename SparseVectorT>
void vectorize_row_matrix(SparseMatrixT const & M_in, std::vector<SparseVectorT> & M_v)
{
  for (typename SparseMatrixT::const_iterator1 row_it = M_in.begin1(); row_it != M_in.end1(); ++row_it)
    for (typename SparseMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      M_v[static_cast<unsigned int>(col_it.index1())][static_cast<unsigned int>(col_it.index2())] = *col_it;
}
/** @brief Writes an index set (vector of vectors) contiguously into a flat array. */
template<typename SizeT>
void write_set_to_array(std::vector<std::vector<SizeT> > const & ind_set, std::vector<cl_uint> & a)
{
  vcl_size_t cnt = 0;
  for (vcl_size_t i = 0; i < ind_set.size(); ++i)
    for (vcl_size_t j = 0; j < ind_set[i].size(); ++j)
      a[cnt++] = static_cast<cl_uint>(ind_set[i][j]);
}
/** @brief Assembles all blocks A(I,J) on the GPU for the given row index sets g_I and column index sets g_J. */
template<typename NumericT, unsigned int AlignmentV>
void block_assembly(viennacl::compressed_matrix<NumericT, AlignmentV> const & A,
                    std::vector<std::vector<unsigned int> > const & g_J,
                    std::vector<std::vector<unsigned int> > const & g_I,
                    block_matrix & g_A_I_J_vcl,
                    std::vector<cl_uint> & g_is_update,
                    bool & is_empty_block)
{
  // start indices of the index sets and of the blocks within the contiguous storage
  unsigned int sz_I, sz_J, sz_blocks;
  std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
  std::vector<cl_uint> i_ind(g_I.size() + 1, static_cast<cl_uint>(0));
  std::vector<cl_uint> j_ind(g_I.size() + 1, static_cast<cl_uint>(0));
  std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));

  init_start_inds(g_I, i_ind);
  init_start_inds(g_J, j_ind);
  get_size(g_I, sz_I);
  get_size(g_J, sz_J);

  // flattened index sets
  std::vector<cl_uint> I_set(sz_I, static_cast<cl_uint>(0));
  std::vector<cl_uint> J_set(sz_J, static_cast<cl_uint>(0));
  write_set_to_array(g_I, I_set);
  write_set_to_array(g_J, J_set);

  if (I_set.size() > 0 && J_set.size() > 0)
  {
    viennacl::context ctx = viennacl::traits::context(A);
    viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
    compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);

    // contiguous host storage for all blocks
    std::vector<NumericT> con_A_I_J(sz_blocks, NumericT(0));

    // device buffers: block data, block dimensions and block start indices (held by g_A_I_J_vcl),
    // plus the flattened index sets, their start indices and the update flags
    g_A_I_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                    static_cast<unsigned int>(sizeof(NumericT)*(sz_blocks)), &(con_A_I_J[0]));
    g_A_I_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                     static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<cl_uint>(g_I.size())), &(matrix_dims[0]));
    g_A_I_J_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                     static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)), &(blocks_ind[0]));
    viennacl::ocl::handle<cl_mem> i_set_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                       static_cast<unsigned int>(sizeof(cl_uint)*sz_I), &(I_set[0]));
    viennacl::ocl::handle<cl_mem> j_set_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                       static_cast<unsigned int>(sizeof(cl_uint)*sz_J), &(J_set[0]));
    viennacl::ocl::handle<cl_mem> i_ind_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                       static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)), &(i_ind[0]));
    viennacl::ocl::handle<cl_mem> j_ind_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                       static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)), &(j_ind[0]));
    viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                             static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()), &(g_is_update[0]));

    // launch the block assembly kernel over all blocks
    viennacl::linalg::opencl::kernels::spai<NumericT>::init(opencl_ctx);
    viennacl::ocl::kernel & assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(), "assemble_blocks");
    viennacl::ocl::enqueue(assembly_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
                                           i_set_vcl, j_set_vcl, i_ind_vcl, j_ind_vcl,
                                           g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(), g_A_I_J_vcl.handle(),
                                           g_is_update_vcl,
                                           static_cast<unsigned int>(g_I.size())));
    is_empty_block = false;
  }
  else
    is_empty_block = true;
}
/** @brief Inserts the vectorized columns M_v back into the sparse matrix M (as columns for a right preconditioner, as rows otherwise). */
template<typename SparseMatrixT, typename SparseVectorT>
void insert_sparse_columns(std::vector<SparseVectorT> const & M_v,
                           SparseMatrixT & M,
                           bool is_right)
{
  if (is_right)
  {
    for (unsigned int i = 0; i < M_v.size(); ++i)
      for (typename SparseVectorT::const_iterator vec_it = M_v[i].begin(); vec_it != M_v[i].end(); ++vec_it)
        M(vec_it->first, i) = vec_it->second;
  }
  else   // transposed insertion for a left preconditioner
  {
    for (unsigned int i = 0; i < M_v.size(); ++i)
      for (typename SparseVectorT::const_iterator vec_it = M_v[i].begin(); vec_it != M_v[i].end(); ++vec_it)
        M(i, vec_it->first) = vec_it->second;
  }
}
/** @brief Transposes a sparse matrix: A = A_in^T. */
template<typename MatrixT>
void sparse_transpose(MatrixT const & A_in, MatrixT & A)
{
  typedef typename MatrixT::value_type NumericType;

  std::vector<std::map<vcl_size_t, NumericType> > temp_A(A_in.size2());
  A.resize(A_in.size2(), A_in.size1(), false);

  // collect the entries of A_in column by column
  for (typename MatrixT::const_iterator1 row_it = A_in.begin1();
       row_it != A_in.end1();
       ++row_it)
  {
    for (typename MatrixT::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
      temp_A[col_it.index2()][col_it.index1()] = *col_it;
  }

  // write the collected entries as rows of the transposed matrix
  for (vcl_size_t i = 0; i < temp_A.size(); ++i)
  {
    for (typename std::map<vcl_size_t, NumericType>::const_iterator it = temp_A[i].begin();
         it != temp_A[i].end();
         ++it)
      A(i, it->first) = it->second;
  }
}
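// computeSPAI drives the construction: the system matrix and the preconditioner pattern are
// vectorized into sparse columns, the per-column blocks are assembled and QR-factorized
// (block_set_up), the least-squares problems are solved (least_square_solve), and, unless a
// static pattern is requested, the sparsity pattern is enlarged dynamically (block_update)
// until the residual norms drop below the threshold or the iteration limit is reached.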
/** @brief Constructs the SPAI preconditioner M for a sparse matrix A on the CPU (uBLAS types). */
template<typename MatrixT>
void computeSPAI(MatrixT const & A, MatrixT & M, spai_tag & tag)
{
  typedef typename MatrixT::value_type NumericT;
  typedef typename boost::numeric::ublas::vector<NumericT> VectorType;
  typedef typename boost::numeric::ublas::matrix<NumericT> DenseMatrixType;
  typedef sparse_vector<NumericT> SparseVectorType;

  unsigned int cur_iter = 0;
  bool go_on = true;

  // A and the initial guess for M, stored column-wise as sparse vectors
  std::vector<SparseVectorType> A_v_c(M.size2());
  std::vector<SparseVectorType> M_v(M.size2());
  vectorize_column_matrix(A, A_v_c);
  vectorize_column_matrix(M, M_v);

  // process the columns in chunks [getBegInd(), getEndInd()) of size VIENNACL_SPAI_K_b
  while (go_on)
  {
    go_on = (tag.getEndInd() < static_cast<long>(M.size2()));
    cur_iter = 0;
    unsigned int l_sz = static_cast<unsigned int>(tag.getEndInd() - tag.getBegInd());

    std::vector<bool> g_is_update(l_sz, true);

    // local copies of the columns in the current chunk
    std::vector<SparseVectorType> l_M_v(l_sz);
    for (unsigned int i = 0; i < l_sz; ++i)
      l_M_v[i] = M_v[static_cast<vcl_size_t>(tag.getBegInd()) + i];

    // per-column dense blocks, QR data, residuals and index sets
    std::vector<DenseMatrixType> g_A_I_J(l_sz);
    std::vector<VectorType> g_b_v(l_sz);
    std::vector<SparseVectorType> g_res(l_sz);
    std::vector<std::vector<unsigned int> > g_I(l_sz);
    std::vector<std::vector<unsigned int> > g_J(l_sz);

    while ((cur_iter < tag.getIterationLimit()) && is_all_update(g_is_update))
    {
      if (!cur_iter)   // first sweep: build blocks and QR factors; later sweeps: dynamic pattern update
        block_set_up(A, A_v_c, l_M_v, g_I, g_J, g_A_I_J, g_b_v);
      else
        block_update(A, A_v_c, g_res, g_is_update, g_I, g_J, g_b_v, g_A_I_J, tag);

      least_square_solve(A_v_c, g_A_I_J, g_b_v, g_I, g_J, g_res, g_is_update, l_M_v, tag);

      if (tag.getIsStatic()) break;
      cur_iter++;
    }

    // write the finished chunk back into M_v and advance to the next block of columns
    for (unsigned int i = 0; i < l_sz; ++i)
      M_v[static_cast<vcl_size_t>(tag.getBegInd()) + i] = l_M_v[i];
    tag.setBegInd(tag.getEndInd());
    long chunk_end = tag.getBegInd() + VIENNACL_SPAI_K_b;
    tag.setEndInd(chunk_end < static_cast<long>(M.size2()) ? chunk_end : static_cast<long>(M.size2()));
  }

  M.resize(M.size1(), M.size2(), false);
  insert_sparse_columns(M_v, M, tag.getIsRight());
}
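/* Usage sketch (illustrative only; uBLAS types, default-constructed tag):
 *
 *   boost::numeric::ublas::compressed_matrix<double> A(n, n);   // system matrix, filled by the caller
 *   boost::numeric::ublas::compressed_matrix<double> M(n, n);   // initial pattern, e.g. the identity
 *   viennacl::linalg::spai_tag tag;                             // threshold, iteration limit, static/dynamic, left/right
 *   viennacl::linalg::detail::spai::computeSPAI(A, M, tag);
 *   // M now holds the sparse approximate inverse and can be applied as a preconditioner.
 *
 * The overload below performs the same construction with OpenCL, taking the device matrix A,
 * host copies cpu_A / cpu_M, and writing the result to the device matrix M.
 */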
/** @brief Constructs the SPAI preconditioner on the GPU (OpenCL); cpu_A and cpu_M are the uBLAS host copies, A and M the device matrices. */
template<typename NumericT, unsigned int AlignmentV>
void computeSPAI(viennacl::compressed_matrix<NumericT, AlignmentV> const & A,
                 boost::numeric::ublas::compressed_matrix<NumericT> const & cpu_A,
                 boost::numeric::ublas::compressed_matrix<NumericT> & cpu_M,
                 viennacl::compressed_matrix<NumericT, AlignmentV> & M,
                 spai_tag const & tag)
{
  typedef sparse_vector<NumericT> SparseVectorType;

  unsigned int cur_iter = 0;
  std::vector<cl_uint> g_is_update(cpu_M.size2(), static_cast<cl_uint>(1));

  // host-side column representations of A and of the preconditioner pattern
  std::vector<SparseVectorType> A_v_c(cpu_M.size2());
  std::vector<SparseVectorType> M_v(cpu_M.size2());
  vectorize_column_matrix(cpu_A, A_v_c);
  vectorize_column_matrix(cpu_M, M_v);

  std::vector<SparseVectorType> g_res(cpu_M.size2());
  std::vector<std::vector<unsigned int> > g_I(cpu_M.size2());
  std::vector<std::vector<unsigned int> > g_J(cpu_M.size2());

  // device containers for all blocks and QR data
  block_matrix g_A_I_J_vcl;
  block_vector g_bv_vcl;

  while ((cur_iter < tag.getIterationLimit()) && is_all_update(g_is_update))
  {
    // phase one: assemble and QR-factorize the blocks (first sweep), or update the pattern dynamically
    if (!cur_iter)
      block_set_up(A, A_v_c, M_v, g_is_update, g_I, g_J, g_A_I_J_vcl, g_bv_vcl);
    else
      block_update(A, A_v_c, g_is_update, g_res, g_J, g_I, g_A_I_J_vcl, g_bv_vcl, tag);

    // phase two: solve the least-squares problems and update the columns and residuals
    least_square_solve<SparseVectorType, NumericT>(A_v_c, M_v, g_I, g_J, g_A_I_J_vcl, g_bv_vcl, g_res, g_is_update, tag,
                                                   viennacl::traits::context(A));
    if (tag.getIsStatic()) break;
    cur_iter++;
  }

  // assemble the preconditioner on the host and copy it back to the device
  cpu_M.resize(cpu_M.size1(), cpu_M.size2(), false);
  insert_sparse_columns(M_v, cpu_M, tag.getIsRight());
  M.resize(static_cast<unsigned int>(cpu_M.size1()), static_cast<unsigned int>(cpu_M.size2()));
  viennacl::copy(cpu_M, M);
}

} //namespace spai
} //namespace detail
} //namespace linalg
} //namespace viennacl

#endif