1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
38 #include "boost/numeric/ublas/vector.hpp"
39 #include "boost/numeric/ublas/matrix.hpp"
40 #include "boost/numeric/ublas/matrix_proxy.hpp"
41 #include "boost/numeric/ublas/vector_proxy.hpp"
42 #include "boost/numeric/ublas/storage.hpp"
43 #include "boost/numeric/ublas/io.hpp"
44 #include "boost/numeric/ublas/lu.hpp"
45 #include "boost/numeric/ublas/triangular.hpp"
46 #include "boost/numeric/ublas/matrix_expression.hpp"
79 template<
typename T1,
typename T2>
80 bool operator()(std::pair<T1, T2>
const & left, std::pair<T1, T2>
const & right)
82 return static_cast<double>(left.second) > static_cast<double>(right.second);
93 template<
typename MatrixT>
98 typedef typename MatrixT::value_type NumericType;
100 vcl_size_t row_n = R_n.size1() - (A.size1() - R.size2());
101 MatrixT C = boost::numeric::ublas::zero_matrix<NumericType>(R.size1() + row_n, R.size2() + A.size2());
107 R.size2() + A.size2())) +=
111 if (R_n.size1() > 0 && R_n.size2() > 0)
123 template<
typename VectorT>
127 typedef typename VectorT::value_type NumericType;
129 VectorT w = boost::numeric::ublas::zero_vector<NumericType>(v.size() + v_n.size());
140 template<
typename SparseVectorT,
typename NumericT>
144 for (
typename SparseVectorT::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it)
145 norm += (vec_it->second)*(vec_it->second);
147 norm = std::sqrt(norm);
156 template<
typename SparseVectorT,
typename NumericT>
158 SparseVectorT
const &
v2,
161 typename SparseVectorT::const_iterator v_it1 = v1.begin();
162 typename SparseVectorT::const_iterator v_it2 = v2.begin();
164 while ((v_it1 != v1.end())&&(v_it2 != v2.end()))
166 if (v_it1->first == v_it2->first)
168 res_v += (v_it1->second)*(v_it2->second);
172 else if (v_it1->first < v_it2->first)
187 template<
typename SparseVectorT,
typename NumericT>
189 SparseVectorT
const & res,
190 std::vector<unsigned int> & J,
191 std::vector<unsigned int> & J_u,
194 std::vector<std::pair<unsigned int, NumericT> > p;
198 for (
typename SparseVectorT::const_iterator res_it = res.begin(); res_it != res.end(); ++res_it)
205 p.push_back(std::pair<unsigned int, NumericT>(res_it->first, (inprod*inprod)/(norm2*norm2)));
210 while ((cur_size < J.size()) && (p.size() > 0))
212 J_u.push_back(p[0].first);
217 return (cur_size > 0);
227 template<
typename SparseVectorT>
229 std::vector<unsigned int>
const & I,
230 std::vector<unsigned int>
const & J_n,
231 std::vector<unsigned int> & I_n)
235 for (
typename SparseVectorT::const_iterator col_it = A_v_c[J_n[i]].begin(); col_it!=A_v_c[J_n[i]].end(); ++col_it)
238 I_n.push_back(col_it->first);
249 template<
typename MatrixT>
251 MatrixT
const & A_I_J_u,
254 typedef typename MatrixT::value_type NumericType;
256 vcl_size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
261 MatrixT C = boost::numeric::ublas::zero_matrix<NumericType>(row_n, col_n);
287 template<
typename SparseMatrixT,
288 typename SparseVectorT,
289 typename DenseMatrixT,
292 std::vector<SparseVectorT>
const & A_v_c,
293 std::vector<SparseVectorT> & g_res,
294 std::vector<bool> & g_is_update,
295 std::vector<std::vector<unsigned int> >& g_I,
296 std::vector<std::vector<unsigned int> >& g_J,
297 std::vector<VectorT> & g_b_v,
298 std::vector<DenseMatrixT> & g_A_I_J,
301 typedef typename DenseMatrixT::value_type NumericType;
304 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
305 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
306 std::vector<DenseMatrixT> g_A_I_J_u(g_J.size());
307 std::vector<DenseMatrixT> g_A_I_u_J_u(g_J.size());
308 std::vector<VectorT> g_b_v_u(g_J.size());
310 #ifdef VIENNACL_WITH_OPENMP
311 #pragma omp parallel for
313 for (
long i = 0; i < static_cast<long>(g_J.size()); ++i)
315 if (g_is_update[static_cast<vcl_size_t>(i)])
317 if (buildAugmentedIndexSet<SparseVectorT, NumericType>(A_v_c, g_res[static_cast<vcl_size_t>(i)], g_J[
static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], tag))
320 initProjectSubMatrix(A, g_J_u[static_cast<vcl_size_t>(i)], g_I[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)]);
322 apply_q_trans_mat(g_A_I_J[static_cast<vcl_size_t>(i)], g_b_v[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)]);
324 buildNewRowSet(A_v_c, g_I[static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)]);
325 initProjectSubMatrix(A, g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)]);
327 QRBlockComposition(g_A_I_J[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)]);
329 single_qr(g_A_I_u_J_u[static_cast<vcl_size_t>(i)], g_b_v_u[static_cast<vcl_size_t>(i)]);
331 composeNewR(g_A_I_J_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)], g_A_I_J[static_cast<vcl_size_t>(i)]);
332 composeNewVector(g_b_v_u[static_cast<vcl_size_t>(i)], g_b_v[static_cast<vcl_size_t>(i)]);
339 g_is_update[
static_cast<vcl_size_t>(i)] =
false;
360 template<
typename NumericT>
362 std::vector<std::vector<unsigned int> >
const & g_I,
366 std::vector<cl_uint> & g_is_update,
370 unsigned int local_r_n = 0;
371 unsigned int local_c_n = 0;
372 unsigned int sz_blocks = 0;
378 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
379 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
384 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
395 static_cast<cl_uint
>(g_I.size())));
405 template<
typename SizeT>
407 std::vector<std::vector<SizeT> >
const & g_J,
408 std::vector<std::vector<SizeT> >
const & g_I_u,
409 std::vector<std::vector<SizeT> > & g_I_q)
411 #ifdef VIENNACL_WITH_OPENMP
412 #pragma omp parallel for
414 for (
long i = 0; i < static_cast<long>(g_I.size()); ++i)
416 for (
vcl_size_t j = g_J[static_cast<vcl_size_t>(i)].size(); j < g_I[static_cast<vcl_size_t>(i)].
size(); ++j)
417 g_I_q[static_cast<vcl_size_t>(i)].push_back(g_I[static_cast<vcl_size_t>(i)][j]);
419 for (
vcl_size_t j = 0; j < g_I_u[static_cast<vcl_size_t>(i)].
size(); ++j)
420 g_I_q[static_cast<vcl_size_t>(i)].push_back(g_I_u[static_cast<vcl_size_t>(i)][j]);
438 template<
typename NumericT>
440 std::vector<std::vector<unsigned int> >
const& g_I,
441 std::vector<std::vector<unsigned int> >
const& g_J_u,
442 std::vector<std::vector<unsigned int> >
const& g_I_u,
443 std::vector<std::vector<unsigned int> >& g_I_q,
447 std::vector<cl_uint> & g_is_update,
455 unsigned int sz_blocks;
456 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
457 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
461 std::vector<NumericT> con_A_I_J_q(sz_blocks, static_cast<NumericT>(0));
466 static_cast<unsigned int>(
sizeof(
NumericT)*sz_blocks),
471 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
476 static_cast<unsigned int>(
sizeof(cl_uint)*static_cast<unsigned int>(g_I.size() + 1)),
481 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
501 static_cast<unsigned int>(g_I.size())));
513 static_cast<unsigned int>(g_I.size())));
532 template<
typename NumericT>
533 void assemble_r(std::vector<std::vector<unsigned int> > & g_I,
534 std::vector<std::vector<unsigned int> > & g_J,
540 std::vector<cl_uint> & g_is_update,
544 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
545 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
546 std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
547 unsigned int sz_blocks, bv_size;
553 std::vector<NumericT> con_A_I_J_r(sz_blocks, static_cast<NumericT>(0));
554 std::vector<NumericT> b_v_r(bv_size, static_cast<NumericT>(0));
559 static_cast<unsigned int>(
sizeof(
NumericT)*sz_blocks),
564 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
569 static_cast<unsigned int>(
sizeof(cl_uint)*static_cast<unsigned int>(g_I.size() + 1)),
574 static_cast<unsigned int>(
sizeof(
NumericT)*bv_size),
579 static_cast<unsigned int>(
sizeof(cl_uint)*(g_I.size() + 1)),
580 &(start_bv_r_inds[0]));
584 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
595 g_is_update_vcl,
static_cast<cl_uint
>(g_I.size())));
599 bv_assembly_kernel.global_work_size(0, 256);
603 static_cast<cl_uint
>(g_I.size())));
624 template<
typename NumericT,
unsigned int AlignmentV,
typename SparseVectorT>
626 std::vector<SparseVectorT>
const & A_v_c,
627 std::vector<cl_uint> & g_is_update,
628 std::vector<SparseVectorT> & g_res,
629 std::vector<std::vector<unsigned int> > & g_J,
630 std::vector<std::vector<unsigned int> > & g_I,
637 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
639 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
641 std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
650 #ifdef VIENNACL_WITH_OPENMP
651 #pragma omp parallel for
653 for (
long i = 0; i < static_cast<long>(g_J.size()); ++i)
655 if (g_is_update[static_cast<vcl_size_t>(i)])
657 if (buildAugmentedIndexSet<SparseVectorT, NumericT>(A_v_c, g_res[static_cast<vcl_size_t>(i)], g_J[
static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], tag))
658 buildNewRowSet(A_v_c, g_I[static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)]);
662 block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block);
664 block_q_multiplication<NumericT>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, g_A_I_J_u_vcl, g_is_update, ctx);
666 block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, is_empty_block);
667 assemble_qr_block<NumericT>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, g_A_I_J_vcl.handle1(),
668 g_A_I_u_J_u_vcl, g_is_update, is_empty_block, ctx);
670 block_qr<NumericT>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, ctx);
672 #ifdef VIENNACL_WITH_OPENMP
673 #pragma omp parallel for
675 for (
long i = 0; i < static_cast<long>(g_J.size()); ++i)
680 assemble_r<NumericT>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl, g_bv_vcl, g_bv_u_vcl, g_is_update, ctx);
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
bool buildAugmentedIndexSet(std::vector< SparseVectorT > const &A_v_c, SparseVectorT const &res, std::vector< unsigned int > &J, std::vector< unsigned int > &J_u, spai_tag const &tag)
Building a new set of column indices J_u, cf. Kallischko dissertation p.31.
Represents contigious matrices on GPU.
Implementations of dense matrix related operations including matrix-vector products.
Helper functor for comparing std::pair<> based on the second member.
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
void sparse_norm_2(SparseVectorT const &v, NumericT &norm)
Computation of Euclidean norm for sparse vector.
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
bool operator()(std::pair< T1, T2 > const &left, std::pair< T1, T2 > const &right)
Represents an OpenCL kernel within ViennaCL.
Implementation of the dense matrix class.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
OpenCL kernel file for sparse approximate inverse operations.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void compute_blocks_size(std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
void get_size(std::vector< std::vector< SizeT > > const &inds, SizeT &size)
Computes size of particular container of index set.
void assemble_qr_row_inds(std::vector< std::vector< SizeT > > const &g_I, std::vector< std::vector< SizeT > > const &g_J, std::vector< std::vector< SizeT > > const &g_I_u, std::vector< std::vector< SizeT > > &g_I_q)
Assembly of container of index row sets: I_q, row indices for new "QR block".
Main implementation of SPAI (not FSPAI). Experimental.
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
void block_update(SparseMatrixT const &A, std::vector< SparseVectorT > const &A_v_c, std::vector< SparseVectorT > &g_res, std::vector< bool > &g_is_update, std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, std::vector< VectorT > &g_b_v, std::vector< DenseMatrixT > &g_A_I_J, spai_tag const &tag)
CPU-based dynamic update for SPAI preconditioner.
viennacl::ocl::context const & context() const
bool isInIndexSet(std::vector< SizeT > const &J, SizeT ind)
Determines if element ind is in set {J}.
void buildNewRowSet(std::vector< SparseVectorT > const &A_v_c, std::vector< unsigned int > const &I, std::vector< unsigned int > const &J_n, std::vector< unsigned int > &I_n)
Building a new indices to current set of row indices I_n, cf. Kallischko dissertation p...
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
void single_qr(MatrixT &R, VectorT &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
void block_assembly(viennacl::compressed_matrix< NumericT, AlignmentV > const &A, std::vector< std::vector< unsigned int > > const &g_J, std::vector< std::vector< unsigned int > > const &g_I, block_matrix &g_A_I_J_vcl, std::vector< cl_uint > &g_is_update, bool &is_empty_block)
Assembly of blocks on GPU by a gived set of row indices: g_I and column indices: g_J.
viennacl::vector< float > v1
Implementation of a bunch of (small) matrices on GPU. Experimental.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
void insert(MatrixType &matrix, long row, long col, ScalarType value)
Implementation of a simultaneous QR factorization of multiple matrices. Experimental.
void composeNewVector(VectorT const &v_n, VectorT &v)
Composition of new vector of coefficients beta from QR factorizations(necessary for Q recovery) ...
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
void init_start_inds(std::vector< std::vector< SizeT > > const &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Implementations of incomplete factorization preconditioners. Convenience header file.
Implementation of a static SPAI. Experimental.
void assemble_r(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_matrix &g_A_I_J_u_vcl, block_matrix &g_A_I_u_J_u_vcl, block_vector &g_bv_vcl, block_vector &g_bv_vcl_u, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Performs assembly for new R matrix on GPU.
Implementation of the compressed_matrix class.
Implementation of a bunch of vectors on GPU. Experimental.
Implementations of operations using sparse matrices.
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
void assemble_qr_block(std::vector< std::vector< unsigned int > > const &g_J, std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J_u, std::vector< std::vector< unsigned int > > const &g_I_u, std::vector< std::vector< unsigned int > > &g_I_q, block_matrix &g_A_I_J_u_vcl, viennacl::ocl::handle< cl_mem > &matrix_dimensions, block_matrix &g_A_I_u_J_u_vcl, std::vector< cl_uint > &g_is_update, bool is_empty_block, viennacl::context ctx)
Performs assembly for new QR block.
void initProjectSubMatrix(SparseMatrixT const &A_in, std::vector< unsigned int > const &J, std::vector< unsigned int > &I, DenseMatrixT &A_out)
Initializes a dense matrix from a sparse one.
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Implementation of the spai tag holding SPAI configuration parameters. Experimental.
The conjugate gradient method is implemented here.
Implementations of the OpenCL backend, where all contexts are stored in.
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
void sparse_inner_prod(SparseVectorT const &v1, SparseVectorT const &v2, NumericT &res_v)
Dot product of two sparse vectors.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
void get_max_block_size(std::vector< std::vector< SizeT > > const &inds, SizeT &max_size)
Getting max size of rows/columns from container of index set.
viennacl::vector< int > v2
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
void block_q_multiplication(std::vector< std::vector< unsigned int > > const &g_J_u, std::vector< std::vector< unsigned int > > const &g_I, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, block_matrix &g_A_I_J_u_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Performs multiplication Q'*A(I, \tilde J) on GPU.
Represents a contiguous vector on the GPU to represent a concatentation of small vectors.
static void init(viennacl::ocl::context &ctx)
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void apply_q_trans_mat(MatrixT const &R, VectorT const &b_v, MatrixT &A)
Multiplication of Q'*A, where Q is in implicit for lower part of R and vector of betas - b_v...
A sparse square matrix in compressed sparse rows format.
Implementation of the ViennaCL scalar class.
void QRBlockComposition(MatrixT const &A_I_J, MatrixT const &A_I_J_u, MatrixT &A_I_u_J_u)
Composition of new block for QR factorization cf. Kallischko dissertation p.82, figure 4...
void composeNewR(MatrixT const &A, MatrixT const &R_n, MatrixT &R)
Composition of new matrix R, that is going to be used in Least Square problem solving.
double getResidualThreshold() const
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.