1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
37 #include "boost/numeric/ublas/vector.hpp"
38 #include "boost/numeric/ublas/matrix.hpp"
39 #include "boost/numeric/ublas/matrix_proxy.hpp"
40 #include "boost/numeric/ublas/storage.hpp"
41 #include "boost/numeric/ublas/io.hpp"
42 #include "boost/numeric/ublas/matrix_expression.hpp"
43 #include "boost/numeric/ublas/detail/matrix_assign.hpp"
62 template<
typename T,
typename InputIteratorT>
63 void Print(std::ostream & ostr, InputIteratorT it_begin, InputIteratorT it_end)
66 std::string delimiters =
" ";
67 std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
71 template<
typename VectorT,
typename MatrixT>
73 unsigned int start_ind,
74 std::vector<unsigned int>
const & I,
75 std::vector<unsigned int>
const & J,
78 m.resize(I.size(), J.size(),
false);
81 m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
84 template<
typename VectorT>
86 std::vector<cl_uint> & blocks_ind,
87 std::vector<std::vector<unsigned int> >
const & g_I,
88 std::vector<std::vector<unsigned int> >
const & g_J)
90 typedef typename VectorT::value_type NumericType;
92 std::vector<boost::numeric::ublas::matrix<NumericType> > com_A_I_J(g_I.size());
95 write_to_block(con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
96 std::cout << com_A_I_J[i] << std::endl;
100 template<
typename VectorT>
102 std::vector<cl_uint> & block_ind,
103 std::vector<std::vector<unsigned int> >
const & g_J)
105 typedef typename VectorT::value_type NumericType;
107 std::vector<boost::numeric::ublas::vector<NumericType> > com_v(g_J.size());
111 com_v[i].resize(g_J[i].
size());
112 for (
vcl_size_t j = 0; j < g_J[i].size(); ++j)
113 com_v[i](j) = con_v[block_ind[i] + j];
114 std::cout << com_v[i] << std::endl;
130 std::vector<std::vector<unsigned int> >
const & g_J,
132 std::vector<cl_uint> & blocks_ind,
133 std::vector<cl_uint> & matrix_dims)
138 sz +=
static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
139 matrix_dims[2*i] =
static_cast<cl_uint
>(g_I[i].size());
140 matrix_dims[2*i + 1] =
static_cast<cl_uint
>(g_J[i].size());
141 blocks_ind[i+1] = blocks_ind[i] +
static_cast<cl_uint
>(g_I[i].size()*g_J[i].size());
150 template<
typename SizeT>
151 void get_size(std::vector<std::vector<SizeT> >
const & inds,
156 size += static_cast<unsigned int>(inds[i].
size());
164 template<
typename SizeT>
166 std::vector<cl_uint>& start_inds)
169 start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].
size());
181 template<
typename MatrixT,
typename NumericT>
183 unsigned int beg_ind,
187 for (
vcl_size_t i = beg_ind; i < A.size1(); ++i)
188 res += A(i, beg_ind-1)*A(i, beg_ind-1);
199 template<
typename MatrixT,
typename VectorT,
typename NumericT>
202 unsigned int col_ind,
203 unsigned int start_ind,
207 for (
unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i)
208 res += A(i, col_ind)*v(i);
217 template<
typename MatrixT,
typename VectorT>
220 unsigned int beg_ind)
222 for (
unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i)
223 v(i) = A( i, beg_ind-1);
235 template<
typename MatrixT,
typename VectorT,
typename NumericT>
251 mu = std::sqrt(A(j,j)*A(j, j) + sg);
255 v(j) = -sg/(A(j, j) + mu);
257 b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
270 template<
typename MatrixT,
typename VectorT,
typename NumericT>
272 unsigned int iter_cnt,
279 for (
unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i)
283 for (
unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j)
284 A(j, i) -= b*in_prod_res*v(j);
294 template<
typename MatrixT,
typename VectorT>
299 for (
unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i)
310 template<
typename MatrixT,
typename VectorT>
313 typedef typename MatrixT::value_type NumericType;
315 if ((R.size1() > 0) && (R.size2() > 0))
317 VectorT v =
static_cast<VectorT
>(boost::numeric::ublas::zero_vector<NumericType>(R.size1()));
318 b_v =
static_cast<VectorT
>(boost::numeric::ublas::zero_vector<NumericType>(R.size2()));
320 for (
unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i)
337 template<
typename SizeT>
343 if (inds[i].
size() > max_size)
344 max_size = static_cast<SizeT>(inds[i].
size());
354 template<
typename MatrixT,
typename VectorT,
typename NumericT>
361 for (
unsigned int j = ind; j < A.size1(); ++j)
366 res += A(j, ind)*v(j);
376 template<
typename MatrixT,
typename VectorT>
381 typedef typename MatrixT::value_type
NumericT;
390 y(j) -= b_v(i)*inn_prod;
392 y(j) -= b_v(i)*inn_prod*R(j,i);
403 template<
typename MatrixT,
typename VectorT>
411 tmp_v =
static_cast<VectorT
>(
column(A,i));
427 template<
typename NumericT>
428 void block_qr(std::vector<std::vector<unsigned int> > & g_I,
429 std::vector<std::vector<unsigned int> > & g_J,
432 std::vector<cl_uint> & g_is_update,
438 unsigned int bv_size = 0;
439 unsigned int v_size = 0;
442 unsigned int local_r_n = 0;
443 unsigned int local_c_n = 0;
451 std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
452 std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
456 std::vector<NumericT> b_v(bv_size,
NumericT(0));
457 std::vector<NumericT> v(v_size,
NumericT(0));
462 static_cast<unsigned int>(
sizeof(
NumericT)*bv_size),
466 static_cast<unsigned int>(
sizeof(
NumericT)*v_size),
470 static_cast<unsigned int>(
sizeof(cl_uint)*g_I.size()),
471 &(start_bv_inds[0]));
474 static_cast<unsigned int>(
sizeof(cl_uint)*g_I.size()),
477 static_cast<unsigned int>(
sizeof(cl_uint)*g_is_update.size()),
490 static_cast<cl_uint
>(g_I.size())));
void householder_vector(MatrixT const &A, unsigned int j, VectorT &v, NumericT &b)
Computation of Householder vector, householder reflection c.f. Gene H. Golub, Charles F...
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Represents contiguous matrices on GPU.
void print_continious_vector(VectorT &con_v, std::vector< cl_uint > &block_ind, std::vector< std::vector< unsigned int > > const &g_J)
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
Represents an OpenCL kernel within ViennaCL.
Implementation of the dense matrix class.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
OpenCL kernel file for sparse approximate inverse operations.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void compute_blocks_size(std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
void get_size(std::vector< std::vector< SizeT > > const &inds, SizeT &size)
Computes size of particular container of index set.
void dot_prod(MatrixT const &A, unsigned int beg_ind, NumericT &res)
Dot product of a particular column of matrix A with itself, starting at a certain index beg_ind...
void write_to_block(VectorT &con_A_I_J, unsigned int start_ind, std::vector< unsigned int > const &I, std::vector< unsigned int > const &J, MatrixT &m)
void copy_vector(MatrixT const &A, VectorT &v, unsigned int beg_ind)
Copying part of matrix column.
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
void single_qr(MatrixT &R, VectorT &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
void apply_q_trans_vec(MatrixT const &R, VectorT const &b_v, VectorT &y)
Recovery of Q from matrix R and the vector of betas b_v.
Implementation of a bunch of (small) matrices on GPU. Experimental.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
void Print(std::ostream &ostr, InputIteratorT it_begin, InputIteratorT it_end)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
void store_householder_vector(MatrixT &A, unsigned int ind, VectorT &v)
Storage of vector v in column(A, ind), starting from ind-1 index of a column.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
void init_start_inds(std::vector< std::vector< SizeT > > const &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Implementation of a bunch of vectors on GPU. Experimental.
void custom_inner_prod(MatrixT const &A, VectorT const &v, unsigned int col_ind, unsigned int start_ind, NumericT &res)
Dot prod of particular matrix column with arbitrary vector: A(:, col_ind)
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Implementations of the OpenCL backend, where all contexts are stored in.
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
void get_max_block_size(std::vector< std::vector< SizeT > > const &inds, SizeT &max_size)
Gets the maximum size of rows/columns from a container of index sets.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void custom_dot_prod(MatrixT const &A, VectorT const &v, unsigned int ind, NumericT &res)
Dot_prod(column(A, ind), v) starting from index ind+1.
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Represents a contiguous vector on the GPU to represent a concatenation of small vectors.
static void init(viennacl::ocl::context &ctx)
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void apply_q_trans_mat(MatrixT const &R, VectorT const &b_v, MatrixT &A)
Multiplication of Q'*A, where Q is in implicit form in the lower part of R and the vector of betas b_v...
void block_qr(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224 performed on GPU.
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
void print_continious_matrix(VectorT &con_A_I_J, std::vector< cl_uint > &blocks_ind, std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J)
void apply_householder_reflection(MatrixT &A, unsigned int iter_cnt, VectorT &v, NumericT b)
Inplace application of Householder vector to a matrix A.
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.