doc/detail_2spai_2qr_8hpp_source.html

 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP

 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP


 /* =========================================================================

    Copyright (c) 2010-2016, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include <math.h>

 #include <algorithm>
 #include <cmath>
 #include <fstream>
 #include <iostream>
 #include <iterator>
 #include <sstream>
 #include <string>
 #include <utility>
 #include <vector>

 #include "viennacl/ocl/backend.hpp"

 #include "boost/numeric/ublas/vector.hpp"

 #include "boost/numeric/ublas/matrix.hpp"

 #include "boost/numeric/ublas/matrix_proxy.hpp"

 #include "boost/numeric/ublas/storage.hpp"

 #include "boost/numeric/ublas/io.hpp"

 #include "boost/numeric/ublas/matrix_expression.hpp"

 #include "boost/numeric/ublas/detail/matrix_assign.hpp"


 #include "viennacl/vector.hpp"

 #include "viennacl/matrix.hpp"


 #include "viennacl/linalg/detail/spai/block_matrix.hpp"

 #include "viennacl/linalg/detail/spai/block_vector.hpp"

 #include "viennacl/linalg/opencl/kernels/spai.hpp"


 namespace viennacl

 {

 namespace linalg

 {

 namespace detail

 {

 namespace spai

 {


 //********** DEBUG FUNCTIONS *****************//

 /** @brief Writes every element of [it_begin, it_end) to a stream, each followed by one blank, and then ends the line.
 *
 * The element type T cannot be deduced from the arguments and has to be given explicitly,
 * e.g. Print<int>(std::cout, v.begin(), v.end()).
 *
 * @param ostr      Stream the elements are written to
 * @param it_begin  Iterator to the first element to print
 * @param it_end    Iterator one past the last element to print
 */
 template< typename T, typename InputIteratorT>
 void Print(std::ostream & ostr, InputIteratorT it_begin, InputIteratorT it_end)
 {
   // std::ostream_iterator lives in <iterator>; the delimiter is a plain literal, no std::string needed
   std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, " "));
   // debug helper: std::endl is kept on purpose so the output is flushed immediately
   ostr << std::endl;
 }


 template<typename VectorT, typename MatrixT>

 void write_to_block(VectorT & con_A_I_J,

                     unsigned int start_ind,

                     std::vector<unsigned int> const & I,

                     std::vector<unsigned int> const & J,

                     MatrixT& m)

 {

   m.resize(I.size(), J.size(), false);

   for (vcl_size_t i = 0; i < J.size(); ++i)

     for (vcl_size_t j = 0; j < I.size(); ++j)

       m(j,i) = con_A_I_J[start_ind + i*I.size() + j];

 }


 template<typename VectorT>

 void print_continious_matrix(VectorT & con_A_I_J,

                              std::vector<cl_uint> & blocks_ind,

                              std::vector<std::vector<unsigned int> > const & g_I,

                              std::vector<std::vector<unsigned int> > const & g_J)

 {

   typedef typename VectorT::value_type        NumericType;


   std::vector<boost::numeric::ublas::matrix<NumericType> > com_A_I_J(g_I.size());

   for (vcl_size_t i = 0; i < g_I.size(); ++i)

   {

     write_to_block(con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);

     std::cout << com_A_I_J[i] << std::endl;

   }

 }


 template<typename VectorT>

 void print_continious_vector(VectorT & con_v,

                              std::vector<cl_uint> & block_ind,

                              std::vector<std::vector<unsigned int> > const & g_J)

 {

   typedef typename VectorT::value_type     NumericType;


   std::vector<boost::numeric::ublas::vector<NumericType> > com_v(g_J.size());

   //Print<ScalarType>(std::cout, con_v.begin(), con_v.end());

   for (vcl_size_t i = 0; i < g_J.size(); ++i)

   {

     com_v[i].resize(g_J[i].size());

     for (vcl_size_t j = 0; j < g_J[i].size(); ++j)

       com_v[i](j) = con_v[block_ind[i] + j];

     std::cout << com_v[i] << std::endl;

   }

 }


 inline void compute_blocks_size(std::vector<std::vector<unsigned int> > const & g_I,

                                 std::vector<std::vector<unsigned int> > const & g_J,

                                 unsigned int& sz,

                                 std::vector<cl_uint> & blocks_ind,

                                 std::vector<cl_uint> & matrix_dims)

 {

   sz = 0;

   for (vcl_size_t i = 0; i < g_I.size(); ++i)

   {

     sz += static_cast<unsigned int>(g_I[i].size()*g_J[i].size());

     matrix_dims[2*i] = static_cast<cl_uint>(g_I[i].size());

     matrix_dims[2*i + 1] = static_cast<cl_uint>(g_J[i].size());

     blocks_ind[i+1] = blocks_ind[i] + static_cast<cl_uint>(g_I[i].size()*g_J[i].size());

   }

 }


 /** @brief Computes the total number of indices stored in a container of index sets.
 *
 * @param inds  Container of index sets
 * @param size  Receives the sum of the sizes of all index sets (0 for an empty container)
 */
 template<typename SizeT>
 void get_size(std::vector<std::vector<SizeT> > const & inds,
               SizeT & size)
 {
   typedef typename std::vector<std::vector<SizeT> >::const_iterator   IndexSetIterator;

   size = 0;
   for (IndexSetIterator it = inds.begin(); it != inds.end(); ++it)
     size += static_cast<SizeT>(it->size());   // cast to SizeT (not unsigned int) so that wider SizeT is not truncated
 }


 template<typename SizeT>

 void init_start_inds(std::vector<std::vector<SizeT> > const & inds,

                      std::vector<cl_uint>& start_inds)

 {

   for (vcl_size_t i = 0; i < inds.size(); ++i)

     start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].size());

 }


 //*************************************  QR FUNCTIONS  ***************************************//


 template<typename MatrixT, typename NumericT>

 void dot_prod(MatrixT const & A,

               unsigned int beg_ind,

               NumericT & res)

 {

   res = NumericT(0);

   for (vcl_size_t i = beg_ind; i < A.size1(); ++i)

     res += A(i, beg_ind-1)*A(i, beg_ind-1);

 }


 /** @brief Inner product of a matrix column with a vector, restricted to rows start_ind, start_ind+1, ...
 *
 * @param A          Matrix providing the column
 * @param v          Vector the column is multiplied with
 * @param col_ind    Index of the column of A
 * @param start_ind  First row/entry taking part in the sum
 * @param res        Receives the result (0 if start_ind is not below the number of rows)
 */
 template<typename MatrixT, typename VectorT, typename NumericT>
 void custom_inner_prod(MatrixT const & A,
                        VectorT const & v,
                        unsigned int col_ind,
                        unsigned int start_ind,
                        NumericT & res)
 {
   const unsigned int row_cnt = static_cast<unsigned int>(A.size1());

   res = static_cast<NumericT>(0);

   unsigned int row = start_ind;
   while (row < row_cnt)
   {
     res += A(row, col_ind)*v(row);
     ++row;
   }
 }


 /** @brief Copies the lower part of a matrix column into a vector.
 *
 * Entries beg_ind, beg_ind+1, ... of v are overwritten with the entries of column (beg_ind-1) of A
 * in the same rows; all other entries of v are left untouched.
 *
 * @param A        Matrix to read from
 * @param v        Vector to write to
 * @param beg_ind  First row that is copied; the column used is beg_ind-1
 */
 template<typename MatrixT, typename VectorT>
 void copy_vector(MatrixT const & A,
                  VectorT       & v,
                  unsigned int beg_ind)
 {
   const unsigned int src_col = beg_ind-1;
   const unsigned int row_cnt = static_cast<unsigned int>(A.size1());

   unsigned int row = beg_ind;
   while (row < row_cnt)
   {
     v(row) = A(row, src_col);
     ++row;
   }
 }


 /** @brief Computes the Householder vector and the associated factor for column j of A.
 *
 * Householder reflection, c.f. Gene H. Golub, Charles F. Van Loan, "Matrix Computations", 3rd edition, p. 210.
 *
 * @param A  Matrix whose column j is processed (rows j and below are read)
 * @param j  Index of the column and of the pivot row
 * @param v  Receives the Householder vector in entries j, j+1, ... with v(j) == 1 on return.
 *           If the part of the column below the diagonal is nonzero, the whole of v is divided by the pivot value.
 * @param b  Receives the factor used when applying the reflection (0 if the part below the diagonal is zero)
 */
 template<typename MatrixT, typename VectorT, typename NumericT>
 void householder_vector(MatrixT const & A,
                         unsigned int j,
                         VectorT & v,
                         NumericT & b)
 {
   // sg: squared norm of column j below the diagonal
   NumericT sg;
   dot_prod(A, j+1, sg);

   // v(j+1 : end) = A(j+1 : end, j), v(j) = 1
   copy_vector(A, v, j+1);
   v(j) = static_cast<NumericT>(1.0);

   // nothing below the diagonal: no reflection required
   if (!sg)
   {
     b = 0;
     return;
   }

   const NumericT mu = std::sqrt(A(j,j)*A(j, j) + sg);

   // choose the formula that avoids cancellation for the sign of the pivot
   if (A(j, j) <= 0)
     v(j) = A(j, j) - mu;
   else
     v(j) = -sg/(A(j, j) + mu);

   b = 2*(v(j)*v(j))/(sg + v(j)*v(j));

   // normalize such that v(j) == 1
   v = v/v(j);
 }


 /** @brief Applies a Householder reflection in place to the trailing part of A.
 *
 * Every column a_i with i >= iter_cnt is updated as a_i = a_i - b*v*(v'*a_i), using rows iter_cnt and below only.
 *
 * @param A         Matrix that is updated in place
 * @param iter_cnt  First row and first column taking part in the update
 * @param v         Householder vector
 * @param b         Factor belonging to v
 */
 template<typename MatrixT, typename VectorT, typename NumericT>
 void apply_householder_reflection(MatrixT & A,
                                   unsigned int iter_cnt,
                                   VectorT & v,
                                   NumericT b)
 {
   const unsigned int row_cnt = static_cast<unsigned int>(A.size1());
   const unsigned int col_cnt = static_cast<unsigned int>(A.size2());

   for (unsigned int col = iter_cnt; col < col_cnt; ++col)
   {
     // v' * a_col, restricted to rows >= iter_cnt
     NumericT in_prod_res;
     custom_inner_prod(A, v, col, iter_cnt, in_prod_res);

     unsigned int row = iter_cnt;
     while (row < row_cnt)
     {
       A(row, col) -= b*in_prod_res*v(row);
       ++row;
     }
   }
 }


 /** @brief Stores the entries ind, ind+1, ... of v in column (ind-1) of A, in the same rows.
 *
 * @param A    Matrix that receives the entries
 * @param ind  First row that is written; the column written to is ind-1
 * @param v    Vector providing the entries
 */
 template<typename MatrixT, typename VectorT>
 void store_householder_vector(MatrixT & A,
                               unsigned int ind,
                               VectorT & v)
 {
   const unsigned int dest_col = ind-1;
   const unsigned int row_cnt  = static_cast<unsigned int>(A.size1());

   unsigned int row = ind;
   while (row < row_cnt)
   {
     A(row, dest_col) = v(row);
     ++row;
   }
 }


 /** @brief In-place QR factorization via Householder reflections.
 *
 * For every processed column i a Householder vector is computed, applied to the trailing part of R and
 * afterwards stored below the diagonal of column i; the associated factor is stored in b_v[i].
 * Nothing is done (b_v is left untouched) if R has no rows or no columns.
 *
 * @param R    Matrix to factorize; overwritten with the factorization
 * @param b_v  Receives one factor per column of R (resized to R.size2(), unused entries are zero)
 */
 template<typename MatrixT, typename VectorT>
 void single_qr(MatrixT & R, VectorT & b_v)
 {
   typedef typename MatrixT::value_type     NumericType;

   if ((R.size1() == 0) || (R.size2() == 0))
     return;

   VectorT v = static_cast<VectorT>(boost::numeric::ublas::zero_vector<NumericType>(R.size1()));
   b_v = static_cast<VectorT>(boost::numeric::ublas::zero_vector<NumericType>(R.size2()));

   const unsigned int row_cnt = static_cast<unsigned int>(R.size1());
   const unsigned int col_cnt = static_cast<unsigned int>(R.size2());

   // At most min(rows, columns) reflections exist. Going beyond the number of rows would make
   // householder_vector() write v(i) past the end of v; the corresponding factors simply stay zero.
   const unsigned int reflection_cnt = (col_cnt < row_cnt) ? col_cnt : row_cnt;

   for (unsigned int i = 0; i < reflection_cnt; ++i)
   {
     householder_vector(R, i, v, b_v[i]);
     apply_householder_reflection(R, i, v, b_v[i]);
     store_householder_vector(R, i+1, v);
   }
 }


 //********************** HELP FUNCTIONS FOR GPU-based QR factorization *************************//


 /** @brief Determines the size of the largest index set in a container of index sets.
 *
 * @param inds      Container of index sets
 * @param max_size  Receives the largest size found (0 for an empty container)
 */
 template<typename SizeT>
 void get_max_block_size(std::vector<std::vector<SizeT> > const & inds,
                         SizeT & max_size)
 {
   typedef typename std::vector<std::vector<SizeT> >::const_iterator   IndexSetIterator;

   max_size = 0;
   for (IndexSetIterator it = inds.begin(); it != inds.end(); ++it)
   {
     if (it->size() > max_size)
       max_size = static_cast<SizeT>(it->size());
   }
 }


 /** @brief Inner product of v with column ind of A over rows ind, ind+1, ..., where the diagonal entry A(ind, ind) is treated as 1.
 *
 * @param A    Matrix providing the column (only entries below the diagonal are read)
 * @param v    Vector the column is multiplied with
 * @param ind  Index of the column and of the first row taking part
 * @param res  Receives the result (0 if ind is not below the number of rows)
 */
 template<typename MatrixT, typename VectorT, typename NumericT>
 void custom_dot_prod(MatrixT const & A,
                      VectorT const & v,
                      unsigned int ind,
                      NumericT & res)
 {
   res = static_cast<NumericT>(0);

   if (ind < A.size1())
   {
     // diagonal position: implicit unit entry
     res += v(ind);

     // remaining rows use the stored column entries
     for (unsigned int row = ind + 1; row < A.size1(); ++row)
       res += A(row, ind)*v(row);
   }
 }


 template<typename MatrixT, typename VectorT>

 void apply_q_trans_vec(MatrixT const & R,

                        VectorT const & b_v,

                        VectorT       & y)

 {

   typedef typename MatrixT::value_type     NumericT;


   NumericT inn_prod = NumericT(0);

   for (vcl_size_t i = 0; i < R.size2(); ++i)

   {

     custom_dot_prod(R, y, static_cast<unsigned int>(i), inn_prod);

     for (vcl_size_t j = i; j < R.size1(); ++j)

     {

       if (i == j)

         y(j) -= b_v(i)*inn_prod;

       else

         y(j) -= b_v(i)*inn_prod*R(j,i);

     }

   }

 }


 template<typename MatrixT, typename VectorT>

 void apply_q_trans_mat(MatrixT const & R,

                        VectorT const & b_v,

                        MatrixT       & A)

 {

   VectorT tmp_v;

   for (vcl_size_t i = 0; i < A.size2(); ++i)

   {

     tmp_v = static_cast<VectorT>(column(A,i));

     apply_q_trans_vec(R, b_v, tmp_v);

     column(A,i) = tmp_v;

   }

 }


 /** @brief Parallel QR for the GPU: creates the required OpenCL buffers and enqueues the "block_qr" kernel of the SPAI program.
 *
 * @param g_I          Container of row index sets, one per block
 * @param g_J          Container of column index sets, one per block
 * @param g_A_I_J_vcl  Block matrix; its handle(), handle1() and handle2() are passed to the kernel
 * @param g_bv_vcl     Block vector; its handle() and handle1() are replaced by buffers created here
 * @param g_is_update  Flags handed to the kernel through a newly created buffer (presumably one flag per block -- confirm against the kernel)
 * @param ctx          Context providing the OpenCL context in which buffers and kernel live
 *
 * NOTE(review): &(vec[0]) is taken of b_v, v and g_is_update without checking for emptiness; this assumes
 * non-empty index sets and a non-empty g_is_update -- confirm against the callers.
 */

 template<typename NumericT>

 void block_qr(std::vector<std::vector<unsigned int> > & g_I,

               std::vector<std::vector<unsigned int> > & g_J,

               block_matrix & g_A_I_J_vcl,

               block_vector & g_bv_vcl,

               std::vector<cl_uint> & g_is_update,

               viennacl::context ctx)

 {

   // opencl_context() result is used as a non-const reference below, hence the const_cast
   viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());


   //typedef typename MatrixType::value_type ScalarType;

   // total number of entries over all column index sets (bv_size) and all row index sets (v_size)
   unsigned int bv_size = 0;

   unsigned int v_size = 0;

   //set up arguments for GPU

   //find maximum size of rows/columns

   unsigned int local_r_n = 0;

   unsigned int local_c_n = 0;

   //find max size for blocks

   get_max_block_size(g_I, local_r_n);

   get_max_block_size(g_J, local_c_n);

   //get size

   get_size(g_J, bv_size);

   get_size(g_I, v_size);

   //get start indices (both arrays hold g_I.size()+1 entries, the first one being 0)

   std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);

   std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);

   init_start_inds(g_J, start_bv_inds);

   init_start_inds(g_I, start_v_inds);

   //init arrays (zero-filled host data passed to create_memory below)

   std::vector<NumericT> b_v(bv_size, NumericT(0));

   std::vector<NumericT>   v(v_size,  NumericT(0));

   //call qr program

   block_vector v_vcl;


   g_bv_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,

                                                static_cast<unsigned int>(sizeof(NumericT)*bv_size),

                                                &(b_v[0]));


   v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,

                                             static_cast<unsigned int>(sizeof(NumericT)*v_size),

                                             &(v[0]));

   //the same as j_start_inds

   // NOTE(review): both index buffers are created with g_I.size() entries although the host arrays hold
   // g_I.size()+1 entries, i.e. the final entry is not part of the buffer -- confirm this is intended.
   g_bv_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,

                                                 static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),

                                                 &(start_bv_inds[0]));


   v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,

                                              static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),

                                              &(start_v_inds[0]));

   viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,

                                                                            static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),

                                                                            &(g_is_update[0]));

   //local memory

   //viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));

   viennacl::linalg::opencl::kernels::spai<NumericT>::init(opencl_ctx);

   viennacl::ocl::kernel & qr_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(), "block_qr");


   // local work size: largest column index set; global work size: 256 times the local work size
   qr_kernel.local_work_size(0, local_c_n);

   qr_kernel.global_work_size(0, local_c_n*256);

   // local memory argument is sized for local_r_n*local_c_n values of NumericT (largest row set times largest column set);
   // the last argument is the number of blocks
   viennacl::ocl::enqueue(qr_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle1(), g_bv_vcl.handle(),

                                   v_vcl.handle(), g_A_I_J_vcl.handle2(),

                                   g_bv_vcl.handle1(), v_vcl.handle1(), g_is_update_vcl,

                                   viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(NumericT)*(local_r_n*local_c_n))),

                                   static_cast<cl_uint>(g_I.size())));


 }

 }

 }

 }

 }

 #endif

viennacl::linalg::detail::spai::householder_vector
void householder_vector(MatrixT const &A, unsigned int j, VectorT &v, NumericT &b)
Computation of Householder vector, householder reflection c.f. Gene H. Golub, Charles F...
Definition: qr.hpp:236

viennacl::linalg::opencl::kernels::spai
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Definition: spai.hpp:587

viennacl::linalg::detail::spai::block_matrix
Represents contiguous matrices on GPU.
Definition: block_matrix.hpp:49

viennacl::linalg::detail::spai::print_continious_vector
void print_continious_vector(VectorT &con_v, std::vector< cl_uint > &block_ind, std::vector< std::vector< unsigned int > > const &g_J)
Definition: qr.hpp:101

viennacl::linalg::detail::spai::block_matrix::handle
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
Definition: block_matrix.hpp:56

viennacl::linalg::detail::spai::block_vector::handle1
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
Definition: block_vector.hpp:58

viennacl::ocl::kernel
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58

matrix.hpp
Implementation of the dense matrix class.

viennacl::ocl::kernel::local_work_size
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742

spai.hpp
OpenCL kernel file for sparse approximate inverse operations.

viennacl::ocl::context
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55

viennacl::linalg::detail::spai::compute_blocks_size
void compute_blocks_size(std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
Definition: qr.hpp:129

viennacl::linalg::detail::spai::get_size
void get_size(std::vector< std::vector< SizeT > > const &inds, SizeT &size)
Computes size of particular container of index set.
Definition: qr.hpp:151

viennacl::linalg::detail::spai::dot_prod
void dot_prod(MatrixT const &A, unsigned int beg_ind, NumericT &res)
Dot product of a particular column of matrix A with itself, starting at a certain index beg_ind...
Definition: qr.hpp:182

viennacl::linalg::detail::spai::write_to_block
void write_to_block(VectorT &con_A_I_J, unsigned int start_ind, std::vector< unsigned int > const &I, std::vector< unsigned int > const &J, MatrixT &m)
Definition: qr.hpp:72

viennacl::linalg::detail::spai::copy_vector
void copy_vector(MatrixT const &A, VectorT &v, unsigned int beg_ind)
Copying part of matrix column.
Definition: qr.hpp:218

NumericT
float NumericT
Definition: bisect.cpp:40

viennacl::context
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39

viennacl::linalg::detail::spai::single_qr
void single_qr(MatrixT &R, VectorT &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
Definition: qr.hpp:311

viennacl::linalg::detail::spai::apply_q_trans_vec
void apply_q_trans_vec(MatrixT const &R, VectorT const &b_v, VectorT &y)
Recovers Q from matrix R and the vector of betas b_v.
Definition: qr.hpp:377

block_matrix.hpp
Implementation of a bunch of (small) matrices on GPU. Experimental.

viennacl::traits::size
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239

viennacl::linalg::detail::spai::Print
void Print(std::ostream &ostr, InputIteratorT it_begin, InputIteratorT it_end)
Definition: qr.hpp:63

viennacl::ocl::local_mem
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33

viennacl::linalg::detail::spai::store_householder_vector
void store_householder_vector(MatrixT &A, unsigned int ind, VectorT &v)
Storage of vector v in column(A, ind), starting from ind-1 index of a column.
Definition: qr.hpp:295

viennacl::ocl::context::get_kernel
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605

viennacl::linalg::detail::spai::init_start_inds
void init_start_inds(std::vector< std::vector< SizeT > > const &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Definition: qr.hpp:165

block_vector.hpp
Implementation of a bunch of vectors on GPU. Experimental.

viennacl::linalg::detail::spai::custom_inner_prod
void custom_inner_prod(MatrixT const &A, VectorT const &v, unsigned int col_ind, unsigned int start_ind, NumericT &res)
Dot prod of particular matrix column with arbitrary vector: A(:, col_ind)
Definition: qr.hpp:200

viennacl::linalg::detail::spai::block_matrix::handle1
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
Definition: block_matrix.hpp:59

viennacl::linalg::detail::spai::block_vector::handle
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Definition: block_vector.hpp:55

viennacl::vcl_size_t
std::size_t vcl_size_t
Definition: forwards.h:75

backend.hpp
Implementations of the OpenCL backend, where all contexts are stored in.

viennacl::linalg::detail::spai::block_matrix::handle2
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
Definition: block_matrix.hpp:62

viennacl::linalg::detail::spai::get_max_block_size
void get_max_block_size(std::vector< std::vector< SizeT > > const &inds, SizeT &max_size)
Getting max size of rows/columns from container of index set.
Definition: qr.hpp:338

viennacl::ocl::enqueue
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50

vector.hpp
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...

viennacl::linalg::detail::spai::custom_dot_prod
void custom_dot_prod(MatrixT const &A, VectorT const &v, unsigned int ind, NumericT &res)
Dot_prod(column(A, ind), v) starting from index ind+1.
Definition: qr.hpp:355

viennacl::copy
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150

viennacl::linalg::detail::spai::block_vector
Represents a contiguous vector on the GPU to represent a concatenation of small vectors.
Definition: block_vector.hpp:48

viennacl::linalg::opencl::kernels::spai::init
static void init(viennacl::ocl::context &ctx)
Definition: spai.hpp:594

viennacl::ocl::kernel::global_work_size
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751

viennacl::linalg::detail::spai::apply_q_trans_mat
void apply_q_trans_mat(MatrixT const &R, VectorT const &b_v, MatrixT &A)
Multiplication of Q'*A, where Q is stored implicitly in the lower part of R and the vector of betas b_v...
Definition: qr.hpp:404

viennacl::linalg::detail::spai::block_qr
void block_qr(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224 performed on GPU.
Definition: qr.hpp:428

viennacl::column
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:918

viennacl::ocl::handle< cl_mem >

viennacl::linalg::detail::spai::print_continious_matrix
void print_continious_matrix(VectorT &con_A_I_J, std::vector< cl_uint > &blocks_ind, std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J)
Definition: qr.hpp:85

viennacl::linalg::detail::spai::apply_householder_reflection
void apply_householder_reflection(MatrixT &A, unsigned int iter_cnt, VectorT &v, NumericT b)
Inplace application of Householder vector to a matrix A.
Definition: qr.hpp:271

viennacl::ocl::context::create_memory
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.
Definition: context.hpp:216