doc/host__based_2ilu__operations_8hpp_source.html

 #ifndef VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_

 #define VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_


 /* =========================================================================

    Copyright (c) 2010-2016, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the PDF manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include <algorithm>  //for std::max and std::min
 #include <cassert>
 #include <cmath>
 #include <vector>

 #include "viennacl/forwards.h"
 #include "viennacl/scalar.hpp"
 #include "viennacl/tools/tools.hpp"
 #include "viennacl/meta/predicate.hpp"
 #include "viennacl/meta/enable_if.hpp"
 #include "viennacl/traits/size.hpp"
 #include "viennacl/traits/start.hpp"
 #include "viennacl/linalg/host_based/common.hpp"
 #include "viennacl/linalg/vector_operations.hpp"
 #include "viennacl/traits/stride.hpp"


 // Minimum vector size for using OpenMP on vector operations:

 #ifndef VIENNACL_OPENMP_ILU_MIN_SIZE

   #define VIENNACL_OPENMP_ILU_MIN_SIZE  5000

 #endif


 namespace viennacl

 {

 namespace linalg

 {

 namespace host_based

 {


 /** @brief Copies the lower triangular part of A (diagonal included) into L; both matrices are accessed as CSR arrays.
  *
  * Works in three passes:
  *  1. For every row, the number of entries with column index <= row index is added to the
  *     matching slot of L's row buffer (the count is added on top of whatever the slot holds on entry).
  *  2. An exclusive scan turns these counts into row start offsets; storage for the entries is reserved.
  *  3. Column indices and values of the selected entries are written to L.
  *
  * @param A   Source matrix
  * @param L   Destination matrix; its dimensions are expected to match those of A already
  */
 template<typename NumericT>
 void extract_L(compressed_matrix<NumericT> const & A,
                compressed_matrix<NumericT>       & L)
 {
   unsigned int const *src_row_starts = detail::extract_raw_pointer<unsigned int>(A.handle1());
   unsigned int const *src_columns    = detail::extract_raw_pointer<unsigned int>(A.handle2());
   NumericT     const *src_values     = detail::extract_raw_pointer<NumericT>(A.handle());

   unsigned int       *dst_row_starts = detail::extract_raw_pointer<unsigned int>(L.handle1());

   // Pass 1: per-row count of entries on or below the diagonal
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = src_row_starts[i];
     unsigned int const last  = src_row_starts[i+1];

     unsigned int lower_count = 0;
     for (unsigned int k = first; k < last; ++k)
     {
       if (static_cast<long>(src_columns[k]) <= i)
         ++lower_count;
     }
     dst_row_starts[i] += lower_count;
   }

   // Pass 2: counts -> row start offsets; the final slot then holds the total number of entries
   viennacl::vector_base<unsigned int> L_row_view(L.handle1(), L.size1() + 1, 0, 1);
   viennacl::linalg::exclusive_scan(L_row_view);
   L.reserve(L_row_view[L.size1()], false);

   unsigned int       *dst_columns = detail::extract_raw_pointer<unsigned int>(L.handle2());
   NumericT           *dst_values  = detail::extract_raw_pointer<NumericT>(L.handle());

   // Pass 3: copy the selected entries row by row
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = src_row_starts[i];
     unsigned int const last  = src_row_starts[i+1];

     unsigned int write_pos = dst_row_starts[i];
     for (unsigned int k = first; k < last; ++k)
     {
       unsigned int const column = src_columns[k];
       NumericT     const entry  = src_values[k];

       if (static_cast<long>(column) > i)   // strictly upper entry: not part of L
         continue;

       dst_columns[write_pos] = column;
       dst_values[write_pos]  = entry;
       ++write_pos;
     }
   }
 } // extract_L


 /** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates the values stored in L accordingly.
  *
  * For every row whose diagonal entry a_ii is stored in A, D_i = 1 / sqrt(|a_ii|) is computed.
  * Rows without a stored diagonal entry are not written in the first loop.
  * Each entry l_ij of L is then multiplied by D_i * D_j, and the row block information of L is regenerated.
  *
  * @param A   Matrix supplying the diagonal entries
  * @param L   Matrix whose values are scaled in place
  */
 template<typename NumericT>
 void icc_scale(compressed_matrix<NumericT> const & A,
                compressed_matrix<NumericT>       & L)
 {
   viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));

   unsigned int const *src_row_starts = detail::extract_raw_pointer<unsigned int>(A.handle1());
   unsigned int const *src_columns    = detail::extract_raw_pointer<unsigned int>(A.handle2());
   NumericT     const *src_values     = detail::extract_raw_pointer<NumericT>(A.handle());

   NumericT           *scale          = detail::extract_raw_pointer<NumericT>(D.handle());

   // Pass 1: scaling factor per row from the diagonal entry of A
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const last = src_row_starts[i+1];

     // advance to the first entry sitting on the diagonal (if any)
     unsigned int k = src_row_starts[i];
     while (k < last && static_cast<long>(src_columns[k]) != i)
       ++k;

     if (k < last)
       scale[i] = NumericT(1) / std::sqrt(std::fabs(src_values[k]));
   }

   // Pass 2: apply the symmetric scaling to all entries of L
   unsigned int const *L_row_starts = detail::extract_raw_pointer<unsigned int>(L.handle1());
   unsigned int const *L_columns    = detail::extract_raw_pointer<unsigned int>(L.handle2());
   NumericT           *L_values     = detail::extract_raw_pointer<NumericT>(L.handle());

 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = L_row_starts[i];
     unsigned int const last  = L_row_starts[i+1];

     NumericT const row_scale = scale[i];

     for (unsigned int k = first; k < last; ++k)
     {
       NumericT const col_scale = scale[L_columns[k]];
       L_values[k] *= row_scale * col_scale;
     }
   }

   L.generate_row_block_information();
 }


 /** @brief Performs one nonlinear relaxation step in the Chow-Patel-ICC using OpenMP.
  *
  * All values of L are first copied to a temporary workspace. Each entry l_ij is then recomputed from
  * the matching entry of aij_L and the snapshot values only, so rows can be processed independently.
  *
  * The code relies on the diagonal entry being the last entry of each row of L
  * and on column indices within a row being sorted ascendingly.
  *
  * @param L       Current factor in CSR format; its values are overwritten
  * @param aij_L   Values a_ij, indexed like the value array of L (only read here)
  *
  * @throws std::bad_alloc if the temporary workspace cannot be allocated
  */
 template<typename NumericT>
 void icc_chow_patel_sweep(compressed_matrix<NumericT> & L,
                           vector<NumericT>            & aij_L)
 {
   unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
   unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());

   NumericT           *aij_ptr   = detail::extract_raw_pointer<NumericT>(aij_L.handle());

   // temporary workspace
   // Owned by a std::vector: an allocation failure throws instead of yielding an unchecked
   // null pointer, and the memory is released automatically on every exit path.
   std::vector<NumericT> L_backup_storage(L.nnz());
   NumericT *L_backup = L_backup_storage.empty() ? 0 : &L_backup_storage[0];

   // backup:
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(L.nnz()); ++i)
     L_backup[i] = L_elements[i];

   // sweep
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long row = 0; row < static_cast<long>(L.size1()); ++row)
   {
     //
     // update L:
     //
     unsigned int row_Li_start = L_row_buffer[row];
     unsigned int row_Li_end   = L_row_buffer[row + 1];

     for (unsigned int i = row_Li_start; i < row_Li_end; ++i)
     {
       unsigned int col = L_col_buffer[i];

       unsigned int row_Lj_start = L_row_buffer[col];
       unsigned int row_Lj_end   = L_row_buffer[col+1];

       // compute \sum_{k=1}^{j-1} l_ik l_jk
       // (merge-style walk over the sorted column indices of rows i and j)
       unsigned int index_Lj = row_Lj_start;
       unsigned int col_Lj = L_col_buffer[index_Lj];
       NumericT s = aij_ptr[i];
       for (unsigned int index_Li = row_Li_start; index_Li < i; ++index_Li)
       {
         unsigned int col_Li = L_col_buffer[index_Li];

         // find element in row j
         while (col_Lj < col_Li)
         {
           ++index_Lj;
           col_Lj = L_col_buffer[index_Lj];
         }

         if (col_Lj == col_Li)
           s -= L_backup[index_Li] * L_backup[index_Lj];
       }

       if (row != col)
         L_elements[i] = s / L_backup[row_Lj_end - 1]; // diagonal element is last in row!
       else
         L_elements[i] = std::sqrt(s);
     }
   }
 }


 /** @brief Splits A into its lower triangular part L and its upper triangular part U; diagonal entries go to both.
  *
  * Works in three passes:
  *  1. Per row, the numbers of entries with column <= row and with column >= row are added to the
  *     row buffers of L and U (on top of whatever these buffers hold on entry).
  *  2. Exclusive scans turn the counts into row start offsets; storage for the entries is reserved.
  *  3. Column indices and values are written to L and U.
  *
  * @param A   Source matrix
  * @param L   Receives the entries on or below the diagonal; expected to have correct dimensions already
  * @param U   Receives the entries on or above the diagonal; expected to have correct dimensions already
  */
 template<typename NumericT>
 void extract_LU(compressed_matrix<NumericT> const & A,
                 compressed_matrix<NumericT>       & L,
                 compressed_matrix<NumericT>       & U)
 {
   unsigned int const *src_row_starts = detail::extract_raw_pointer<unsigned int>(A.handle1());
   unsigned int const *src_columns    = detail::extract_raw_pointer<unsigned int>(A.handle2());
   NumericT     const *src_values     = detail::extract_raw_pointer<NumericT>(A.handle());

   unsigned int       *L_row_starts = detail::extract_raw_pointer<unsigned int>(L.handle1());
   unsigned int       *U_row_starts = detail::extract_raw_pointer<unsigned int>(U.handle1());

   // Pass 1: per-row entry counts for both triangles
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = src_row_starts[i];
     unsigned int const last  = src_row_starts[i+1];

     unsigned int lower_count = 0;
     unsigned int upper_count = 0;
     for (unsigned int k = first; k < last; ++k)
     {
       long const column = static_cast<long>(src_columns[k]);
       if (column <= i)
         ++lower_count;
       if (column >= i)
         ++upper_count;
     }
     L_row_starts[i] += lower_count;
     U_row_starts[i] += upper_count;
   }

   // Pass 2: counts -> row start offsets, then reserve room for the entries
   viennacl::vector_base<unsigned int> L_row_view(L.handle1(), L.size1() + 1, 0, 1);
   viennacl::linalg::exclusive_scan(L_row_view);
   L.reserve(L_row_view[L.size1()], false);

   viennacl::vector_base<unsigned int> U_row_view(U.handle1(), U.size1() + 1, 0, 1);
   viennacl::linalg::exclusive_scan(U_row_view);
   U.reserve(U_row_view[U.size1()], false);

   unsigned int       *L_columns = detail::extract_raw_pointer<unsigned int>(L.handle2());
   NumericT           *L_values  = detail::extract_raw_pointer<NumericT>(L.handle());

   unsigned int       *U_columns = detail::extract_raw_pointer<unsigned int>(U.handle2());
   NumericT           *U_values  = detail::extract_raw_pointer<NumericT>(U.handle());

   // Pass 3: distribute the entries of each row
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = src_row_starts[i];
     unsigned int const last  = src_row_starts[i+1];

     unsigned int L_pos = L_row_starts[i];
     unsigned int U_pos = U_row_starts[i];
     for (unsigned int k = first; k < last; ++k)
     {
       unsigned int const column = src_columns[k];
       NumericT     const entry  = src_values[k];

       bool const in_lower = static_cast<long>(column) <= i;
       bool const in_upper = static_cast<long>(column) >= i;

       if (in_lower)
       {
         L_columns[L_pos] = column;
         L_values[L_pos]  = entry;
         ++L_pos;
       }

       if (in_upper)
       {
         U_columns[U_pos] = column;
         U_values[U_pos]  = entry;
         ++U_pos;
       }
     }
   }
 } // extract_LU


 /** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates the values stored in L and U accordingly.
  *
  * For every row whose diagonal entry a_ii is stored in A, D_i = 1 / sqrt(|a_ii|) is computed.
  * Rows without a stored diagonal entry are not written in the first loop.
  * Each entry of L and of U at position (i, j) is then multiplied by D_i * D_j.
  * Row block information is regenerated for L only.
  *
  * @param A   Matrix supplying the diagonal entries
  * @param L   Matrix whose values are scaled in place
  * @param U   Matrix whose values are scaled in place
  */
 template<typename NumericT>
 void ilu_scale(compressed_matrix<NumericT> const & A,
                compressed_matrix<NumericT>       & L,
                compressed_matrix<NumericT>       & U)
 {
   viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));

   unsigned int const *src_row_starts = detail::extract_raw_pointer<unsigned int>(A.handle1());
   unsigned int const *src_columns    = detail::extract_raw_pointer<unsigned int>(A.handle2());
   NumericT     const *src_values     = detail::extract_raw_pointer<NumericT>(A.handle());

   NumericT           *scale          = detail::extract_raw_pointer<NumericT>(D.handle());

   // Pass 1: scaling factor per row from the diagonal entry of A
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const last = src_row_starts[i+1];

     // advance to the first entry sitting on the diagonal (if any)
     unsigned int k = src_row_starts[i];
     while (k < last && static_cast<long>(src_columns[k]) != i)
       ++k;

     if (k < last)
       scale[i] = NumericT(1) / std::sqrt(std::fabs(src_values[k]));
   }

   // Pass 2: apply the scaling to all entries of L
   unsigned int const *L_row_starts = detail::extract_raw_pointer<unsigned int>(L.handle1());
   unsigned int const *L_columns    = detail::extract_raw_pointer<unsigned int>(L.handle2());
   NumericT           *L_values     = detail::extract_raw_pointer<NumericT>(L.handle());

 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = L_row_starts[i];
     unsigned int const last  = L_row_starts[i+1];

     NumericT const row_scale = scale[i];

     for (unsigned int k = first; k < last; ++k)
     {
       NumericT const col_scale = scale[L_columns[k]];
       L_values[k] *= row_scale * col_scale;
     }
   }

   // Pass 3: apply the scaling to all entries of U
   unsigned int const *U_row_starts = detail::extract_raw_pointer<unsigned int>(U.handle1());
   unsigned int const *U_columns    = detail::extract_raw_pointer<unsigned int>(U.handle2());
   NumericT           *U_values     = detail::extract_raw_pointer<NumericT>(U.handle());

 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(A.size1()); ++i)
   {
     unsigned int const first = U_row_starts[i];
     unsigned int const last  = U_row_starts[i+1];

     NumericT const row_scale = scale[i];

     for (unsigned int k = first; k < last; ++k)
     {
       NumericT const col_scale = scale[U_columns[k]];
       U_values[k] *= row_scale * col_scale;
     }
   }

   L.generate_row_block_information();
   // Note: block information for U will be generated after transposition
 }


 /** @brief Writes the transpose of the CSR matrix A to B.
  *
  * B is replaced by a freshly constructed matrix of size A.size2() x A.size1() with room for A.nnz() entries.
  * The number of entries per column of A is counted first, converted to row start offsets of B,
  * and then all entries of A are scattered to their position in B. Since the rows of A are visited
  * in ascending order, the column indices within each row of B are written in ascending order.
  *
  * @param A   Matrix to transpose
  * @param B   Receives the transposed matrix; previous contents are discarded
  */
 template<typename NumericT>
 void ilu_transpose(compressed_matrix<NumericT> const & A,
                    compressed_matrix<NumericT>       & B)
 {
   NumericT     const * src_values     = detail::extract_raw_pointer<NumericT>(A.handle());
   unsigned int const * src_row_starts = detail::extract_raw_pointer<unsigned int>(A.handle1());
   unsigned int const * src_columns    = detail::extract_raw_pointer<unsigned int>(A.handle2());

   // set up storage for the result:
   B = compressed_matrix<NumericT>(A.size2(), A.size1(), A.nnz(), viennacl::traits::context(A));

   NumericT     * dst_values     = detail::extract_raw_pointer<NumericT>(B.handle());
   unsigned int * dst_row_starts = detail::extract_raw_pointer<unsigned int>(B.handle1());
   unsigned int * dst_columns    = detail::extract_raw_pointer<unsigned int>(B.handle2());

   // the row buffer doubles as a per-row counter below, hence clear it first:
   std::fill(dst_row_starts, dst_row_starts + B.size1(), 0u);

   //
   // Stage 1: count the entries of each column of A (= each row of B)
   //
   for (std::size_t i = 0; i < A.size1(); ++i)
   {
     unsigned int const last = src_row_starts[i+1];
     for (unsigned int k = src_row_starts[i]; k < last; ++k)
       ++dst_row_starts[src_columns[k]];
   }

   // Exclusive scan: turn the counts into row start offsets
   unsigned int running_total = dst_row_starts[0];
   dst_row_starts[0] = 0;
   for (std::size_t i = 1; i < B.size1(); ++i)
   {
     unsigned int const count = dst_row_starts[i];
     dst_row_starts[i] = running_total;
     running_total += count;
   }
   dst_row_starts[B.size1()] = running_total;

   //
   // Stage 2: scatter the entries of A
   //
   std::vector<unsigned int> written_in_row(B.size1()); // entries already placed in each row of B

   for (unsigned int i = 0; i < static_cast<unsigned int>(A.size1()); ++i)
   {
     unsigned int const first = src_row_starts[i];
     unsigned int const last  = src_row_starts[i+1];

     for (unsigned int k = first; k < last; ++k)
     {
       unsigned int const target_row = src_columns[k];
       unsigned int const target_pos = dst_row_starts[target_row] + written_in_row[target_row];

       dst_columns[target_pos] = i;
       dst_values[target_pos]  = src_values[k];
       ++written_in_row[target_row];
     }
   }

   // Stage 3: make the data structure consistent (row blocks!)
   B.generate_row_block_information();
 }


 /** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenMP.
  *
  * The values of L and U_trans are first copied to temporary workspaces. All entries of L and U_trans are
  * then recomputed from the matching entries of aij_L / aij_U_trans and the snapshot values only,
  * so rows can be processed independently. Diagonal entries of L are left untouched.
  *
  * The code relies on the diagonal entry being the last entry of each row of U_trans
  * (checked by an assert) and on column indices within a row being sorted ascendingly.
  *
  * @param L             Lower factor in CSR format; off-diagonal values are overwritten
  * @param aij_L         Values a_ij, indexed like the value array of L
  * @param U_trans       Transposed upper factor in CSR format; values are overwritten
  * @param aij_U_trans   Values a_ij, indexed like the value array of U_trans
  *
  * @throws std::bad_alloc if a temporary workspace cannot be allocated
  */
 template<typename NumericT>
 void ilu_chow_patel_sweep(compressed_matrix<NumericT>       & L,
                           vector<NumericT>            const & aij_L,
                           compressed_matrix<NumericT>       & U_trans,
                           vector<NumericT>            const & aij_U_trans)
 {
   unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
   unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());

   NumericT     const *aij_L_ptr    = detail::extract_raw_pointer<NumericT>(aij_L.handle());

   unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.handle1());
   unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.handle2());
   NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U_trans.handle());

   NumericT     const *aij_U_trans_ptr = detail::extract_raw_pointer<NumericT>(aij_U_trans.handle());

   // temporary workspace
   // Owned by std::vector so that nothing leaks if the second allocation throws,
   // and so that the memory is released automatically on every exit path.
   std::vector<NumericT> L_backup_storage(L.nnz());
   std::vector<NumericT> U_backup_storage(U_trans.nnz());
   NumericT *L_backup = L_backup_storage.empty() ? 0 : &L_backup_storage[0];
   NumericT *U_backup = U_backup_storage.empty() ? 0 : &U_backup_storage[0];

   // backup:
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(L.nnz()); ++i)
     L_backup[i] = L_elements[i];

 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (U_trans.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(U_trans.nnz()); ++i)
     U_backup[i] = U_elements[i];

   // sweep
 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long row = 0; row < static_cast<long>(L.size1()); ++row)
   {
     //
     // update L:
     //
     unsigned int row_L_start = L_row_buffer[row];
     unsigned int row_L_end   = L_row_buffer[row + 1];

     for (unsigned int j = row_L_start; j < row_L_end; ++j)
     {
       unsigned int col = L_col_buffer[j];

       if (col == row)   // diagonal of L is not updated
         continue;

       unsigned int row_U_start = U_row_buffer[col];
       unsigned int row_U_end   = U_row_buffer[col + 1];

       // compute \sum_{k=1}^{j-1} l_ik u_kj
       // (merge-style walk over the sorted column indices of row 'row' of L and row 'col' of U_trans)
       unsigned int index_U = row_U_start;
       unsigned int col_U = (index_U < row_U_end) ? U_col_buffer[index_U] : static_cast<unsigned int>(U_trans.size2());
       NumericT sum = 0;
       for (unsigned int k = row_L_start; k < j; ++k)
       {
         unsigned int col_L = L_col_buffer[k];

         // find element in U
         while (col_U < col_L)
         {
           ++index_U;
           col_U = U_col_buffer[index_U];
         }

         if (col_U == col_L)
           sum += L_backup[k] * U_backup[index_U];
       }

       // update l_ij:
       assert(U_col_buffer[row_U_end - 1] == col && bool("Accessing U element which is not a diagonal element!"));
       L_elements[j] = (aij_L_ptr[j] - sum) / U_backup[row_U_end - 1];  // diagonal element is last entry in U
     }

     //
     // update U:
     //
     unsigned int row_U_start = U_row_buffer[row];
     unsigned int row_U_end   = U_row_buffer[row + 1];
     for (unsigned int j = row_U_start; j < row_U_end; ++j)
     {
       unsigned int col = U_col_buffer[j];

       // the L row bounds are reused here for row 'col' of L
       row_L_start = L_row_buffer[col];
       row_L_end   = L_row_buffer[col + 1];

       // compute \sum_{k=1}^{j-1} l_ik u_kj
       unsigned int index_L = row_L_start;
       unsigned int col_L = (index_L < row_L_end) ? L_col_buffer[index_L] : static_cast<unsigned int>(L.size1());
       NumericT sum = 0;
       for (unsigned int k = row_U_start; k < j; ++k)
       {
         unsigned int col_U = U_col_buffer[k];

         // find element in L
         while (col_L < col_U)
         {
           ++index_L;
           col_L = L_col_buffer[index_L];
         }

         if (col_U == col_L)
           sum += L_backup[index_L] * U_backup[k];
       }

       // update u_ij:
       U_elements[j] = aij_U_trans_ptr[j] - sum;
     }
   }
 }


 /** @brief Extracts the diagonal of R into diag_R and rewrites R in place for use in a Neumann series.
  *
  * For each row, the first stored diagonal entry is copied to diag_R and replaced by zero in R
  * (diag_R receives 0 if the row has no stored diagonal entry). Afterwards every stored entry of
  * the row is divided by the negated diagonal value. A zero diagonal is caught by an assert only.
  *
  * @param R        Matrix in CSR format; modified in place
  * @param diag_R   Receives one diagonal value per row of R
  */
 template<typename NumericT>
 void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,
                              vector<NumericT> & diag_R)
 {
   unsigned int *row_starts = detail::extract_raw_pointer<unsigned int>(R.handle1());
   unsigned int *columns    = detail::extract_raw_pointer<unsigned int>(R.handle2());
   NumericT     *values     = detail::extract_raw_pointer<NumericT>(R.handle());

   NumericT     *diagonal   = detail::extract_raw_pointer<NumericT>(diag_R.handle());

 #ifdef VIENNACL_WITH_OPENMP
   #pragma omp parallel for if (R.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
 #endif
   for (long i = 0; i < static_cast<long>(R.size1()); ++i)
   {
     unsigned int const first = row_starts[i];
     unsigned int const last  = row_starts[i+1];

     // locate the diagonal entry of this row (if stored)
     unsigned int k = first;
     while (k < last && static_cast<long>(columns[k]) != i)
       ++k;

     NumericT pivot = 0;
     if (k < last)
     {
       pivot = values[k];
       values[k] = 0; // (I - D^{-1}R)
     }
     diagonal[i] = pivot;

     assert((pivot > 0 || pivot < 0) && bool("Zero diagonal detected!"));

     // scale the whole row (including the entry zeroed above)
     for (k = first; k < last; ++k)
       values[k] /= -pivot;
   }
 }


 } //namespace host_based

 } //namespace linalg

 } //namespace viennacl


 #endif

viennacl::compressed_matrix::size2
const vcl_size_t & size2() const
Returns the number of columns.
Definition: compressed_matrix.hpp:929

viennacl::linalg::sum
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
Definition: sum.hpp:45

size.hpp
Generic size and resize functionality for different vector and matrix types.

vector_operations.hpp
Implementations of vector operations.

viennacl::compressed_matrix::size1
const vcl_size_t & size1() const
Returns the number of rows.
Definition: compressed_matrix.hpp:927

start.hpp
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...

tools.hpp
Various little tools used here and there in ViennaCL.

viennacl::linalg::host_based::icc_chow_patel_sweep
void icc_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > &aij_L)
Performs one nonlinear relaxation step in the Chow-Patel-ICC using OpenMP (cf. Algorithm 3 in paper...
Definition: ilu_operations.hpp:185

forwards.h
This file provides the forward declarations for the main types used within ViennaCL.

stride.hpp
Determines row and column increments for matrices and matrix proxies.

viennacl::linalg::host_based::extract_LU
void extract_LU(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
Definition: ilu_operations.hpp:258

viennacl::compressed_matrix::handle
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
Definition: compressed_matrix.hpp:942

viennacl::compressed_matrix::handle1
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Definition: compressed_matrix.hpp:936

viennacl::compressed_matrix::nnz
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
Definition: compressed_matrix.hpp:931

NumericT
float NumericT
Definition: bisect.cpp:40

viennacl::linalg::host_based::ilu_scale
void ilu_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
Definition: ilu_operations.hpp:349

viennacl::compressed_matrix::generate_row_block_information
void generate_row_block_information()
Builds the row block information needed for fast sparse matrix-vector multiplications.
Definition: compressed_matrix.hpp:999

viennacl::linalg::host_based::ilu_form_neumann_matrix
void ilu_form_neumann_matrix(compressed_matrix< NumericT > &R, vector< NumericT > &diag_R)
Definition: ilu_operations.hpp:626

viennacl::linalg::host_based::extract_L
void extract_L(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
Definition: ilu_operations.hpp:53

viennacl::compressed_matrix::handle2
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Definition: compressed_matrix.hpp:938

viennacl::vector_base< unsigned int >

common.hpp
Common routines for single-threaded or OpenMP-enabled execution on CPU.

viennacl::linalg::host_based::icc_scale
void icc_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
Definition: ilu_operations.hpp:124

viennacl::vector< NumericT >

viennacl::diag
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:895

viennacl::row
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910

predicate.hpp
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.

viennacl::compressed_matrix::reserve
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
Definition: compressed_matrix.hpp:794

viennacl::traits::context
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40

viennacl::linalg::host_based::ilu_transpose
void ilu_transpose(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &B)
Definition: ilu_operations.hpp:431

viennacl::compressed_matrix< NumericT >

viennacl::linalg::exclusive_scan
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
Definition: vector_operations.hpp:1240

scalar.hpp
Implementation of the ViennaCL scalar class.

viennacl::vector_base::handle
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128

viennacl::linalg::host_based::ilu_chow_patel_sweep
void ilu_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans)
Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenMP (cf. Algorithm 2 in paper) ...
Definition: ilu_operations.hpp:503

enable_if.hpp
Simple enable-if variant that uses the SFINAE pattern.