ViennaCL - The Vienna Computing Library  1.4.2
viennacl/linalg/amg.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_AMG_HPP_
00002 #define VIENNACL_LINALG_AMG_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2013, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015                
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00027 #include <boost/numeric/ublas/matrix.hpp>
00028 #include <boost/numeric/ublas/lu.hpp>
00029 #include <boost/numeric/ublas/operation.hpp>
00030 #include <boost/numeric/ublas/vector_proxy.hpp>
00031 #include <boost/numeric/ublas/matrix_proxy.hpp>
00032 #include <boost/numeric/ublas/vector.hpp>
00033 #include <boost/numeric/ublas/triangular.hpp> 
00034 #include <vector>
00035 #include <cmath>
00036 #include "viennacl/forwards.h"
00037 #include "viennacl/tools/tools.hpp"
00038 #include "viennacl/linalg/prod.hpp"
00039 #include "viennacl/linalg/direct_solve.hpp"
00040 
00041 #include "viennacl/linalg/detail/amg/amg_base.hpp"
00042 #include "viennacl/linalg/detail/amg/amg_coarse.hpp"
00043 #include "viennacl/linalg/detail/amg/amg_interpol.hpp"
00044 
00045 #include <map>
00046 
00047 #ifdef VIENNACL_WITH_OPENMP
00048  #include <omp.h>
00049 #endif
00050 
00051 #include "viennacl/linalg/detail/amg/amg_debug.hpp"
00052 
00053 #define VIENNACL_AMG_COARSE_LIMIT 50
00054 #define VIENNACL_AMG_MAX_LEVELS 100
00055 
00056 namespace viennacl
00057 {
00058   namespace linalg
00059   {    
00060     typedef detail::amg::amg_tag          amg_tag;
00061     
00062     
00063     
00071     template <typename InternalType1, typename InternalType2>
00072     void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00073     {
00074       typedef typename InternalType1::value_type SparseMatrixType;
00075       typedef typename InternalType2::value_type PointVectorType;     
00076       typedef typename SparseMatrixType::value_type ScalarType;   
00077       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00078       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00079       
00080       unsigned int i, iterations, c_points, f_points;
00081       detail::amg::amg_slicing<InternalType1,InternalType2> Slicing;
00082       
00083       // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process.
00084       iterations = tag.get_coarselevels();
00085       if (iterations == 0)
00086         iterations = VIENNACL_AMG_MAX_LEVELS;
00087              
00088       // For parallel coarsenings build data structures (number of threads set automatically).
00089       if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3)
00090         Slicing.init(iterations);
00091       
00092       for (i=0; i<iterations; ++i)
00093       {  
00094         // Initialize Pointvector on level i and construct points.
00095         Pointvector[i] = PointVectorType(A[i].size1());
00096         Pointvector[i].init_points();
00097         
00098         // Construct C and F points on coarse level (i is fine level, i+1 coarse level).
00099         detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag);
00100 
00101         // Calculate number of C and F points on level i.
00102         c_points = Pointvector[i].get_cpoints();
00103         f_points = Pointvector[i].get_fpoints();
00104 
00105         #if defined (VIENNACL_AMG_DEBUG) //or defined(VIENNACL_AMG_DEBUGBENCH) 
00106         std::cout << "Level " << i << ": ";
00107         std::cout << "No of C points = " << c_points << ", ";
00108         std::cout << "No of F points = " << f_points << std::endl;
00109         #endif
00110         
00111         // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i.
00112         if (c_points == 0 || f_points == 0)
00113           break;
00114           
00115         // Construct interpolation matrix for level i.
00116         detail::amg::amg_interpol (i, A, P, Pointvector, tag);
00117         
00118         // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P).
00119         detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]);
00120         
00121         // Test triple matrix product. Very slow for large matrix sizes (ublas).
00122         // test_triplematprod(A[i],P[i],A[i+1]);
00123         
00124         Pointvector[i].delete_points();
00125         
00126         #ifdef VIENNACL_AMG_DEBUG
00127         std::cout << "Coarse Grid Operator Matrix:" << std::endl;
00128         printmatrix (A[i+1]);
00129         #endif  
00130         
00131         // If Limit of coarse points is reached then stop. Coarsest level is level i+1.
00132         if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT)
00133         {
00134           tag.set_coarselevels(i+1);
00135           return;
00136         }
00137       }
00138       tag.set_coarselevels(i);  
00139     }
00140     
00149     template <typename MatrixType, typename InternalType1, typename InternalType2>
00150     void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00151     {
00152       typedef typename MatrixType::value_type ScalarType;
00153       typedef typename InternalType1::value_type SparseMatrixType;
00154       
00155       if (tag.get_coarselevels() > 0)
00156       {
00157         A.resize(tag.get_coarselevels()+1);
00158         P.resize(tag.get_coarselevels());
00159         Pointvector.resize(tag.get_coarselevels());
00160       }
00161       else
00162       {
00163         A.resize(VIENNACL_AMG_MAX_LEVELS+1);
00164         P.resize(VIENNACL_AMG_MAX_LEVELS);
00165         Pointvector.resize(VIENNACL_AMG_MAX_LEVELS);
00166       }
00167       
00168       // Insert operator matrix as operator for finest level.
00169       SparseMatrixType A0 (mat);
00170       A.insert_element (0, A0);  
00171     }
00172     
00182     template <typename InternalType1, typename InternalType2>
00183     void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00184     {  
00185       typedef typename InternalType1::value_type MatrixType;
00186       
00187       // Resize internal data structures to actual size.
00188       A.resize(tag.get_coarselevels()+1);
00189       P.resize(tag.get_coarselevels());
00190       R.resize(tag.get_coarselevels());
00191       
00192       // Transform into matrix type.
00193       for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00194       {
00195         A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00196         A[i] = A_setup[i];
00197       }
00198       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00199       {
00200         P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00201         P[i] = P_setup[i];
00202       }
00203       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00204       {
00205         R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00206         P_setup[i].set_trans(true);
00207         R[i] = P_setup[i];
00208         P_setup[i].set_trans(false);
00209       }
00210     }
00211     
00221     template <typename InternalType1, typename InternalType2>
00222     void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00223     {
00224       typedef typename InternalType1::value_type MatrixType;
00225       typedef typename InternalType2::value_type::value_type ScalarType;
00226       
00227       // Resize internal data structures to actual size.
00228       A.resize(tag.get_coarselevels()+1);
00229       P.resize(tag.get_coarselevels());
00230       R.resize(tag.get_coarselevels());
00231       
00232       // Copy to GPU using the internal sparse matrix structure: std::vector<std::map>.
00233       for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00234       {
00235         //A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00236         viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]);
00237       }
00238       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00239       {
00240         //P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00241         viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]);
00242         //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]);
00243       }
00244       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00245       {
00246         //R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00247         P_setup[i].set_trans(true);
00248         viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]);
00249         P_setup[i].set_trans(false);
00250       }
00251     }
00252     
00261     template <typename InternalVectorType, typename SparseMatrixType>
00262     void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag)
00263     {        
00264       typedef typename InternalVectorType::value_type VectorType;
00265       
00266       result.resize(tag.get_coarselevels()+1);
00267       rhs.resize(tag.get_coarselevels()+1);
00268       residual.resize(tag.get_coarselevels());
00269             
00270       for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
00271       {
00272         result[level] = VectorType(A[level].size1());
00273         result[level].clear();
00274         rhs[level] = VectorType(A[level].size1());
00275         rhs[level].clear();
00276       }
00277       for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
00278       {
00279         residual[level] = VectorType(A[level].size1());
00280         residual[level].clear();
00281       }
00282     }
00290     template <typename ScalarType, typename SparseMatrixType>
00291     void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<> & Permutation, SparseMatrixType const & A)
00292     {
00293       typedef typename SparseMatrixType::const_iterator1 ConstRowIterator;
00294       typedef typename SparseMatrixType::const_iterator2 ConstColIterator;
00295     
00296       // Copy to operator matrix. Needed 
00297       op.resize(A.size1(),A.size2(),false);
00298       for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter)
00299         for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00300           op (col_iter.index1(), col_iter.index2()) = *col_iter;
00301       
00302       // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()!
00303       Permutation = boost::numeric::ublas::permutation_matrix<> (op.size1());
00304       boost::numeric::ublas::lu_factorize(op,Permutation);
00305     }
00306     
00309     template <typename MatrixType>
00310     class amg_precond
00311     {
00312       typedef typename MatrixType::value_type ScalarType;
00313       typedef boost::numeric::ublas::vector<ScalarType> VectorType;
00314       typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00315       typedef detail::amg::amg_pointvector PointVectorType;
00316       
00317       typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00318       typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00319       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00320       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00321       
00322       boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00323       boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00324       boost::numeric::ublas::vector <MatrixType> A;
00325       boost::numeric::ublas::vector <MatrixType> P;
00326       boost::numeric::ublas::vector <MatrixType> R;
00327       boost::numeric::ublas::vector <PointVectorType> Pointvector;
00328        
00329       mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00330       mutable boost::numeric::ublas::permutation_matrix<> Permutation;  
00331       
00332       mutable boost::numeric::ublas::vector <VectorType> result;
00333       mutable boost::numeric::ublas::vector <VectorType> rhs;
00334       mutable boost::numeric::ublas::vector <VectorType> residual;
00335       
00336       mutable bool done_init_apply;
00337           
00338       amg_tag tag_;
00339     public:
00340     
00341       amg_precond(): Permutation(0) {}
00347       amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0)
00348       {
00349         tag_ = tag;
00350         // Initialize data structures.
00351         amg_init (mat,A_setup,P_setup,Pointvector,tag_);
00352         
00353         done_init_apply = false;
00354       }
00355       
00358       void setup()
00359       {
00360         // Start setup phase.
00361         amg_setup(A_setup,P_setup,Pointvector,tag_);
00362         // Transform to CPU-Matrixtype for precondition phase.
00363         amg_transform_cpu(A,P,R,A_setup,P_setup,tag_);
00364         
00365         done_init_apply = false;
00366       }
00367       
00372       void init_apply() const
00373       {
00374         // Setup precondition phase (Data structures).
00375         amg_setup_apply(result,rhs,residual,A_setup,tag_);
00376         // Do LU factorization for direct solve.
00377         amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]);
00378         
00379         done_init_apply = true;
00380       }
00381       
00387       template <typename VectorType>
00388       ScalarType calc_complexity(VectorType & avgstencil)
00389       {
00390         avgstencil = VectorType (tag_.get_coarselevels()+1);
00391         unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00392           
00393         for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level)
00394         {
00395           level_coefficients = 0;
00396           for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00397           { 
00398             for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00399             {
00400               if (level == 0)
00401                 systemmat_nonzero++;
00402               nonzero++;
00403               level_coefficients++;
00404             }
00405           }
00406           avgstencil[level] = level_coefficients/static_cast<ScalarType>(A_setup[level].size1());
00407         }
00408         return nonzero/static_cast<ScalarType>(systemmat_nonzero);
00409       }
00410 
00415       template <typename VectorType>
00416       void apply(VectorType & vec) const
00417       {   
00418         // Build data structures and do lu factorization before first iteration step.
00419         if (!done_init_apply)
00420           init_apply();
00421         
00422         int level;
00423         
00424         // Precondition operation (Yang, p.3)
00425         rhs[0] = vec;
00426         for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++)
00427         {    
00428           result[level].clear();
00429           
00430           // Apply Smoother presmooth_ times.
00431           smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]);    
00432           
00433           #ifdef VIENNACL_AMG_DEBUG
00434           std::cout << "After presmooth:" << std::endl;
00435           printvector(result[level]);
00436           #endif
00437 
00438           // Compute residual.
00439           residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]);
00440           
00441           #ifdef VIENNACL_AMG_DEBUG          
00442           std::cout << "Residual:" << std::endl;
00443           printvector(residual[level]);
00444           #endif
00445           
00446           // Restrict to coarse level. Restricted residual is RHS of coarse level.
00447           rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]);
00448           
00449           #ifdef VIENNACL_AMG_DEBUG
00450           std::cout << "Restricted Residual: " << std::endl;
00451           printvector(rhs[level+1]);
00452           #endif
00453         }
00454           
00455         // On highest level use direct solve to solve equation.
00456         result[level] = rhs[level];
00457         boost::numeric::ublas::lu_substitute(op,Permutation,result[level]);
00458 
00459         #ifdef VIENNACL_AMG_DEBUG
00460         std::cout << "After direct solve: " << std::endl;
00461         printvector (result[level]);
00462         #endif
00463           
00464         for (level=tag_.get_coarselevels()-1; level >= 0; level--)
00465         {       
00466           #ifdef VIENNACL_AMG_DEBUG
00467           std::cout << "Coarse Error: " << std::endl;
00468           printvector(result[level+1]);
00469           #endif
00470           
00471           // Interpolate error to fine level. Correct solution by adding error.
00472           result[level] += boost::numeric::ublas::prod (P[level], result[level+1]);
00473               
00474           #ifdef VIENNACL_AMG_DEBUG
00475           std::cout << "Corrected Result: " << std::endl;
00476           printvector (result[level]);
00477           #endif
00478           
00479           // Apply Smoother postsmooth_ times.
00480           smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]);
00481           
00482           #ifdef VIENNACL_AMG_DEBUG
00483           std::cout << "After postsmooth: " << std::endl;
00484           printvector (result[level]);
00485           #endif
00486         }    
00487         vec = result[0];
00488       }
00489       
00496       template <typename VectorType>
00497       void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const
00498       {
00499         VectorType old_result (x.size());
00500         unsigned int index;
00501         ScalarType sum = 0, diag = 1;
00502         
00503         for (int i=0; i<iterations; ++i)
00504         {
00505           old_result = x;
00506           x.clear();
00507 #ifdef VIENNACL_WITH_OPENMP
00508           #pragma omp parallel for private (sum,diag) shared (rhs,x)
00509 #endif          
00510           for (index=0; index<A_setup[level].size1(); ++index)  
00511           {
00512             InternalConstRowIterator row_iter = A_setup[level].begin1();
00513             row_iter += index; 
00514             sum = 0;
00515             diag = 1;
00516             for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00517             {
00518               if (col_iter.index1() == col_iter.index2())
00519                 diag = *col_iter;
00520               else
00521                 sum += *col_iter * old_result[col_iter.index2()];
00522             }
00523             x[index]= static_cast<ScalarType>(tag_.get_jacobiweight()) * (rhs[index] - sum) / diag + (1-static_cast<ScalarType>(tag_.get_jacobiweight())) * old_result[index];
00524           }
00525         }
00526       }
00527       
00528       amg_tag & tag() { return tag_; }
00529     };
00530     
00535     template <typename ScalarType, unsigned int MAT_ALIGNMENT>
00536     class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> >
00537     {
00538       typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType;
00539       typedef viennacl::vector<ScalarType> VectorType;
00540       typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00541       typedef detail::amg::amg_pointvector PointVectorType;
00542       
00543       typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00544       typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00545       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00546       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00547       
00548       boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00549       boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00550       boost::numeric::ublas::vector <MatrixType> A;
00551       boost::numeric::ublas::vector <MatrixType> P;
00552       boost::numeric::ublas::vector <MatrixType> R;
00553       boost::numeric::ublas::vector <PointVectorType> Pointvector;
00554       
00555       mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00556       mutable boost::numeric::ublas::permutation_matrix<> Permutation;  
00557       
00558       mutable boost::numeric::ublas::vector <VectorType> result;
00559       mutable boost::numeric::ublas::vector <VectorType> rhs;
00560       mutable boost::numeric::ublas::vector <VectorType> residual;
00561           
00562       mutable bool done_init_apply;
00563     
00564       amg_tag tag_;
00565       
00566     public:
00567       
00568       amg_precond(): Permutation(0) {}
00569       
00575       amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0)
00576       {
00577         tag_ = tag;
00578         
00579         // Copy to CPU. Internal structure of sparse matrix is used for copy operation.
00580         std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1());
00581         viennacl::copy(mat, mat2);
00582         
00583         // Initialize data structures.
00584         amg_init (mat2,A_setup,P_setup,Pointvector,tag_);
00585           
00586         done_init_apply = false;
00587       }
00588       
00591       void setup()
00592       {
00593         // Start setup phase.
00594         amg_setup(A_setup,P_setup,Pointvector, tag_);  
00595         // Transform to GPU-Matrixtype for precondition phase.
00596         amg_transform_gpu(A,P,R,A_setup,P_setup, tag_);
00597         
00598         done_init_apply = false;
00599       }
00600       
00605       void init_apply() const
00606       {
00607         // Setup precondition phase (Data structures).
00608         amg_setup_apply(result,rhs,residual,A_setup,tag_);
00609         // Do LU factorization for direct solve.
00610         amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]);
00611         
00612         done_init_apply = true;
00613       }
00614 
00620       template <typename VectorType>
00621       ScalarType calc_complexity(VectorType & avgstencil)
00622       {
00623         avgstencil = VectorType (tag_.get_coarselevels()+1);
00624         unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00625         
00626         for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level)
00627         {
00628           level_coefficients = 0;
00629           for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00630           { 
00631             for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00632             {
00633               if (level == 0)
00634                 systemmat_nonzero++;
00635               nonzero++;
00636               level_coefficients++;
00637             }
00638           }
00639           avgstencil[level] = level_coefficients/(double)A[level].size1();
00640         }
00641         return nonzero/static_cast<double>(systemmat_nonzero);
00642       }
00643 
00648       template <typename VectorType>
00649       void apply(VectorType & vec) const
00650       {
00651         if (!done_init_apply)
00652           init_apply();  
00653         
00654         int level;
00655         
00656         // Precondition operation (Yang, p.3).
00657         rhs[0] = vec;
00658         for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++)
00659         {    
00660           result[level].clear();
00661           
00662           // Apply Smoother presmooth_ times.
00663           smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]);
00664 
00665           #ifdef VIENNACL_AMG_DEBUG
00666           std::cout << "After presmooth: " << std::endl;
00667           printvector(result[level]);
00668           #endif
00669           
00670           // Compute residual.
00671           residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]);
00672           
00673           #ifdef VIENNACL_AMG_DEBUG             
00674           std::cout << "Residual: " << std::endl;
00675           printvector(residual[level]);
00676           #endif
00677           
00678           // Restrict to coarse level. Result is RHS of coarse level equation.
00679           //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]);
00680           rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]);
00681           
00682           #ifdef VIENNACL_AMG_DEBUG
00683           std::cout << "Restricted Residual: " << std::endl;
00684           printvector(rhs[level+1]);
00685           #endif
00686         }
00687           
00688         // On highest level use direct solve to solve equation (on the CPU)
00689         //TODO: Use GPU direct solve!
00690         result[level] = rhs[level];
00691         boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size());
00692 
00693         copy (result[level],result_cpu);
00694         boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu);
00695         copy (result_cpu, result[level]);
00696         
00697         #ifdef VIENNACL_AMG_DEBUG
00698         std::cout << "After direct solve: " << std::endl;
00699         printvector (result[level]);
00700         #endif
00701           
00702         for (level=tag_.get_coarselevels()-1; level >= 0; level--)
00703         {   
00704           #ifdef VIENNACL_AMG_DEBUG
00705           std::cout << "Coarse Error: " << std::endl;
00706           printvector(result[level+1]);
00707           #endif
00708           
00709           // Interpolate error to fine level and correct solution.
00710           result[level] += viennacl::linalg::prod(P[level],result[level+1]);
00711               
00712           #ifdef VIENNACL_AMG_DEBUG
00713           std::cout << "Corrected Result: " << std::endl;
00714           printvector (result[level]);
00715           #endif
00716           
00717           // Apply Smoother postsmooth_ times.
00718           smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]);
00719           
00720           #ifdef VIENNACL_AMG_DEBUG
00721           std::cout << "After postsmooth: " << std::endl;
00722           printvector (result[level]);
00723           #endif
00724         }    
00725         vec = result[0];
00726       }
00727       
00734       template <typename VectorType>
00735       void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const
00736       {     
00737         VectorType old_result (x.size());
00738   
00739         viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::init();
00740         viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::program_name(),
00741                     "jacobi");
00742   
00743         for (unsigned int i=0; i<iterations; ++i)
00744         {
00745           old_result = x;    
00746           x.clear();
00747           viennacl::ocl::enqueue(k(A[level].handle1().opencl_handle(), A[level].handle2().opencl_handle(), A[level].handle().opencl_handle(),
00748                                   static_cast<ScalarType>(tag_.get_jacobiweight()), 
00749                                   viennacl::traits::opencl_handle(old_result),
00750                                   viennacl::traits::opencl_handle(x),
00751                                   viennacl::traits::opencl_handle(rhs),
00752                                   static_cast<cl_uint>(rhs.size()))); 
00753           
00754         }
00755       }
00756       
00757       amg_tag & tag() { return tag_; }
00758     };
00759 
00760   }
00761 }
00762 
00763 
00764 
00765 #endif
00766