|
ViennaCL - The Vienna Computing Library
1.4.2
|
00001 #ifndef VIENNACL_LINALG_AMG_HPP_ 00002 #define VIENNACL_LINALG_AMG_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00027 #include <boost/numeric/ublas/matrix.hpp> 00028 #include <boost/numeric/ublas/lu.hpp> 00029 #include <boost/numeric/ublas/operation.hpp> 00030 #include <boost/numeric/ublas/vector_proxy.hpp> 00031 #include <boost/numeric/ublas/matrix_proxy.hpp> 00032 #include <boost/numeric/ublas/vector.hpp> 00033 #include <boost/numeric/ublas/triangular.hpp> 00034 #include <vector> 00035 #include <cmath> 00036 #include "viennacl/forwards.h" 00037 #include "viennacl/tools/tools.hpp" 00038 #include "viennacl/linalg/prod.hpp" 00039 #include "viennacl/linalg/direct_solve.hpp" 00040 00041 #include "viennacl/linalg/detail/amg/amg_base.hpp" 00042 #include "viennacl/linalg/detail/amg/amg_coarse.hpp" 00043 #include "viennacl/linalg/detail/amg/amg_interpol.hpp" 00044 00045 #include <map> 00046 00047 #ifdef VIENNACL_WITH_OPENMP 00048 #include <omp.h> 00049 #endif 00050 00051 #include "viennacl/linalg/detail/amg/amg_debug.hpp" 00052 00053 #define VIENNACL_AMG_COARSE_LIMIT 50 00054 #define VIENNACL_AMG_MAX_LEVELS 100 00055 00056 namespace viennacl 00057 { 00058 namespace linalg 00059 { 00060 typedef detail::amg::amg_tag amg_tag; 00061 00062 00063 00071 template <typename InternalType1, typename InternalType2> 00072 void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag) 00073 { 00074 typedef typename InternalType1::value_type SparseMatrixType; 00075 typedef typename InternalType2::value_type PointVectorType; 00076 typedef typename SparseMatrixType::value_type ScalarType; 00077 typedef typename SparseMatrixType::iterator1 InternalRowIterator; 00078 typedef typename SparseMatrixType::iterator2 InternalColIterator; 00079 00080 unsigned int i, iterations, c_points, f_points; 00081 detail::amg::amg_slicing<InternalType1,InternalType2> Slicing; 00082 00083 // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process. 00084 iterations = tag.get_coarselevels(); 00085 if (iterations == 0) 00086 iterations = VIENNACL_AMG_MAX_LEVELS; 00087 00088 // For parallel coarsenings build data structures (number of threads set automatically). 00089 if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3) 00090 Slicing.init(iterations); 00091 00092 for (i=0; i<iterations; ++i) 00093 { 00094 // Initialize Pointvector on level i and construct points. 00095 Pointvector[i] = PointVectorType(A[i].size1()); 00096 Pointvector[i].init_points(); 00097 00098 // Construct C and F points on coarse level (i is fine level, i+1 coarse level). 00099 detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag); 00100 00101 // Calculate number of C and F points on level i. 00102 c_points = Pointvector[i].get_cpoints(); 00103 f_points = Pointvector[i].get_fpoints(); 00104 00105 #if defined (VIENNACL_AMG_DEBUG) //or defined(VIENNACL_AMG_DEBUGBENCH) 00106 std::cout << "Level " << i << ": "; 00107 std::cout << "No of C points = " << c_points << ", "; 00108 std::cout << "No of F points = " << f_points << std::endl; 00109 #endif 00110 00111 // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i. 00112 if (c_points == 0 || f_points == 0) 00113 break; 00114 00115 // Construct interpolation matrix for level i. 00116 detail::amg::amg_interpol (i, A, P, Pointvector, tag); 00117 00118 // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P). 00119 detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]); 00120 00121 // Test triple matrix product. Very slow for large matrix sizes (ublas). 00122 // test_triplematprod(A[i],P[i],A[i+1]); 00123 00124 Pointvector[i].delete_points(); 00125 00126 #ifdef VIENNACL_AMG_DEBUG 00127 std::cout << "Coarse Grid Operator Matrix:" << std::endl; 00128 printmatrix (A[i+1]); 00129 #endif 00130 00131 // If Limit of coarse points is reached then stop. Coarsest level is level i+1. 00132 if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT) 00133 { 00134 tag.set_coarselevels(i+1); 00135 return; 00136 } 00137 } 00138 tag.set_coarselevels(i); 00139 } 00140 00149 template <typename MatrixType, typename InternalType1, typename InternalType2> 00150 void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag) 00151 { 00152 typedef typename MatrixType::value_type ScalarType; 00153 typedef typename InternalType1::value_type SparseMatrixType; 00154 00155 if (tag.get_coarselevels() > 0) 00156 { 00157 A.resize(tag.get_coarselevels()+1); 00158 P.resize(tag.get_coarselevels()); 00159 Pointvector.resize(tag.get_coarselevels()); 00160 } 00161 else 00162 { 00163 A.resize(VIENNACL_AMG_MAX_LEVELS+1); 00164 P.resize(VIENNACL_AMG_MAX_LEVELS); 00165 Pointvector.resize(VIENNACL_AMG_MAX_LEVELS); 00166 } 00167 00168 // Insert operator matrix as operator for finest level. 00169 SparseMatrixType A0 (mat); 00170 A.insert_element (0, A0); 00171 } 00172 00182 template <typename InternalType1, typename InternalType2> 00183 void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag) 00184 { 00185 typedef typename InternalType1::value_type MatrixType; 00186 00187 // Resize internal data structures to actual size. 00188 A.resize(tag.get_coarselevels()+1); 00189 P.resize(tag.get_coarselevels()); 00190 R.resize(tag.get_coarselevels()); 00191 00192 // Transform into matrix type. 00193 for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i) 00194 { 00195 A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false); 00196 A[i] = A_setup[i]; 00197 } 00198 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00199 { 00200 P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false); 00201 P[i] = P_setup[i]; 00202 } 00203 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00204 { 00205 R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false); 00206 P_setup[i].set_trans(true); 00207 R[i] = P_setup[i]; 00208 P_setup[i].set_trans(false); 00209 } 00210 } 00211 00221 template <typename InternalType1, typename InternalType2> 00222 void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag) 00223 { 00224 typedef typename InternalType1::value_type MatrixType; 00225 typedef typename InternalType2::value_type::value_type ScalarType; 00226 00227 // Resize internal data structures to actual size. 00228 A.resize(tag.get_coarselevels()+1); 00229 P.resize(tag.get_coarselevels()); 00230 R.resize(tag.get_coarselevels()); 00231 00232 // Copy to GPU using the internal sparse matrix structure: std::vector<std::map>. 00233 for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i) 00234 { 00235 //A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false); 00236 viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]); 00237 } 00238 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00239 { 00240 //P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false); 00241 viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]); 00242 //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]); 00243 } 00244 for (unsigned int i=0; i<tag.get_coarselevels(); ++i) 00245 { 00246 //R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false); 00247 P_setup[i].set_trans(true); 00248 viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]); 00249 P_setup[i].set_trans(false); 00250 } 00251 } 00252 00261 template <typename InternalVectorType, typename SparseMatrixType> 00262 void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag) 00263 { 00264 typedef typename InternalVectorType::value_type VectorType; 00265 00266 result.resize(tag.get_coarselevels()+1); 00267 rhs.resize(tag.get_coarselevels()+1); 00268 residual.resize(tag.get_coarselevels()); 00269 00270 for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level) 00271 { 00272 result[level] = VectorType(A[level].size1()); 00273 result[level].clear(); 00274 rhs[level] = VectorType(A[level].size1()); 00275 rhs[level].clear(); 00276 } 00277 for (unsigned int level=0; level < tag.get_coarselevels(); ++level) 00278 { 00279 residual[level] = VectorType(A[level].size1()); 00280 residual[level].clear(); 00281 } 00282 } 00290 template <typename ScalarType, typename SparseMatrixType> 00291 void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<> & Permutation, SparseMatrixType const & A) 00292 { 00293 typedef typename SparseMatrixType::const_iterator1 ConstRowIterator; 00294 typedef typename SparseMatrixType::const_iterator2 ConstColIterator; 00295 00296 // Copy to operator matrix. Needed 00297 op.resize(A.size1(),A.size2(),false); 00298 for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter) 00299 for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00300 op (col_iter.index1(), col_iter.index2()) = *col_iter; 00301 00302 // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()! 00303 Permutation = boost::numeric::ublas::permutation_matrix<> (op.size1()); 00304 boost::numeric::ublas::lu_factorize(op,Permutation); 00305 } 00306 00309 template <typename MatrixType> 00310 class amg_precond 00311 { 00312 typedef typename MatrixType::value_type ScalarType; 00313 typedef boost::numeric::ublas::vector<ScalarType> VectorType; 00314 typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType; 00315 typedef detail::amg::amg_pointvector PointVectorType; 00316 00317 typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator; 00318 typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator; 00319 typedef typename SparseMatrixType::iterator1 InternalRowIterator; 00320 typedef typename SparseMatrixType::iterator2 InternalColIterator; 00321 00322 boost::numeric::ublas::vector <SparseMatrixType> A_setup; 00323 boost::numeric::ublas::vector <SparseMatrixType> P_setup; 00324 boost::numeric::ublas::vector <MatrixType> A; 00325 boost::numeric::ublas::vector <MatrixType> P; 00326 boost::numeric::ublas::vector <MatrixType> R; 00327 boost::numeric::ublas::vector <PointVectorType> Pointvector; 00328 00329 mutable boost::numeric::ublas::compressed_matrix<ScalarType> op; 00330 mutable boost::numeric::ublas::permutation_matrix<> Permutation; 00331 00332 mutable boost::numeric::ublas::vector <VectorType> result; 00333 mutable boost::numeric::ublas::vector <VectorType> rhs; 00334 mutable boost::numeric::ublas::vector <VectorType> residual; 00335 00336 mutable bool done_init_apply; 00337 00338 amg_tag tag_; 00339 public: 00340 00341 amg_precond(): Permutation(0) {} 00347 amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0) 00348 { 00349 tag_ = tag; 00350 // Initialize data structures. 00351 amg_init (mat,A_setup,P_setup,Pointvector,tag_); 00352 00353 done_init_apply = false; 00354 } 00355 00358 void setup() 00359 { 00360 // Start setup phase. 00361 amg_setup(A_setup,P_setup,Pointvector,tag_); 00362 // Transform to CPU-Matrixtype for precondition phase. 00363 amg_transform_cpu(A,P,R,A_setup,P_setup,tag_); 00364 00365 done_init_apply = false; 00366 } 00367 00372 void init_apply() const 00373 { 00374 // Setup precondition phase (Data structures). 00375 amg_setup_apply(result,rhs,residual,A_setup,tag_); 00376 // Do LU factorization for direct solve. 00377 amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]); 00378 00379 done_init_apply = true; 00380 } 00381 00387 template <typename VectorType> 00388 ScalarType calc_complexity(VectorType & avgstencil) 00389 { 00390 avgstencil = VectorType (tag_.get_coarselevels()+1); 00391 unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0; 00392 00393 for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level) 00394 { 00395 level_coefficients = 0; 00396 for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter) 00397 { 00398 for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00399 { 00400 if (level == 0) 00401 systemmat_nonzero++; 00402 nonzero++; 00403 level_coefficients++; 00404 } 00405 } 00406 avgstencil[level] = level_coefficients/static_cast<ScalarType>(A_setup[level].size1()); 00407 } 00408 return nonzero/static_cast<ScalarType>(systemmat_nonzero); 00409 } 00410 00415 template <typename VectorType> 00416 void apply(VectorType & vec) const 00417 { 00418 // Build data structures and do lu factorization before first iteration step. 00419 if (!done_init_apply) 00420 init_apply(); 00421 00422 int level; 00423 00424 // Precondition operation (Yang, p.3) 00425 rhs[0] = vec; 00426 for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++) 00427 { 00428 result[level].clear(); 00429 00430 // Apply Smoother presmooth_ times. 00431 smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]); 00432 00433 #ifdef VIENNACL_AMG_DEBUG 00434 std::cout << "After presmooth:" << std::endl; 00435 printvector(result[level]); 00436 #endif 00437 00438 // Compute residual. 00439 residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]); 00440 00441 #ifdef VIENNACL_AMG_DEBUG 00442 std::cout << "Residual:" << std::endl; 00443 printvector(residual[level]); 00444 #endif 00445 00446 // Restrict to coarse level. Restricted residual is RHS of coarse level. 00447 rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]); 00448 00449 #ifdef VIENNACL_AMG_DEBUG 00450 std::cout << "Restricted Residual: " << std::endl; 00451 printvector(rhs[level+1]); 00452 #endif 00453 } 00454 00455 // On highest level use direct solve to solve equation. 00456 result[level] = rhs[level]; 00457 boost::numeric::ublas::lu_substitute(op,Permutation,result[level]); 00458 00459 #ifdef VIENNACL_AMG_DEBUG 00460 std::cout << "After direct solve: " << std::endl; 00461 printvector (result[level]); 00462 #endif 00463 00464 for (level=tag_.get_coarselevels()-1; level >= 0; level--) 00465 { 00466 #ifdef VIENNACL_AMG_DEBUG 00467 std::cout << "Coarse Error: " << std::endl; 00468 printvector(result[level+1]); 00469 #endif 00470 00471 // Interpolate error to fine level. Correct solution by adding error. 00472 result[level] += boost::numeric::ublas::prod (P[level], result[level+1]); 00473 00474 #ifdef VIENNACL_AMG_DEBUG 00475 std::cout << "Corrected Result: " << std::endl; 00476 printvector (result[level]); 00477 #endif 00478 00479 // Apply Smoother postsmooth_ times. 00480 smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]); 00481 00482 #ifdef VIENNACL_AMG_DEBUG 00483 std::cout << "After postsmooth: " << std::endl; 00484 printvector (result[level]); 00485 #endif 00486 } 00487 vec = result[0]; 00488 } 00489 00496 template <typename VectorType> 00497 void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const 00498 { 00499 VectorType old_result (x.size()); 00500 unsigned int index; 00501 ScalarType sum = 0, diag = 1; 00502 00503 for (int i=0; i<iterations; ++i) 00504 { 00505 old_result = x; 00506 x.clear(); 00507 #ifdef VIENNACL_WITH_OPENMP 00508 #pragma omp parallel for private (sum,diag) shared (rhs,x) 00509 #endif 00510 for (index=0; index<A_setup[level].size1(); ++index) 00511 { 00512 InternalConstRowIterator row_iter = A_setup[level].begin1(); 00513 row_iter += index; 00514 sum = 0; 00515 diag = 1; 00516 for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00517 { 00518 if (col_iter.index1() == col_iter.index2()) 00519 diag = *col_iter; 00520 else 00521 sum += *col_iter * old_result[col_iter.index2()]; 00522 } 00523 x[index]= static_cast<ScalarType>(tag_.get_jacobiweight()) * (rhs[index] - sum) / diag + (1-static_cast<ScalarType>(tag_.get_jacobiweight())) * old_result[index]; 00524 } 00525 } 00526 } 00527 00528 amg_tag & tag() { return tag_; } 00529 }; 00530 00535 template <typename ScalarType, unsigned int MAT_ALIGNMENT> 00536 class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> > 00537 { 00538 typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType; 00539 typedef viennacl::vector<ScalarType> VectorType; 00540 typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType; 00541 typedef detail::amg::amg_pointvector PointVectorType; 00542 00543 typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator; 00544 typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator; 00545 typedef typename SparseMatrixType::iterator1 InternalRowIterator; 00546 typedef typename SparseMatrixType::iterator2 InternalColIterator; 00547 00548 boost::numeric::ublas::vector <SparseMatrixType> A_setup; 00549 boost::numeric::ublas::vector <SparseMatrixType> P_setup; 00550 boost::numeric::ublas::vector <MatrixType> A; 00551 boost::numeric::ublas::vector <MatrixType> P; 00552 boost::numeric::ublas::vector <MatrixType> R; 00553 boost::numeric::ublas::vector <PointVectorType> Pointvector; 00554 00555 mutable boost::numeric::ublas::compressed_matrix<ScalarType> op; 00556 mutable boost::numeric::ublas::permutation_matrix<> Permutation; 00557 00558 mutable boost::numeric::ublas::vector <VectorType> result; 00559 mutable boost::numeric::ublas::vector <VectorType> rhs; 00560 mutable boost::numeric::ublas::vector <VectorType> residual; 00561 00562 mutable bool done_init_apply; 00563 00564 amg_tag tag_; 00565 00566 public: 00567 00568 amg_precond(): Permutation(0) {} 00569 00575 amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0) 00576 { 00577 tag_ = tag; 00578 00579 // Copy to CPU. Internal structure of sparse matrix is used for copy operation. 00580 std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1()); 00581 viennacl::copy(mat, mat2); 00582 00583 // Initialize data structures. 00584 amg_init (mat2,A_setup,P_setup,Pointvector,tag_); 00585 00586 done_init_apply = false; 00587 } 00588 00591 void setup() 00592 { 00593 // Start setup phase. 00594 amg_setup(A_setup,P_setup,Pointvector, tag_); 00595 // Transform to GPU-Matrixtype for precondition phase. 00596 amg_transform_gpu(A,P,R,A_setup,P_setup, tag_); 00597 00598 done_init_apply = false; 00599 } 00600 00605 void init_apply() const 00606 { 00607 // Setup precondition phase (Data structures). 00608 amg_setup_apply(result,rhs,residual,A_setup,tag_); 00609 // Do LU factorization for direct solve. 00610 amg_lu(op,Permutation,A_setup[tag_.get_coarselevels()]); 00611 00612 done_init_apply = true; 00613 } 00614 00620 template <typename VectorType> 00621 ScalarType calc_complexity(VectorType & avgstencil) 00622 { 00623 avgstencil = VectorType (tag_.get_coarselevels()+1); 00624 unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0; 00625 00626 for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level) 00627 { 00628 level_coefficients = 0; 00629 for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter) 00630 { 00631 for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) 00632 { 00633 if (level == 0) 00634 systemmat_nonzero++; 00635 nonzero++; 00636 level_coefficients++; 00637 } 00638 } 00639 avgstencil[level] = level_coefficients/(double)A[level].size1(); 00640 } 00641 return nonzero/static_cast<double>(systemmat_nonzero); 00642 } 00643 00648 template <typename VectorType> 00649 void apply(VectorType & vec) const 00650 { 00651 if (!done_init_apply) 00652 init_apply(); 00653 00654 int level; 00655 00656 // Precondition operation (Yang, p.3). 00657 rhs[0] = vec; 00658 for (level=0; level <static_cast<int>(tag_.get_coarselevels()); level++) 00659 { 00660 result[level].clear(); 00661 00662 // Apply Smoother presmooth_ times. 00663 smooth_jacobi (level, tag_.get_presmooth(), result[level], rhs[level]); 00664 00665 #ifdef VIENNACL_AMG_DEBUG 00666 std::cout << "After presmooth: " << std::endl; 00667 printvector(result[level]); 00668 #endif 00669 00670 // Compute residual. 00671 residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]); 00672 00673 #ifdef VIENNACL_AMG_DEBUG 00674 std::cout << "Residual: " << std::endl; 00675 printvector(residual[level]); 00676 #endif 00677 00678 // Restrict to coarse level. Result is RHS of coarse level equation. 00679 //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]); 00680 rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]); 00681 00682 #ifdef VIENNACL_AMG_DEBUG 00683 std::cout << "Restricted Residual: " << std::endl; 00684 printvector(rhs[level+1]); 00685 #endif 00686 } 00687 00688 // On highest level use direct solve to solve equation (on the CPU) 00689 //TODO: Use GPU direct solve! 00690 result[level] = rhs[level]; 00691 boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size()); 00692 00693 copy (result[level],result_cpu); 00694 boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu); 00695 copy (result_cpu, result[level]); 00696 00697 #ifdef VIENNACL_AMG_DEBUG 00698 std::cout << "After direct solve: " << std::endl; 00699 printvector (result[level]); 00700 #endif 00701 00702 for (level=tag_.get_coarselevels()-1; level >= 0; level--) 00703 { 00704 #ifdef VIENNACL_AMG_DEBUG 00705 std::cout << "Coarse Error: " << std::endl; 00706 printvector(result[level+1]); 00707 #endif 00708 00709 // Interpolate error to fine level and correct solution. 00710 result[level] += viennacl::linalg::prod(P[level],result[level+1]); 00711 00712 #ifdef VIENNACL_AMG_DEBUG 00713 std::cout << "Corrected Result: " << std::endl; 00714 printvector (result[level]); 00715 #endif 00716 00717 // Apply Smoother postsmooth_ times. 00718 smooth_jacobi (level, tag_.get_postsmooth(), result[level], rhs[level]); 00719 00720 #ifdef VIENNACL_AMG_DEBUG 00721 std::cout << "After postsmooth: " << std::endl; 00722 printvector (result[level]); 00723 #endif 00724 } 00725 vec = result[0]; 00726 } 00727 00734 template <typename VectorType> 00735 void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const 00736 { 00737 VectorType old_result (x.size()); 00738 00739 viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::init(); 00740 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::program_name(), 00741 "jacobi"); 00742 00743 for (unsigned int i=0; i<iterations; ++i) 00744 { 00745 old_result = x; 00746 x.clear(); 00747 viennacl::ocl::enqueue(k(A[level].handle1().opencl_handle(), A[level].handle2().opencl_handle(), A[level].handle().opencl_handle(), 00748 static_cast<ScalarType>(tag_.get_jacobiweight()), 00749 viennacl::traits::opencl_handle(old_result), 00750 viennacl::traits::opencl_handle(x), 00751 viennacl::traits::opencl_handle(rhs), 00752 static_cast<cl_uint>(rhs.size()))); 00753 00754 } 00755 } 00756 00757 amg_tag & tag() { return tag_; } 00758 }; 00759 00760 } 00761 } 00762 00763 00764 00765 #endif 00766
1.7.6.1