ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
gmres.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include <limits>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/traits/size.hpp"
37 
40 
41 
42 namespace viennacl
43 {
44 namespace linalg
45 {
46 
49 class gmres_tag //generalized minimum residual
50 {
51 public:
58  gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
59  : tol_(tol), abs_tol_(0), iterations_(max_iterations), krylov_dim_(krylov_dim), iters_taken_(0) {}
60 
62  double tolerance() const { return tol_; }
63 
65  double abs_tolerance() const { return abs_tol_; }
67  void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
68 
70  unsigned int max_iterations() const { return iterations_; }
72  unsigned int krylov_dim() const { return krylov_dim_; }
74  unsigned int max_restarts() const
75  {
76  unsigned int ret = iterations_ / krylov_dim_;
77  if (ret > 0 && (ret * krylov_dim_ == iterations_) )
78  return ret - 1;
79  return ret;
80  }
81 
83  unsigned int iters() const { return iters_taken_; }
85  void iters(unsigned int i) const { iters_taken_ = i; }
86 
88  double error() const { return last_error_; }
90  void error(double e) const { last_error_ = e; }
91 
92 private:
93  double tol_;
94  double abs_tol_;
95  unsigned int iterations_;
96  unsigned int krylov_dim_;
97 
98  //return values from solver
99  mutable unsigned int iters_taken_;
100  mutable double last_error_;
101 };
102 
103 namespace detail
104 {
105 
106  template<typename SrcVectorT, typename DestVectorT>
107  void gmres_copy_helper(SrcVectorT const & src, DestVectorT & dest, vcl_size_t len, vcl_size_t start = 0)
108  {
109  for (vcl_size_t i=0; i<len; ++i)
110  dest[start+i] = src[start+i];
111  }
112 
113  template<typename NumericT, typename DestVectorT>
114  void gmres_copy_helper(viennacl::vector<NumericT> const & src, DestVectorT & dest, vcl_size_t len, vcl_size_t start = 0)
115  {
116  typedef typename viennacl::vector<NumericT>::difference_type difference_type;
117  viennacl::copy( src.begin() + static_cast<difference_type>(start),
118  src.begin() + static_cast<difference_type>(start + len),
119  dest.begin() + static_cast<difference_type>(start));
120  }
121 
130  template<typename VectorT, typename NumericT>
131  void gmres_setup_householder_vector(VectorT const & input_vec, VectorT & hh_vec, NumericT & beta, NumericT & mu, vcl_size_t j)
132  {
133  NumericT input_j = input_vec(j);
134 
135  // copy entries from input vector to householder vector:
136  detail::gmres_copy_helper(input_vec, hh_vec, viennacl::traits::size(hh_vec) - (j+1), j+1);
137 
138  NumericT sigma = viennacl::linalg::norm_2(hh_vec);
139  sigma *= sigma;
140 
141  if (sigma <= 0)
142  {
143  beta = 0;
144  mu = input_j;
145  }
146  else
147  {
148  mu = std::sqrt(sigma + input_j*input_j);
149 
150  NumericT hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
151 
152  beta = NumericT(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
153 
154  //divide hh_vec by its diagonal element hh_vec_0
155  hh_vec /= hh_vec_0;
156  hh_vec[j] = NumericT(1);
157  }
158  }
159 
160  // Apply (I - beta h h^T) to x (Householder reflection with Householder vector h)
161  template<typename VectorT, typename NumericT>
162  void gmres_householder_reflect(VectorT & x, VectorT const & h, NumericT beta)
163  {
164  NumericT hT_in_x = viennacl::linalg::inner_prod(h, x);
165  x -= (beta * hT_in_x) * h;
166  }
167 
168 
181  template <typename MatrixType, typename ScalarType>
183  viennacl::vector<ScalarType> const & rhs,
184  gmres_tag const & tag,
186  bool (*monitor)(viennacl::vector<ScalarType> const &, ScalarType, void*) = NULL,
187  void *monitor_data = NULL)
188  {
189  viennacl::vector<ScalarType> residual(rhs);
191 
192  viennacl::vector<ScalarType> device_krylov_basis(rhs.internal_size() * tag.krylov_dim(), viennacl::traits::context(rhs)); // not using viennacl::matrix here because of spurious padding in column number
194  std::vector<ScalarType> host_buffer_R(device_buffer_R.size());
195 
196  vcl_size_t buffer_size_per_vector = 128;
197  vcl_size_t num_buffer_chunks = 3;
198  viennacl::vector<ScalarType> device_inner_prod_buffer = viennacl::zero_vector<ScalarType>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer
199  viennacl::vector<ScalarType> device_r_dot_vk_buffer = viennacl::zero_vector<ScalarType>(buffer_size_per_vector * tag.krylov_dim(), viennacl::traits::context(rhs)); // holds result of first reduction stage for <r, v_k> on device
200  viennacl::vector<ScalarType> device_vi_in_vk_buffer = viennacl::zero_vector<ScalarType>(buffer_size_per_vector * tag.krylov_dim(), viennacl::traits::context(rhs)); // holds <v_i, v_k> for i=0..k-1 on device
201  viennacl::vector<ScalarType> device_values_xi_k = viennacl::zero_vector<ScalarType>(tag.krylov_dim(), viennacl::traits::context(rhs)); // holds values \xi_k = <r, v_k> on device
202  std::vector<ScalarType> host_r_dot_vk_buffer(device_r_dot_vk_buffer.size());
203  std::vector<ScalarType> host_values_xi_k(tag.krylov_dim());
204  std::vector<ScalarType> host_values_eta_k_buffer(tag.krylov_dim());
205  std::vector<ScalarType> host_update_coefficients(tag.krylov_dim());
206 
207  ScalarType norm_rhs = viennacl::linalg::norm_2(residual);
208  ScalarType rho_0 = norm_rhs;
209  ScalarType rho = ScalarType(1);
210 
211  tag.iters(0);
212 
213  for (unsigned int restart_count = 0; restart_count <= tag.max_restarts(); ++restart_count)
214  {
215  //
216  // prepare restart:
217  //
218  if (restart_count > 0)
219  {
220  // compute new residual without introducing a temporary for A*x:
221  residual = viennacl::linalg::prod(A, result);
222  residual = rhs - residual;
223 
224  rho_0 = viennacl::linalg::norm_2(residual);
225  }
226 
227  if (rho_0 <= ScalarType(tag.abs_tolerance())) // trivial right hand side?
228  break;
229 
230  residual /= rho_0;
231  rho = ScalarType(1);
232 
233  // check for convergence:
234  if (rho_0 / norm_rhs < tag.tolerance() || rho_0 < tag.abs_tolerance())
235  break;
236 
237  //
238  // minimize in Krylov basis:
239  //
240  vcl_size_t k = 0;
241  for (k = 0; k < static_cast<vcl_size_t>(tag.krylov_dim()); ++k)
242  {
243  if (k == 0)
244  {
245  // compute v0 = A*r and perform first reduction stage for ||v0||
246  viennacl::vector_range<viennacl::vector<ScalarType> > v0(device_krylov_basis, viennacl::range(0, rhs.size()));
247  viennacl::linalg::pipelined_gmres_prod(A, residual, v0, device_inner_prod_buffer);
248 
249  // Normalize v_1 and compute first reduction stage for <r, v_0> in device_r_dot_vk_buffer:
251  device_buffer_R, k*tag.krylov_dim() + k,
252  device_inner_prod_buffer, device_r_dot_vk_buffer,
253  buffer_size_per_vector, k*buffer_size_per_vector);
254  }
255  else
256  {
257  // compute v0 = A*r and perform first reduction stage for ||v0||
258  viennacl::vector_range<viennacl::vector<ScalarType> > vk (device_krylov_basis, viennacl::range( k *rhs.internal_size(), k *rhs.internal_size() + rhs.size()));
259  viennacl::vector_range<viennacl::vector<ScalarType> > vk_minus_1(device_krylov_basis, viennacl::range((k-1)*rhs.internal_size(), (k-1)*rhs.internal_size() + rhs.size()));
260  viennacl::linalg::pipelined_gmres_prod(A, vk_minus_1, vk, device_inner_prod_buffer);
261 
262  //
263  // Gram-Schmidt, stage 1: compute first reduction stage of <v_i, v_k>
264  //
265  viennacl::linalg::pipelined_gmres_gram_schmidt_stage1(device_krylov_basis, rhs.size(), rhs.internal_size(), k, device_vi_in_vk_buffer, buffer_size_per_vector);
266 
267  //
268  // Gram-Schmidt, stage 2: compute second reduction stage of <v_i, v_k> and use that to compute v_k -= sum_i <v_i, v_k> v_i.
269  // Store <v_i, v_k> in R-matrix and compute first reduction stage for ||v_k||
270  //
271  viennacl::linalg::pipelined_gmres_gram_schmidt_stage2(device_krylov_basis, rhs.size(), rhs.internal_size(), k,
272  device_vi_in_vk_buffer,
273  device_buffer_R, tag.krylov_dim(),
274  device_inner_prod_buffer, buffer_size_per_vector);
275 
276  //
277  // Normalize v_k and compute first reduction stage for <r, v_k> in device_r_dot_vk_buffer:
278  //
280  device_buffer_R, k*tag.krylov_dim() + k,
281  device_inner_prod_buffer, device_r_dot_vk_buffer,
282  buffer_size_per_vector, k*buffer_size_per_vector);
283  }
284  }
285 
286  //
287  // Run reduction to obtain the values \xi_k = <r, v_k>.
288  // Note that unlike Algorithm 2.1 in Walker: "A Simpler GMRES", we do not update the residual
289  //
290  viennacl::fast_copy(device_r_dot_vk_buffer.begin(), device_r_dot_vk_buffer.end(), host_r_dot_vk_buffer.begin());
291  for (std::size_t i=0; i<k; ++i)
292  {
293  host_values_xi_k[i] = ScalarType(0);
294  for (std::size_t j=0; j<buffer_size_per_vector; ++j)
295  host_values_xi_k[i] += host_r_dot_vk_buffer[i*buffer_size_per_vector + j];
296  }
297 
298  //
299  // Bring values in R back to host:
300  //
301  viennacl::fast_copy(device_buffer_R.begin(), device_buffer_R.end(), host_buffer_R.begin());
302 
303  //
304  // Check for premature convergence: If the diagonal element drops too far below the first norm, we're done and restrict the Krylov size accordingly.
305  //
306  vcl_size_t full_krylov_dim = k; //needed for proper access to R
307  for (std::size_t i=0; i<k; ++i)
308  {
309  if (std::fabs(host_buffer_R[i + i*k]) < tag.tolerance() * host_buffer_R[0])
310  {
311  k = i;
312  break;
313  }
314  }
315 
316 
317  // Compute error estimator:
318  for (std::size_t i=0; i<k; ++i)
319  {
320  tag.iters( tag.iters() + 1 ); //increase iteration counter
321 
322  // check for accumulation of round-off errors for poorly conditioned systems
323  if (host_values_xi_k[i] >= rho || host_values_xi_k[i] <= -rho)
324  {
325  k = i;
326  break; // restrict Krylov space at this point. No gain from using additional basis vectors, since orthogonality is lost.
327  }
328 
329  // update error estimator
330  rho *= std::sin( std::acos(host_values_xi_k[i] / rho) );
331  }
332 
333  //
334  // Solve minimization problem:
335  //
336  host_values_eta_k_buffer = host_values_xi_k;
337 
338  for (int i2=static_cast<int>(k)-1; i2>-1; --i2)
339  {
340  vcl_size_t i = static_cast<vcl_size_t>(i2);
341  for (vcl_size_t j=static_cast<vcl_size_t>(i)+1; j<k; ++j)
342  host_values_eta_k_buffer[i] -= host_buffer_R[i + j*full_krylov_dim] * host_values_eta_k_buffer[j];
343 
344  host_values_eta_k_buffer[i] /= host_buffer_R[i + i*full_krylov_dim];
345  }
346 
347  //
348  // Update x += rho * z with z = \eta_0 * residual + sum_{i=0}^{k-1} \eta_{i+1} v_i
349  // Note that we have not updated the residual yet, hence this slightly modified as compared to the form given in Algorithm 2.1 in Walker: "A Simpler GMRES"
350  //
351  for (vcl_size_t i=0; i<k; ++i)
352  host_update_coefficients[i] = rho_0 * host_values_eta_k_buffer[i];
353 
354  viennacl::fast_copy(host_update_coefficients.begin(), host_update_coefficients.end(), device_values_xi_k.begin()); //reuse device_values_xi_k_buffer here for simplicity
355 
357  device_krylov_basis, rhs.size(), rhs.internal_size(),
358  device_values_xi_k, k);
359 
360  tag.error( std::fabs(rho*rho_0 / norm_rhs) );
361 
362  if (monitor && monitor(result, std::fabs(rho*rho_0 / norm_rhs), monitor_data))
363  break;
364  }
365 
366  return result;
367  }
368 
370  template<typename NumericT>
372  viennacl::vector<NumericT> const & rhs,
373  gmres_tag const & tag,
375  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
376  void *monitor_data = NULL)
377  {
378  return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
379  }
380 
381 
383  template<typename NumericT>
385  viennacl::vector<NumericT> const & rhs,
386  gmres_tag const & tag,
388  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
389  void *monitor_data = NULL)
390  {
391  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
392  }
393 
394 
395 
397  template<typename NumericT>
399  viennacl::vector<NumericT> const & rhs,
400  gmres_tag const & tag,
402  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
403  void *monitor_data = NULL)
404  {
405  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
406  }
407 
408 
409 
411  template<typename NumericT>
413  viennacl::vector<NumericT> const & rhs,
414  gmres_tag const & tag,
416  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
417  void *monitor_data = NULL)
418  {
419  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
420  }
421 
422 
424  template<typename NumericT>
426  viennacl::vector<NumericT> const & rhs,
427  gmres_tag const & tag,
429  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
430  void *monitor_data = NULL)
431  {
432  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
433  }
434 
435 
449  template<typename MatrixT, typename VectorT, typename PreconditionerT>
450  VectorT solve_impl(MatrixT const & matrix,
451  VectorT const & rhs,
452  gmres_tag const & tag,
453  PreconditionerT const & precond,
454  bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
455  void *monitor_data = NULL)
456  {
457  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
458  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
459 
460  unsigned int problem_size = static_cast<unsigned int>(viennacl::traits::size(rhs));
461  VectorT result = rhs;
462  viennacl::traits::clear(result);
463 
464  vcl_size_t krylov_dim = static_cast<vcl_size_t>(tag.krylov_dim());
465  if (problem_size < krylov_dim)
466  krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
467 
468  VectorT res = rhs;
469  VectorT v_k_tilde = rhs;
470  VectorT v_k_tilde_temp = rhs;
471 
472  std::vector< std::vector<CPU_NumericType> > R(krylov_dim, std::vector<CPU_NumericType>(tag.krylov_dim()));
473  std::vector<CPU_NumericType> projection_rhs(krylov_dim);
474 
475  std::vector<VectorT> householder_reflectors(krylov_dim, rhs);
476  std::vector<CPU_NumericType> betas(krylov_dim);
477 
478  CPU_NumericType norm_rhs = viennacl::linalg::norm_2(rhs);
479 
480  if (norm_rhs <= tag.abs_tolerance()) //solution is zero if RHS norm is zero
481  return result;
482 
483  tag.iters(0);
484 
485  for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
486  {
487  //
488  // (Re-)Initialize residual: r = b - A*x (without temporary for the result of A*x)
489  //
490  res = viennacl::linalg::prod(matrix, result); //initial guess zero
491  res = rhs - res;
492  precond.apply(res);
493 
494  CPU_NumericType rho_0 = viennacl::linalg::norm_2(res);
495 
496  //
497  // Check for premature convergence
498  //
499  if (rho_0 / norm_rhs < tag.tolerance() || rho_0 < tag.abs_tolerance()) // norm_rhs is known to be nonzero here
500  {
501  tag.error(rho_0 / norm_rhs);
502  return result;
503  }
504 
505  //
506  // Normalize residual and set 'rho' to 1 as requested in 'A Simpler GMRES' by Walker and Zhou.
507  //
508  res /= rho_0;
509  CPU_NumericType rho = static_cast<CPU_NumericType>(1.0);
510 
511 
512  //
513  // Iterate up until maximal Krylove space dimension is reached:
514  //
515  vcl_size_t k = 0;
516  for (k = 0; k < krylov_dim; ++k)
517  {
518  tag.iters( tag.iters() + 1 ); //increase iteration counter
519 
520  // prepare storage:
522  viennacl::traits::clear(householder_reflectors[k]);
523 
524  //compute v_k = A * v_{k-1} via Householder matrices
525  if (k == 0)
526  {
527  v_k_tilde = viennacl::linalg::prod(matrix, res);
528  precond.apply(v_k_tilde);
529  }
530  else
531  {
532  viennacl::traits::clear(v_k_tilde);
533  v_k_tilde[k-1] = CPU_NumericType(1);
534 
535  //Householder rotations, part 1: Compute P_1 * P_2 * ... * P_{k-1} * e_{k-1}
536  for (int i = static_cast<int>(k)-1; i > -1; --i)
537  detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[vcl_size_t(i)], betas[vcl_size_t(i)]);
538 
539  v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
540  precond.apply(v_k_tilde_temp);
541  v_k_tilde = v_k_tilde_temp;
542 
543  //Householder rotations, part 2: Compute P_{k-1} * ... * P_{1} * v_k_tilde
544  for (vcl_size_t i = 0; i < k; ++i)
545  detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]);
546  }
547 
548  //
549  // Compute Householder reflection for v_k_tilde such that all entries below k-th entry are zero:
550  //
551  CPU_NumericType rho_k_k = 0;
552  detail::gmres_setup_householder_vector(v_k_tilde, householder_reflectors[k], betas[k], rho_k_k, k);
553 
554  //
555  // copy first k entries from v_k_tilde to R[k] in order to fill k-th column with result of
556  // P_k * v_k_tilde = (v[0], ... , v[k-1], norm(v), 0, 0, ...) =: (rho_{1,k}, rho_{2,k}, ..., rho_{k,k}, 0, ..., 0);
557  //
558  detail::gmres_copy_helper(v_k_tilde, R[k], k);
559  R[k][k] = rho_k_k;
560 
561  //
562  // Update residual: r = P_k r
563  // Set zeta_k = r[k] including machine precision considerations: mathematically we have |r[k]| <= rho
564  // Set rho *= sin(acos(r[k] / rho))
565  //
566  detail::gmres_householder_reflect(res, householder_reflectors[k], betas[k]);
567 
568  if (res[k] > rho) //machine precision reached
569  res[k] = rho;
570  if (res[k] < -rho) //machine precision reached
571  res[k] = -rho;
572  projection_rhs[k] = res[k];
573 
574  rho *= std::sin( std::acos(projection_rhs[k] / rho) );
575 
576  if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance()) // Residual is sufficiently reduced, stop here
577  {
578  tag.error( std::fabs(rho*rho_0 / norm_rhs) );
579  ++k;
580  break;
581  }
582  } // for k
583 
584  //
585  // Triangular solver stage:
586  //
587 
588  for (int i2=static_cast<int>(k)-1; i2>-1; --i2)
589  {
590  vcl_size_t i = static_cast<vcl_size_t>(i2);
591  for (vcl_size_t j=i+1; j<k; ++j)
592  projection_rhs[i] -= R[j][i] * projection_rhs[j]; //R is transposed
593 
594  projection_rhs[i] /= R[i][i];
595  }
596 
597  //
598  // Note: 'projection_rhs' now holds the solution (eta_1, ..., eta_k)
599  //
600 
601  res *= projection_rhs[0];
602 
603  if (k > 0)
604  {
605  for (unsigned int i = 0; i < k-1; ++i)
606  res[i] += projection_rhs[i+1];
607  }
608 
609  //
610  // Form z inplace in 'res' by applying P_1 * ... * P_{k}
611  //
612  for (int i=static_cast<int>(k)-1; i>=0; --i)
613  detail::gmres_householder_reflect(res, householder_reflectors[vcl_size_t(i)], betas[vcl_size_t(i)]);
614 
615  res *= rho_0;
616  result += res; // x += rho_0 * z in the paper
617 
618  //
619  // Check for convergence:
620  //
621  tag.error(std::fabs(rho*rho_0 / norm_rhs));
622 
623  if (monitor && monitor(result, std::fabs(rho*rho_0 / norm_rhs), monitor_data))
624  break;
625 
626  if ( tag.error() < tag.tolerance() )
627  return result;
628  }
629 
630  return result;
631  }
632 
633 }
634 
635 template<typename MatrixT, typename VectorT, typename PreconditionerT>
636 VectorT solve(MatrixT const & matrix, VectorT const & rhs, gmres_tag const & tag, PreconditionerT const & precond)
637 {
638  return detail::solve_impl(matrix, rhs, tag, precond);
639 }
640 
646 template<typename IndexT, typename NumericT, typename PreconditionerT>
647 std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, gmres_tag const & tag, PreconditionerT const & precond)
648 {
650  viennacl::copy(A, vcl_A);
651 
652  viennacl::vector<NumericT> vcl_rhs(rhs.size());
653  viennacl::copy(rhs, vcl_rhs);
654 
655  viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
656 
657  std::vector<NumericT> result(vcl_result.size());
658  viennacl::copy(vcl_result, result);
659  return result;
660 }
661 
669 template<typename MatrixT, typename VectorT>
670 VectorT solve(MatrixT const & A, VectorT const & rhs, gmres_tag const & tag)
671 {
672  return solve(A, rhs, tag, no_precond());
673 }
674 
675 
676 
677 template<typename VectorT>
679 {
680 public:
682 
683  gmres_solver(gmres_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {}
684 
685  template<typename MatrixT, typename PreconditionerT>
686  VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const
687  {
688  if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account
689  {
690  VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
691  mod_rhs = b - mod_rhs;
692  VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
693  return init_guess_ + y;
694  }
695  return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_);
696  }
697 
698 
699  template<typename MatrixT>
700  VectorT operator()(MatrixT const & A, VectorT const & b) const
701  {
703  }
704 
709  void set_initial_guess(VectorT const & x) { init_guess_ = x; }
710 
718  void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
719  {
720  monitor_callback_ = monitor_fun;
721  user_data_ = user_data;
722  }
723 
725  gmres_tag const & tag() const { return tag_; }
726 
727 private:
728  gmres_tag tag_;
729  VectorT init_guess_;
730  bool (*monitor_callback_)(VectorT const &, numeric_type, void *);
731  void *user_data_;
732 };
733 
734 
735 }
736 }
737 
738 #endif
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:406
unsigned int max_restarts() const
Returns the maximum number of GMRES restarts.
Definition: gmres.hpp:74
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:96
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::result_of::cpu_value_type< VectorT >::type numeric_type
Definition: gmres.hpp:681
unsigned int max_iterations() const
Returns the maximum number of iterations.
Definition: gmres.hpp:70
Generic size and resize functionality for different vector and matrix types.
unsigned int iters() const
Return the number of solver iterations:
Definition: gmres.hpp:83
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
void abs_tolerance(double new_tol)
Sets the absolute tolerance.
Definition: gmres.hpp:67
Various little tools used here and there in ViennaCL.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
Computes the first reduction stage for multiple inner products , i=0..k-1.
VectorT operator()(MatrixT const &A, VectorT const &b, PreconditionerT const &precond) const
Definition: gmres.hpp:686
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t k)
Computes x += eta_0 r + sum_{i=1}^{k-1} eta_i v_{i-1}.
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Computes the second reduction stage for multiple inner products , i=0..k-1, then updates v_k -= v_i and computes the first reduction stage for ||v_k||.
viennacl::vector< NumericT > solve_impl(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Overload for the pipelined CG implementation for the ViennaCL sparse matrix types.
Definition: bicgstab.hpp:219
double abs_tolerance() const
Returns the absolute tolerance.
Definition: gmres.hpp:65
void iters(unsigned int i) const
Set the number of solver iterations (should only be modified by the solver)
Definition: gmres.hpp:85
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:375
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:100
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: gmres.hpp:90
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
Definition: gmres.hpp:49
void set_initial_guess(VectorT const &x)
Specifies an initial guess for the iterative solver.
Definition: gmres.hpp:709
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496
gmres_tag(double tol=1e-10, unsigned int max_iterations=300, unsigned int krylov_dim=20)
The constructor.
Definition: gmres.hpp:58
VectorT operator()(MatrixT const &A, VectorT const &b) const
Definition: gmres.hpp:700
float NumericT
Definition: bisect.cpp:40
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:434
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:841
A tag class representing the use of no preconditioner.
Definition: forwards.h:873
void pipelined_gmres_prod(MatrixType const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined GMRES algorithm.
base_type::difference_type difference_type
Definition: vector.hpp:957
vcl_size_t size() const
Definition: vector_def.hpp:47
gmres_tag const & tag() const
Returns the solver tag containing basic configuration such as tolerances, etc.
Definition: gmres.hpp:725
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:403
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
Extracts the underlying context from objects.
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
void gmres_setup_householder_vector(VectorT const &input_vec, VectorT &hh_vec, NumericT &beta, NumericT &mu, vcl_size_t j)
Computes the householder vector 'hh_vec' which rotates 'input_vec' such that all entries below the j-...
Definition: gmres.hpp:131
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:98
gmres_solver(gmres_tag const &tag)
Definition: gmres.hpp:683
void gmres_copy_helper(SrcVectorT const &src, DestVectorT &dest, vcl_size_t len, vcl_size_t start=0)
Definition: gmres.hpp:107
std::size_t vcl_size_t
Definition: forwards.h:75
Generic clear functionality for different vector and matrix types.
void set_monitor(bool(*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor...
Definition: gmres.hpp:718
double tolerance() const
Returns the relative tolerance.
Definition: gmres.hpp:62
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:271
Proxy classes for vectors.
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
Helper meta function for retrieving the main RAM-based value type. Particularly important to obtain T...
Definition: result_of.hpp:269
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
float ScalarType
Definition: fft_1d.cpp:42
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: gmres.hpp:88
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Definition: vector_def.hpp:120
A collection of compile time type deductions.
unsigned int krylov_dim() const
Returns the maximum dimension of the Krylov space before restart.
Definition: gmres.hpp:72
void gmres_householder_reflect(VectorT &x, VectorT const &h, NumericT beta)
Definition: gmres.hpp:162
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)