1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
67 void abs_tolerance(
double new_tol) {
if (new_tol >= 0) abs_tol_ = new_tol; }
76 unsigned int ret = iterations_ / krylov_dim_;
77 if (ret > 0 && (ret * krylov_dim_ == iterations_) )
83 unsigned int iters()
const {
return iters_taken_; }
85 void iters(
unsigned int i)
const { iters_taken_ = i; }
88 double error()
const {
return last_error_; }
90 void error(
double e)
const { last_error_ = e; }
95 unsigned int iterations_;
96 unsigned int krylov_dim_;
99 mutable unsigned int iters_taken_;
100 mutable double last_error_;
106 template<
typename SrcVectorT,
typename DestVectorT>
113 template<
typename NumericT,
typename DestVectorT>
118 src.
begin() +
static_cast<difference_type
>(
start + len),
119 dest.begin() +
static_cast<difference_type
>(
start));
130 template<
typename VectorT,
typename NumericT>
148 mu = std::sqrt(sigma + input_j*input_j);
150 NumericT hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
152 beta =
NumericT(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
161 template<
typename VectorT,
typename NumericT>
165 x -= (beta * hT_in_x) * h;
181 template <
typename MatrixType,
typename ScalarType>
187 void *monitor_data = NULL)
194 std::vector<ScalarType> host_buffer_R(device_buffer_R.size());
202 std::vector<ScalarType> host_r_dot_vk_buffer(device_r_dot_vk_buffer.
size());
203 std::vector<ScalarType> host_values_xi_k(tag.
krylov_dim());
204 std::vector<ScalarType> host_values_eta_k_buffer(tag.
krylov_dim());
205 std::vector<ScalarType> host_update_coefficients(tag.
krylov_dim());
213 for (
unsigned int restart_count = 0; restart_count <= tag.
max_restarts(); ++restart_count)
218 if (restart_count > 0)
222 residual = rhs - residual;
241 for (k = 0; k < static_cast<vcl_size_t>(tag.
krylov_dim()); ++k)
252 device_inner_prod_buffer, device_r_dot_vk_buffer,
253 buffer_size_per_vector, k*buffer_size_per_vector);
272 device_vi_in_vk_buffer,
274 device_inner_prod_buffer, buffer_size_per_vector);
281 device_inner_prod_buffer, device_r_dot_vk_buffer,
282 buffer_size_per_vector, k*buffer_size_per_vector);
290 viennacl::fast_copy(device_r_dot_vk_buffer.begin(), device_r_dot_vk_buffer.end(), host_r_dot_vk_buffer.begin());
291 for (std::size_t i=0; i<k; ++i)
294 for (std::size_t j=0; j<buffer_size_per_vector; ++j)
295 host_values_xi_k[i] += host_r_dot_vk_buffer[i*buffer_size_per_vector + j];
301 viennacl::fast_copy(device_buffer_R.begin(), device_buffer_R.end(), host_buffer_R.begin());
307 for (std::size_t i=0; i<k; ++i)
309 if (std::fabs(host_buffer_R[i + i*k]) < tag.
tolerance() * host_buffer_R[0])
318 for (std::size_t i=0; i<k; ++i)
323 if (host_values_xi_k[i] >= rho || host_values_xi_k[i] <= -rho)
330 rho *= std::sin( std::acos(host_values_xi_k[i] / rho) );
336 host_values_eta_k_buffer = host_values_xi_k;
338 for (
int i2=static_cast<int>(k)-1; i2>-1; --i2)
341 for (
vcl_size_t j=static_cast<vcl_size_t>(i)+1; j<k; ++j)
342 host_values_eta_k_buffer[i] -= host_buffer_R[i + j*full_krylov_dim] * host_values_eta_k_buffer[j];
344 host_values_eta_k_buffer[i] /= host_buffer_R[i + i*full_krylov_dim];
352 host_update_coefficients[i] = rho_0 * host_values_eta_k_buffer[i];
354 viennacl::fast_copy(host_update_coefficients.begin(), host_update_coefficients.end(), device_values_xi_k.begin());
358 device_values_xi_k, k);
360 tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
362 if (monitor && monitor(result, std::fabs(rho*rho_0 / norm_rhs), monitor_data))
370 template<
typename NumericT>
376 void *monitor_data = NULL)
383 template<
typename NumericT>
389 void *monitor_data = NULL)
397 template<
typename NumericT>
403 void *monitor_data = NULL)
411 template<
typename NumericT>
417 void *monitor_data = NULL)
424 template<
typename NumericT>
430 void *monitor_data = NULL)
449 template<
typename MatrixT,
typename VectorT,
typename PreconditionerT>
453 PreconditionerT
const & precond,
455 void *monitor_data = NULL)
461 VectorT result = rhs;
465 if (problem_size < krylov_dim)
466 krylov_dim = problem_size;
469 VectorT v_k_tilde = rhs;
470 VectorT v_k_tilde_temp = rhs;
472 std::vector< std::vector<CPU_NumericType> > R(krylov_dim, std::vector<CPU_NumericType>(tag.
krylov_dim()));
473 std::vector<CPU_NumericType> projection_rhs(krylov_dim);
475 std::vector<VectorT> householder_reflectors(krylov_dim, rhs);
476 std::vector<CPU_NumericType> betas(krylov_dim);
485 for (
unsigned int it = 0; it <= tag.
max_restarts(); ++it)
501 tag.
error(rho_0 / norm_rhs);
509 CPU_NumericType rho =
static_cast<CPU_NumericType
>(1.0);
516 for (k = 0; k < krylov_dim; ++k)
528 precond.apply(v_k_tilde);
533 v_k_tilde[k-1] = CPU_NumericType(1);
536 for (
int i = static_cast<int>(k)-1; i > -1; --i)
540 precond.apply(v_k_tilde_temp);
541 v_k_tilde = v_k_tilde_temp;
551 CPU_NumericType rho_k_k = 0;
572 projection_rhs[k] = res[k];
574 rho *= std::sin( std::acos(projection_rhs[k] / rho) );
576 if (std::fabs(rho * rho_0 / norm_rhs) < tag.
tolerance())
578 tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
588 for (
int i2=static_cast<int>(k)-1; i2>-1; --i2)
592 projection_rhs[i] -= R[j][i] * projection_rhs[j];
594 projection_rhs[i] /= R[i][i];
601 res *= projection_rhs[0];
605 for (
unsigned int i = 0; i < k-1; ++i)
606 res[i] += projection_rhs[i+1];
612 for (
int i=static_cast<int>(k)-1; i>=0; --i)
621 tag.
error(std::fabs(rho*rho_0 / norm_rhs));
623 if (monitor && monitor(result, std::fabs(rho*rho_0 / norm_rhs), monitor_data))
635 template<
typename MatrixT,
typename VectorT,
typename PreconditionerT>
636 VectorT
solve(MatrixT
const & matrix, VectorT
const & rhs,
gmres_tag const & tag, PreconditionerT
const & precond)
646 template<
typename IndexT,
typename NumericT,
typename PreconditionerT>
647 std::vector<NumericT>
solve(std::vector< std::map<IndexT, NumericT> >
const & A, std::vector<NumericT>
const & rhs,
gmres_tag const & tag, PreconditionerT
const & precond)
657 std::vector<NumericT> result(vcl_result.
size());
669 template<
typename MatrixT,
typename VectorT>
677 template<
typename VectorT>
685 template<
typename MatrixT,
typename PreconditionerT>
686 VectorT
operator()(MatrixT
const & A, VectorT
const & b, PreconditionerT
const & precond)
const
691 mod_rhs = b - mod_rhs;
692 VectorT y =
detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
693 return init_guess_ + y;
699 template<
typename MatrixT>
700 VectorT
operator()(MatrixT
const & A, VectorT
const & b)
const
720 monitor_callback_ = monitor_fun;
721 user_data_ = user_data;
730 bool (*monitor_callback_)(VectorT
const &,
numeric_type,
void *);
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
unsigned int max_restarts() const
Returns the maximum number of GMRES restarts.
T norm_2(std::vector< T, A > const &v1)
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::result_of::cpu_value_type< VectorT >::type numeric_type
unsigned int max_iterations() const
Returns the maximum number of iterations.
Generic size and resize functionality for different vector and matrix types.
unsigned int iters() const
Return the number of solver iterations:
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
void abs_tolerance(double new_tol)
Sets the absolute tolerance.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
Computes the first reduction stage for multiple inner products , i=0..k-1.
VectorT operator()(MatrixT const &A, VectorT const &b, PreconditionerT const &precond) const
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t k)
Computes x += eta_0 r + sum_{i=1}^{k-1} eta_i v_{i-1}.
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Computes the second reduction stage for multiple inner products , i=0..k-1, then updates v_k -= v_i and computes the first reduction stage for ||v_k||.
viennacl::vector< NumericT > solve_impl(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Overload for the pipelined CG implementation for the ViennaCL sparse matrix types.
double abs_tolerance() const
Returns the absolute tolerance.
void iters(unsigned int i) const
Set the number of solver iterations (should only be modified by the solver)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL.
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
void set_initial_guess(VectorT const &x)
Specifies an initial guess for the iterative solver.
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
gmres_tag(double tol=1e-10, unsigned int max_iterations=300, unsigned int krylov_dim=20)
The constructor.
VectorT operator()(MatrixT const &A, VectorT const &b) const
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Class for representing non-strided subvectors of a bigger vector x.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
A tag class representing the use of no preconditioner.
void pipelined_gmres_prod(MatrixType const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined GMRES algorithm.
base_type::difference_type difference_type
gmres_tag const & tag() const
Returns the solver tag containing basic configuration such as tolerances, etc.
Sparse matrix class using the sliced ELLPACK with parameters C, .
result_of::size_type< T >::type start(T const &obj)
Extracts the underlying context from objects.
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
void gmres_setup_householder_vector(VectorT const &input_vec, VectorT &hh_vec, NumericT &beta, NumericT &mu, vcl_size_t j)
Computes the householder vector 'hh_vec' which rotates 'input_vec' such that all entries below the j-...
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
gmres_solver(gmres_tag const &tag)
void gmres_copy_helper(SrcVectorT const &src, DestVectorT &dest, vcl_size_t len, vcl_size_t start=0)
Generic clear functionality for different vector and matrix types.
void set_monitor(bool(*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor...
double tolerance() const
Returns the relative tolerance.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Proxy classes for vectors.
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Helper meta function for retrieving the main RAM-based value type. Particularly important to obtain T...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
double error() const
Returns the estimated relative error at the end of the solver run.
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
A collection of compile time type deductions.
unsigned int krylov_dim() const
Returns the maximum dimension of the Krylov space before restart.
void gmres_householder_reflect(VectorT &x, VectorT const &h, NumericT beta)
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)