#define VIENNACL_WITH_UBLAS 1

#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/vector_proxy.hpp>
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include <boost/numeric/ublas/operation_sparse.hpp>
#include <boost/numeric/ublas/lu.hpp>

#define BENCHMARK_RUNS 10
inline void printOps(double num_ops, double exec_time)
{
  std::cout << "GFLOPs: " << num_ops / (1000000 * exec_time * 1000) << std::endl;
}
template<typename ScalarType>
int run_benchmark()
{
  boost::numeric::ublas::vector<ScalarType> ublas_vec1;
  boost::numeric::ublas::vector<ScalarType> ublas_vec2;
  boost::numeric::ublas::compressed_matrix<ScalarType> ublas_matrix;
  std::cout << "Error reading Matrix file" << std::endl;

  std::cout << "done reading matrix" << std::endl;

  ublas_vec1 = boost::numeric::ublas::scalar_vector<ScalarType>(ublas_matrix.size1(), ScalarType(1.0));
  ublas_vec2 = ublas_vec1;
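  // A minimal sketch of the elided device-side setup, assuming the usual ViennaCL benchmark
  // layout. The names vcl_vec1, vcl_vec2 and vcl_compressed_matrix (used further below), the
  // corresponding viennacl headers, and a Timer timer with start()/get() plus a double exec_time
  // are not shown in this fragment.
  viennacl::vector<ScalarType> vcl_vec1(ublas_vec1.size());
  viennacl::vector<ScalarType> vcl_vec2(ublas_vec1.size());
  viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec1.size(), ublas_vec1.size());
  viennacl::copy(ublas_vec1, vcl_vec1);
  viennacl::copy(ublas_vec2, vcl_vec2);
  viennacl::copy(ublas_matrix, vcl_compressed_matrix);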
#ifndef VIENNACL_EXPERIMENTAL_DOUBLE_PRECISION_WITH_STREAM_SDK_ON_GPU
  std::cout << "------- Matrix-Vector product on CPU ----------" << std::endl;
    boost::numeric::ublas::axpy_prod(ublas_matrix, ublas_vec2, ublas_vec1, true);
  exec_time = timer.get();
  std::cout << "CPU time: " << exec_time << std::endl;
  std::cout << "CPU ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << ublas_vec1[0] << std::endl;
  std::cout << "------- Matrix-Vector product with compressed_matrix ----------" << std::endl;
  exec_time = timer.get();
  std::cout << "GPU time align1: " << exec_time << std::endl;
  std::cout << "GPU align1 ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
  std::cout << "Testing triangular solves: compressed_matrix" << std::endl;

  std::cout << "ublas..." << std::endl;
  std::cout << "Time elapsed: " << timer.get() << std::endl;
  std::cout << "ViennaCL..." << std::endl;
  std::cout << "Time elapsed: " << timer.get() << std::endl;
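  // (Assumed) the next timed section repeats the product loop with an alignment-4 matrix,
  // e.g. viennacl::compressed_matrix<ScalarType, 4>, copied from ublas_matrix.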
  exec_time = timer.get();
  std::cout << "GPU time align4: " << exec_time << std::endl;
  std::cout << "GPU align4 ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
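  // (Assumed) likewise for the alignment-8 variant, viennacl::compressed_matrix<ScalarType, 8>.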
  exec_time = timer.get();
  std::cout << "GPU time align8: " << exec_time << std::endl;
  std::cout << "GPU align8 ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
  std::cout << "------- Matrix-Vector product with coordinate_matrix ----------" << std::endl;
  for (std::size_t i=0; i<ublas_vec1.size(); ++i)
  {
    if (fabs(ublas_vec1[i] - ublas_vec2[i]) / std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
      std::cout << "Error at index " << i << ": Should: " << ublas_vec1[i] << ", Is: " << ublas_vec2[i] << std::endl;
  }
  exec_time = timer.get();
  std::cout << "GPU time: " << exec_time << std::endl;
  std::cout << "GPU ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
  std::cout << "------- Matrix-Vector product with ell_matrix ----------" << std::endl;
  for (std::size_t i=0; i<ublas_vec1.size(); ++i)
  {
    if (fabs(ublas_vec1[i] - ublas_vec2[i]) / std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
      std::cout << "Error at index " << i << ": Should: " << ublas_vec1[i] << ", Is: " << ublas_vec2[i] << std::endl;
  }

  exec_time = timer.get();
  std::cout << "GPU time: " << exec_time << std::endl;
  std::cout << "GPU ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
  std::cout << "------- Matrix-Vector product with hyb_matrix ----------" << std::endl;
  for (std::size_t i=0; i<ublas_vec1.size(); ++i)
  {
    if (fabs(ublas_vec1[i] - ublas_vec2[i]) / std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
      std::cout << "Error at index " << i << ": Should: " << ublas_vec1[i] << ", Is: " << ublas_vec2[i] << std::endl;
  }

  exec_time = timer.get();
  std::cout << "GPU time: " << exec_time << std::endl;
  std::cout << "GPU ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
  std::cout << "------- Matrix-Vector product with sliced_ell_matrix ----------" << std::endl;
  for (std::size_t i=0; i<ublas_vec1.size(); ++i)
  {
    if (fabs(ublas_vec1[i] - ublas_vec2[i]) / std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
      std::cout << "Error at index " << i << ": Should: " << ublas_vec1[i] << ", Is: " << ublas_vec2[i] << std::endl;
  }

  exec_time = timer.get();
  std::cout << "GPU time: " << exec_time << std::endl;
  std::cout << "GPU ";
  printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / static_cast<double>(BENCHMARK_RUNS));
  std::cout << vcl_vec1[0] << std::endl;
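  // (Elided between the fragments) run_benchmark() returns here; the output below is
  // produced at the beginning of main().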
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << " Device Info" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
#ifdef VIENNACL_WITH_OPENCL
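  // (Elided; assumed) the device properties are printed via the OpenCL backend:
  std::cout << viennacl::ocl::current_device().info() << std::endl;
#endif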
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "## Benchmark :: Sparse" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;
  std::cout << " -------------------------------" << std::endl;
  std::cout << " # benchmarking single-precision" << std::endl;
  std::cout << " -------------------------------" << std::endl;
  run_benchmark<float>();
#ifdef VIENNACL_WITH_OPENCL
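  // (Elided; assumed) the double-precision run below is typically guarded by a device
  // capability check such as viennacl::ocl::current_device().double_support().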
  std::cout << std::endl;
  std::cout << " -------------------------------" << std::endl;
  std::cout << " # benchmarking double-precision" << std::endl;
  std::cout << " -------------------------------" << std::endl;
  run_benchmark<double>();