38 #include <boost/numeric/ublas/io.hpp>
39 #include <boost/numeric/ublas/triangular.hpp>
40 #include <boost/numeric/ublas/matrix_sparse.hpp>
41 #include <boost/numeric/ublas/matrix.hpp>
42 #include <boost/numeric/ublas/matrix_proxy.hpp>
43 #include <boost/numeric/ublas/lu.hpp>
44 #include <boost/numeric/ublas/io.hpp>
48 #define VIENNACL_WITH_UBLAS 1
61 #define BLAS3_MATRIX_SIZE 1500
63 using namespace boost::numeric;
87 std::vector<ScalarType> stl_B(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE);
88 std::vector<ScalarType> stl_C(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE);
93 for (
unsigned int i = 0; i < ublas_A.size1(); ++i)
94 for (
unsigned int j = 0; j < ublas_A.size2(); ++j)
96 ublas_A(i,j) = randomNumber();
97 stl_A[i*ublas_A.size2() + j] = ublas_A(i,j);
100 for (
unsigned int i = 0; i < ublas_B.size1(); ++i)
101 for (
unsigned int j = 0; j < ublas_B.size2(); ++j)
103 ublas_B(i,j) = randomNumber();
104 stl_B[i + j*ublas_B.size1()] = ublas_B(i,j);
109 ublas::matrix_range< ublas::matrix<ScalarType> > ublas_A_sub(ublas_A, ublas_r1, ublas_r2);
110 ublas::matrix_range< ublas::matrix<ScalarType, ublas::column_major> > ublas_B_sub(ublas_B, ublas_r2, ublas_r1);
111 ublas::matrix_range< ublas::matrix<ScalarType> > ublas_C_sub(ublas_C, ublas_r1, ublas_r1);
137 std::cout <<
"--- Computing matrix-matrix product using ublas ---" << std::endl;
139 ublas_C_sub =
ublas::prod(ublas_A_sub, ublas_B_sub);
140 exec_time = timer.
get();
141 std::cout <<
" - Execution time: " << exec_time << std::endl;
148 std::cout << std::endl <<
"--- Computing matrix-matrix product on each available compute device using ViennaCL ---" << std::endl;
150 for (std::size_t i=0; i<devices.size(); ++i)
158 &(stl_A[0]) + stl_A.size(),
161 &(stl_B[0]) + stl_B.size(),
168 exec_time = timer.
get();
169 std::cout <<
" - Execution time on device (no setup time included): " << exec_time << std::endl;
170 std::cout <<
" - GFLOPs: " << (vcl_A.
size1() / 1000.0) * (vcl_A.
size2() / 1000.0) * (vcl_B.
size2() / 1000.0) / exec_time << std::endl;
179 for (
unsigned int i = 0; i < ublas_C1.size1(); ++i)
180 for (
unsigned int j = 0; j < ublas_C1.size2(); ++j)
181 ublas_C1(i,j) = stl_C[i * ublas_C1.size2() + j];
183 std::cout <<
" - Checking result... ";
184 bool check_ok =
true;
185 for (
unsigned int i = 0; i < ublas_A.size1(); ++i)
187 for (
unsigned int j = 0; j < ublas_A.size2(); ++j)
189 if ( fabs(ublas_C1(i,j) - ublas_C(i,j)) / ublas_C(i,j) > 1e-4 )
199 std::cout <<
"[OK]" << std::endl << std::endl;
201 std::cout <<
"[FAILED]" << std::endl << std::endl;
208 std::cout <<
"!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
void finish() const
Waits until all kernels in the queue have finished their execution.
void switch_device(vcl_size_t i)
Switches the current device to the i-th device in this context.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
viennacl::ocl::command_queue & get_queue()
Convenience function for getting the default queue for the currently active device in the active context.
size_type size2() const
Returns the number of columns.
std::string name() const
Device name string.
size_type size1() const
Returns the number of rows.
#define BLAS3_MATRIX_SIZE
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
Proxy classes for matrices.
void prod(std::vector< std::map< IndexT, NumericT > > const &stl_A, std::vector< std::map< IndexT, NumericT > > const &stl_B, std::vector< std::map< IndexT, NumericT > > &stl_C)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Class for representing non-strided submatrices of a bigger matrix A.
std::vector< viennacl::ocl::device > const & devices() const
Returns a vector with all devices in this context.
Implementation of the ViennaCL scalar class.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)