ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
blas3.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2016, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
25 //disable debug mechanisms to have a fair comparison with ublas:
26 #ifndef NDEBUG
27  #define NDEBUG
28 #endif
29 
30 // System headers
31 #include <iostream>
32 
33 
34 // ublas headers
35 #include <boost/numeric/ublas/io.hpp>
36 #include <boost/numeric/ublas/triangular.hpp>
37 #include <boost/numeric/ublas/matrix_sparse.hpp>
38 #include <boost/numeric/ublas/matrix.hpp>
39 #include <boost/numeric/ublas/matrix_proxy.hpp>
40 #include <boost/numeric/ublas/lu.hpp>
41 #include <boost/numeric/ublas/io.hpp>
42 
43 
44 // Must be set if you want to use ViennaCL algorithms on ublas objects
45 #define VIENNACL_WITH_UBLAS 1
46 
47 
48 // ViennaCL headers
49 #include "viennacl/scalar.hpp"
50 #include "viennacl/vector.hpp"
51 #include "viennacl/matrix.hpp"
52 #include "viennacl/linalg/prod.hpp"
54 #include "viennacl/tools/timer.hpp"
55 
56 #define BLAS3_MATRIX_SIZE 400
57 
58 using namespace boost::numeric;
59 
60 
65 #ifndef VIENNACL_WITH_OPENCL
66  struct dummy
67  {
68  std::size_t size() const { return 1; }
69  };
70 #endif
71 
75 int main()
76 {
77  typedef float ScalarType;
78 
80  double exec_time;
81 
83 
87  ublas::matrix<ScalarType> ublas_A(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
88  ublas::matrix<ScalarType, ublas::column_major> ublas_B(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
89  ublas::matrix<ScalarType> ublas_C(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
90  ublas::matrix<ScalarType> ublas_C1(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
91 
92  for (unsigned int i = 0; i < ublas_A.size1(); ++i)
93  for (unsigned int j = 0; j < ublas_A.size2(); ++j)
94  ublas_A(i,j) = randomNumber();
95 
96  for (unsigned int i = 0; i < ublas_B.size1(); ++i)
97  for (unsigned int j = 0; j < ublas_B.size2(); ++j)
98  ublas_B(i,j) = randomNumber();
99 
103  //viennacl::ocl::set_context_device_type(0, viennacl::ocl::gpu_tag()); //uncomment this is you wish to use GPUs only
107 
113  std::cout << "--- Computing matrix-matrix product using ublas ---" << std::endl;
114  timer.start();
115  ublas_C = ublas::prod(ublas_A, ublas_B);
116  exec_time = timer.get();
117  std::cout << " - Execution time: " << exec_time << std::endl;
118 
123  std::cout << std::endl << "--- Computing matrix-matrix product on each available compute device using ViennaCL ---" << std::endl;
124 #ifdef VIENNACL_WITH_OPENCL
125  std::vector<viennacl::ocl::device> devices = viennacl::ocl::current_context().devices();
126 #else
127  dummy devices;
128 #endif
129 
130  for (std::size_t device_id=0; device_id<devices.size(); ++device_id)
131  {
132 #ifdef VIENNACL_WITH_OPENCL
133  viennacl::ocl::current_context().switch_device(devices[device_id]);
134  std::cout << " - Device Name: " << viennacl::ocl::current_device().name() << std::endl;
135 #endif
136 
140  viennacl::copy(ublas_A, vcl_A);
141  viennacl::copy(ublas_B, vcl_B);
142  vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
144  timer.start();
145  vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
147  exec_time = timer.get();
148  std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl;
149 
153  viennacl::copy(vcl_C, ublas_C1);
154 
155  std::cout << " - Checking result... ";
156  bool check_ok = true;
157  for (std::size_t i = 0; i < ublas_A.size1(); ++i)
158  {
159  for (std::size_t j = 0; j < ublas_A.size2(); ++j)
160  {
161  if ( std::fabs(ublas_C1(i,j) - ublas_C(i,j)) / ublas_C(i,j) > 1e-4 )
162  {
163  check_ok = false;
164  break;
165  }
166  }
167  if (!check_ok)
168  break;
169  }
170  if (check_ok)
171  std::cout << "[OK]" << std::endl << std::endl;
172  else
173  std::cout << "[FAILED]" << std::endl << std::endl;
174 
175  }
176 
180  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
181  return EXIT_SUCCESS;
182 }
183 
Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
Definition: timer.hpp:90
void switch_device(vcl_size_t i)
Switches the current device to the i-th device in this context.
Definition: context.hpp:119
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
Definition: backend.hpp:213
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
A dense matrix class.
Definition: forwards.h:375
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
Random number generator for returning uniformly distributed values in the closed interval [0...
Definition: random.hpp:44
int main()
Definition: blas3.cpp:75
std::string name() const
Device name string.
Definition: device.hpp:566
#define BLAS3_MATRIX_SIZE
Definition: blas3range.cpp:61
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
void prod(std::vector< std::map< IndexT, NumericT > > const &stl_A, std::vector< std::map< IndexT, NumericT > > const &stl_B, std::vector< std::map< IndexT, NumericT > > &stl_C)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
float ScalarType
Definition: fft_1d.cpp:42
double get() const
Definition: timer.hpp:104
std::vector< viennacl::ocl::device > const & devices() const
Returns a vector with all devices in this context.
Definition: context.hpp:106
Implementation of the ViennaCL scalar class.