ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
libviennacl_blas2.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2016, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
23 // include necessary system headers
24 #include <iostream>
25 #include <vector>
26 
27 // Some helper functions for this tutorial:
28 #include "viennacl.hpp"
29 
30 #include "viennacl/vector.hpp"
31 
32 template<typename ScalarType>
34 {
35  if (s1 > s2 || s1 < s2)
36  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
37  return ScalarType(0);
38 }
39 
40 template<typename ScalarType, typename ViennaCLVectorType>
41 ScalarType diff(std::vector<ScalarType> const & v1, ViennaCLVectorType const & vcl_vec)
42 {
43  std::vector<ScalarType> v2_cpu(vcl_vec.size());
45  viennacl::copy(vcl_vec, v2_cpu);
46 
47  ScalarType inf_norm = 0;
48  for (unsigned int i=0;i<v1.size(); ++i)
49  {
50  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
51  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
52  else
53  v2_cpu[i] = 0.0;
54 
55  if (v2_cpu[i] > inf_norm)
56  inf_norm = v2_cpu[i];
57  }
58 
59  return inf_norm;
60 }
61 
62 template<typename T, typename U, typename EpsilonT>
63 void check(T const & t, U const & u, EpsilonT eps)
64 {
65  EpsilonT rel_error = std::fabs(static_cast<EpsilonT>(diff(t,u)));
66  if (rel_error > eps)
67  {
68  std::cerr << "Relative error: " << rel_error << std::endl;
69  std::cerr << "Aborting!" << std::endl;
70  exit(EXIT_FAILURE);
71  }
72  std::cout << "SUCCESS ";
73 }
74 
75 int main()
76 {
77  std::size_t size1 = 13; // at least 7
78  std::size_t size2 = 11; // at least 7
79  float eps_float = 1e-5f;
80  double eps_double = 1e-12;
81 
82  ViennaCLBackend my_backend;
83  ViennaCLBackendCreate(&my_backend);
84 
85  std::vector<float> ref_float_x(size1); for (std::size_t i=0; i<size1; ++i) ref_float_x[i] = static_cast<float>(i);
86  std::vector<float> ref_float_y(size2); for (std::size_t i=0; i<size2; ++i) ref_float_y[i] = static_cast<float>(size2 - i);
87  std::vector<float> ref_float_A(size1*size2); for (std::size_t i=0; i<size1*size2; ++i) ref_float_A[i] = static_cast<float>(3*i);
88  std::vector<float> ref_float_B(size1*size2); for (std::size_t i=0; i<size1*size2; ++i) ref_float_B[i] = static_cast<float>(2*i);
89 
90  std::vector<double> ref_double_x(size1, 1.0); for (std::size_t i=0; i<size1; ++i) ref_double_x[i] = static_cast<double>(i);
91  std::vector<double> ref_double_y(size2, 2.0); for (std::size_t i=0; i<size2; ++i) ref_double_y[i] = static_cast<double>(size2 - i);
92  std::vector<double> ref_double_A(size1*size2, 3.0); for (std::size_t i=0; i<size1*size2; ++i) ref_double_A[i] = static_cast<double>(3*i);
93  std::vector<double> ref_double_B(size1*size2, 4.0); for (std::size_t i=0; i<size1*size2; ++i) ref_double_B[i] = static_cast<double>(2*i);
94 
95  // Host setup
96  viennacl::vector<float> host_float_x = viennacl::scalar_vector<float>(size1, 1.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1; ++i) host_float_x[i] = float(i);
97  viennacl::vector<float> host_float_y = viennacl::scalar_vector<float>(size2, 2.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size2; ++i) host_float_y[i] = float(size2 - i);
98  viennacl::vector<float> host_float_A = viennacl::scalar_vector<float>(size1*size2, 3.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_float_A[i] = float(3*i);
99  viennacl::vector<float> host_float_B = viennacl::scalar_vector<float>(size1*size2, 4.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_float_B[i] = float(2*i);
100 
101  viennacl::vector<double> host_double_x = viennacl::scalar_vector<double>(size1, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1; ++i) host_double_x[i] = double(i);
102  viennacl::vector<double> host_double_y = viennacl::scalar_vector<double>(size2, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size2; ++i) host_double_y[i] = double(size2 - i);
103  viennacl::vector<double> host_double_A = viennacl::scalar_vector<double>(size1*size2, 3.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_double_A[i] = double(3*i);
104  viennacl::vector<double> host_double_B = viennacl::scalar_vector<double>(size1*size2, 4.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_double_B[i] = double(2*i);
105 
106  // CUDA setup
107 #ifdef VIENNACL_WITH_CUDA
108  viennacl::vector<float> cuda_float_x = viennacl::scalar_vector<float>(size1, 1.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1; ++i) cuda_float_x[i] = float(i);
109  viennacl::vector<float> cuda_float_y = viennacl::scalar_vector<float>(size2, 2.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size2; ++i) cuda_float_y[i] = float(size2 - i);
110  viennacl::vector<float> cuda_float_A = viennacl::scalar_vector<float>(size1*size2, 3.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_float_A[i] = float(3*i);
111  viennacl::vector<float> cuda_float_B = viennacl::scalar_vector<float>(size1*size2, 4.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_float_B[i] = float(2*i);
112 
113  viennacl::vector<double> cuda_double_x = viennacl::scalar_vector<double>(size1, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1; ++i) cuda_double_x[i] = double(i);
114  viennacl::vector<double> cuda_double_y = viennacl::scalar_vector<double>(size2, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size2; ++i) cuda_double_y[i] = double(size2 - i);
115  viennacl::vector<double> cuda_double_A = viennacl::scalar_vector<double>(size1*size2, 3.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_double_A[i] = double(3*i);
116  viennacl::vector<double> cuda_double_B = viennacl::scalar_vector<double>(size1*size2, 4.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_double_B[i] = double(2*i);
117 #endif
118 
119  // OpenCL setup
120 #ifdef VIENNACL_WITH_OPENCL
121  ViennaCLInt context_id = 0;
122  viennacl::vector<float> opencl_float_x = viennacl::scalar_vector<float>(size1, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size1; ++i) opencl_float_x[i] = float(i);
123  viennacl::vector<float> opencl_float_y = viennacl::scalar_vector<float>(size2, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size2; ++i) opencl_float_y[i] = float(size2 - i);
124  viennacl::vector<float> opencl_float_A = viennacl::scalar_vector<float>(size1*size2, 3.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size1*size2; ++i) opencl_float_A[i] = float(3*i);
125  viennacl::vector<float> opencl_float_B = viennacl::scalar_vector<float>(size1*size2, 4.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size1*size2; ++i) opencl_float_B[i] = float(2*i);
126 
127  viennacl::vector<double> *opencl_double_x = NULL;
128  viennacl::vector<double> *opencl_double_y = NULL;
129  viennacl::vector<double> *opencl_double_A = NULL;
130  viennacl::vector<double> *opencl_double_B = NULL;
132  {
133  opencl_double_x = new viennacl::vector<double>(viennacl::scalar_vector<double>(size1, 1.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size1; ++i) (*opencl_double_x)[i] = double(i);
134  opencl_double_y = new viennacl::vector<double>(viennacl::scalar_vector<double>(size2, 2.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size2; ++i) (*opencl_double_y)[i] = double(size2 - i);
135  opencl_double_A = new viennacl::vector<double>(viennacl::scalar_vector<double>(size1*size2, 3.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size1*size2; ++i) (*opencl_double_A)[i] = double(3*i);
136  opencl_double_B = new viennacl::vector<double>(viennacl::scalar_vector<double>(size1*size2, 4.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size1*size2; ++i) (*opencl_double_B)[i] = double(2*i);
137  }
138 
139  ViennaCLBackendSetOpenCLContextID(my_backend, context_id);
140 #endif
141 
142  // consistency checks:
143  check(ref_float_x, host_float_x, eps_float);
144  check(ref_float_y, host_float_y, eps_float);
145  check(ref_float_A, host_float_A, eps_float);
146  check(ref_float_B, host_float_B, eps_float);
147  check(ref_double_x, host_double_x, eps_double);
148  check(ref_double_y, host_double_y, eps_double);
149  check(ref_double_A, host_double_A, eps_double);
150  check(ref_double_B, host_double_B, eps_double);
151 #ifdef VIENNACL_WITH_CUDA
152  check(ref_float_x, cuda_float_x, eps_float);
153  check(ref_float_y, cuda_float_y, eps_float);
154  check(ref_float_A, cuda_float_A, eps_float);
155  check(ref_float_B, cuda_float_B, eps_float);
156  check(ref_double_x, cuda_double_x, eps_double);
157  check(ref_double_y, cuda_double_y, eps_double);
158  check(ref_double_A, cuda_double_A, eps_double);
159  check(ref_double_B, cuda_double_B, eps_double);
160 #endif
161 #ifdef VIENNACL_WITH_OPENCL
162  check(ref_float_x, opencl_float_x, eps_float);
163  check(ref_float_y, opencl_float_y, eps_float);
164  check(ref_float_A, opencl_float_A, eps_float);
165  check(ref_float_B, opencl_float_B, eps_float);
166  if ( viennacl::ocl::current_device().double_support() )
167  {
168  check(ref_double_x, *opencl_double_x, eps_double);
169  check(ref_double_y, *opencl_double_y, eps_double);
170  check(ref_double_A, *opencl_double_A, eps_double);
171  check(ref_double_B, *opencl_double_B, eps_double);
172  }
173 #endif
174 
175  // GEMV
176  std::cout << std::endl << "-- Testing xGEMV...";
177  for (std::size_t i=0; i<size1/3; ++i)
178  {
179  ref_float_x[i * 2 + 1] *= 0.1234f;
180  ref_double_x[i * 2 + 1] *= 0.1234;
181  for (std::size_t j=0; j<size2/4; ++j)
182  {
183  ref_float_x[i * 2 + 1] += 3.1415f * ref_float_A[(2*i+2) * size2 + 3 * j + 1] * ref_float_y[j * 3 + 1];
184  ref_double_x[i * 2 + 1] += 3.1415 * ref_double_A[(2*i+2) * size2 + 3 * j + 1] * ref_double_y[j * 3 + 1];
185  }
186  }
187 
188  std::cout << std::endl << "Host: ";
189  ViennaCLHostSgemv(my_backend,
191  ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_A), 2, 1, 2, 3, ViennaCLInt(size2),
192  viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 3,
193  0.1234f,
194  viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2);
195  check(ref_float_x, host_float_x, eps_float);
196  ViennaCLHostDgemv(my_backend,
198  ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_A), 2, 1, 2, 3, ViennaCLInt(size2),
199  viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 3,
200  0.1234,
201  viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2);
202  check(ref_double_x, host_double_x, eps_double);
203 
204 
205 #ifdef VIENNACL_WITH_CUDA
206  std::cout << std::endl << "CUDA: ";
207  ViennaCLCUDASgemv(my_backend,
209  ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::cuda_arg(cuda_float_A), 2, 1, 2, 3, size2,
210  viennacl::cuda_arg(cuda_float_y), 1, 3,
211  0.1234f,
212  viennacl::cuda_arg(cuda_float_x), 1, 2);
213  check(ref_float_x, cuda_float_x, eps_float);
214  ViennaCLCUDADgemv(my_backend,
216  ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::cuda_arg(cuda_double_A), 2, 1, 2, 3, size2,
217  viennacl::cuda_arg(cuda_double_y), 1, 3,
218  0.1234,
219  viennacl::cuda_arg(cuda_double_x), 1, 2);
220  check(ref_double_x, cuda_double_x, eps_double);
221 #endif
222 
223 #ifdef VIENNACL_WITH_OPENCL
224  std::cout << std::endl << "OpenCL: ";
225  ViennaCLOpenCLSgemv(my_backend,
227  ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::traits::opencl_handle(opencl_float_A), 2, 1, 2, 3, ViennaCLInt(size2),
228  viennacl::traits::opencl_handle(opencl_float_y), 1, 3,
229  0.1234f,
230  viennacl::traits::opencl_handle(opencl_float_x), 1, 2);
231  check(ref_float_x, opencl_float_x, eps_float);
232  if ( viennacl::ocl::current_device().double_support() )
233  {
234  ViennaCLOpenCLDgemv(my_backend,
236  ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::traits::opencl_handle(*opencl_double_A), 2, 1, 2, 3, ViennaCLInt(size2),
237  viennacl::traits::opencl_handle(*opencl_double_y), 1, 3,
238  0.1234,
239  viennacl::traits::opencl_handle(*opencl_double_x), 1, 2);
240  check(ref_double_x, *opencl_double_x, eps_double);
241  }
242 #endif
243 
244 
245 
246 #ifdef VIENNACL_WITH_OPENCL
247  delete opencl_double_x;
248  delete opencl_double_y;
249  delete opencl_double_A;
250  delete opencl_double_B;
251 #endif
252 
253  ViennaCLBackendDestroy(&my_backend);
254 
255  //
256  // That's it.
257  //
258  std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl;
259 
260  return EXIT_SUCCESS;
261 }
262 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend *backend)
Definition: backend.cpp:25
Generic backend for CUDA, OpenCL, host-based stuff.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, double beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id)
Definition: backend.cpp:32
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy)
Definition: blas2_host.cpp:36
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy)
viennacl::vector< float > v1
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy)
Definition: blas2_host.cpp:60
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend *backend)
Definition: backend.cpp:39
int main()
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
int ViennaCLInt
Definition: viennacl.hpp:48
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, float beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
float ScalarType
Definition: fft_1d.cpp:42
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39
void check(T const &t, U const &u, EpsilonT eps)
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context.
Definition: backend.hpp:225
ScalarType diff(ScalarType const &s1, ScalarType const &s2)