ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
opencl.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2016, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 /*
19 *
20 * Benchmark: Profiling performance of current OpenCL implementation
21 *
22 */
23 
24 
25 #ifndef NDEBUG
26  #define NDEBUG
27 #endif
28 
29 #ifndef VIENNACL_WITH_OPENCL
30  #define VIENNACL_WITH_OPENCL
31 #endif
32 
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/tools/timer.hpp"

#include <iostream>
#include <vector>
41 
42 using std::cout;
43 using std::cin;
44 using std::endl;
45 
46 
47 #define BENCHMARK_VECTOR_SIZE 100000
48 
49 
50 template<typename ScalarType>
52 {
53 
55  double exec_time;
56 
57  std::vector<ScalarType> std_vec1(BENCHMARK_VECTOR_SIZE);
58 
59 
61 
62  timer.start();
64  exec_time = timer.get();
65  std::cout << "Time for building scalar kernels: " << exec_time << std::endl;
66 
67  timer.start();
69  exec_time = timer.get();
71  std::cout << "Time for building vector kernels: " << exec_time << std::endl;
72 
73  timer.start();
75  exec_time = timer.get();
76  std::cout << "Time for building matrix kernels: " << exec_time << std::endl;
77 
78  timer.start();
80  exec_time = timer.get();
81  std::cout << "Time for building compressed_matrix kernels: " << exec_time << std::endl;
82 
83 
84 
86 
87  std_vec1[0] = 1.0;
88  for (std::size_t i=1; i<BENCHMARK_VECTOR_SIZE; ++i)
89  std_vec1[i] = std_vec1[i-1] * ScalarType(1.000001);
90 
91  viennacl::copy(std_vec1, vcl_vec1);
92 
93  double std_accumulate = 0;
94  double vcl_accumulate = 0;
95 
96  timer.start();
97  for (std::size_t i=0; i<BENCHMARK_VECTOR_SIZE; ++i)
98  std_accumulate += std_vec1[i];
99  exec_time = timer.get();
100  std::cout << "Time for " << BENCHMARK_VECTOR_SIZE << " entry accesses on host: " << exec_time << std::endl;
101  std::cout << "Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
102  std::cout << "Result of operation on host: " << std_accumulate << std::endl;
103 
104  vcl_accumulate = vcl_vec1[0];
106  vcl_accumulate = 0;
107  timer.start();
108  for (std::size_t i=0; i<BENCHMARK_VECTOR_SIZE; ++i)
109  vcl_accumulate += vcl_vec1[i];
110  exec_time = timer.get();
111  std::cout << "Time for " << BENCHMARK_VECTOR_SIZE << " entry accesses via OpenCL: " << exec_time << std::endl;
112  std::cout << "Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
113  std::cout << "Result of operation via OpenCL: " << vcl_accumulate << std::endl;
114 
115  return 0;
116 }
117 
118 int main()
119 {
120  std::cout << std::endl;
121  std::cout << "----------------------------------------------" << std::endl;
122  std::cout << " Device Info" << std::endl;
123  std::cout << "----------------------------------------------" << std::endl;
124 
125  std::cout << viennacl::ocl::current_device().info() << std::endl;
126 
127  std::cout << std::endl;
128  std::cout << "----------------------------------------------" << std::endl;
129  std::cout << "----------------------------------------------" << std::endl;
130  std::cout << "## Benchmark :: OpenCL performance" << std::endl;
131  std::cout << "----------------------------------------------" << std::endl;
132  std::cout << std::endl;
133  std::cout << " -------------------------------" << std::endl;
134  std::cout << " # benchmarking single-precision" << std::endl;
135  std::cout << " -------------------------------" << std::endl;
136  run_benchmark<float>();
138  {
139  std::cout << std::endl;
140  std::cout << " -------------------------------" << std::endl;
141  std::cout << " # benchmarking double-precision" << std::endl;
142  std::cout << " -------------------------------" << std::endl;
143  run_benchmark<double>();
144  }
145  return 0;
146 }
147 
Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
Definition: timer.hpp:90
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
void finish() const
Waits until all kernels in the queue have finished their execution.
int run_benchmark()
Definition: opencl.cpp:51
Implementation of the dense matrix class.
A dense matrix class.
Definition: forwards.h:375
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
viennacl::ocl::command_queue & get_queue()
Convenience function for getting the default queue for the currently active device in the active cont...
Definition: backend.hpp:320
Implementation of the compressed_matrix class.
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
#define BENCHMARK_VECTOR_SIZE
Definition: opencl.cpp:47
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
float ScalarType
Definition: fft_1d.cpp:42
int main()
Definition: opencl.cpp:118
A sparse square matrix in compressed sparse rows format.
double get() const
Definition: timer.hpp:104
Implementation of the ViennaCL scalar class.