#ifndef VIENNACL_WITH_OPENCL
  #define VIENNACL_WITH_OPENCL
#endif

// ... (standard library and ViennaCL includes elided)

#define BENCHMARK_VECTOR_SIZE   100000
template<typename ScalarType>
int run_benchmark()
{
  Timer timer;        // simple benchmarking timer used throughout this example
  double exec_time;

  // measure how long building the OpenCL kernels takes:
  timer.start();
  // ... (scalar kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building scalar kernels: " << exec_time << std::endl;

  timer.start();
  // ... (vector kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building vector kernels: " << exec_time << std::endl;

  timer.start();
  // ... (matrix kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building matrix kernels: " << exec_time << std::endl;

  timer.start();
  // ... (compressed_matrix kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building compressed_matrix kernels: " << exec_time << std::endl;

  // fill the host vector std_vec1 with test data (its declaration, and that of the
  // ViennaCL vector vcl_vec1, are in the elided lines above)
  for (std::size_t i = 1; i < BENCHMARK_VECTOR_SIZE; ++i)
    std_vec1[i] = std_vec1[i-1] * ScalarType(1.000001);

  // ... (std_vec1 is presumably copied over to vcl_vec1 in the elided lines)

  // benchmark per-entry read access on the host:
  double std_accumulate = 0;
  double vcl_accumulate = 0;

  timer.start();
  for (std::size_t i = 0; i < BENCHMARK_VECTOR_SIZE; ++i)
    std_accumulate += std_vec1[i];
  exec_time = timer.get();
  std::cout << "Time for " << BENCHMARK_VECTOR_SIZE << " entry accesses on host: " << exec_time << std::endl;
  std::cout << "Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
  std::cout << "Result of operation on host: " << std_accumulate << std::endl;

  // the same accesses through the viennacl::vector; every entry access goes through OpenCL:
  vcl_accumulate = vcl_vec1[0];          // warm-up access
  viennacl::ocl::get_queue().finish();   // wait for pending OpenCL operations before timing
  vcl_accumulate = 0;
  timer.start();
  for (std::size_t i = 0; i < BENCHMARK_VECTOR_SIZE; ++i)
    vcl_accumulate += vcl_vec1[i];
  exec_time = timer.get();
  std::cout << "Time for " << BENCHMARK_VECTOR_SIZE << " entry accesses via OpenCL: " << exec_time << std::endl;
  std::cout << "Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
  std::cout << "Result of operation via OpenCL: " << vcl_accumulate << std::endl;

  // ... (remainder of run_benchmark() elided)
}

int main()
{
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << " Device Info" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;

  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "## Benchmark :: OpenCL performance" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;
  std::cout << " -------------------------------" << std::endl;
  std::cout << " # benchmarking single-precision" << std::endl;
  std::cout << " -------------------------------" << std::endl;
  run_benchmark<float>();

  std::cout << std::endl;
  std::cout << " -------------------------------" << std::endl;
  std::cout << " # benchmarking double-precision" << std::endl;
  std::cout << " -------------------------------" << std::endl;
  run_benchmark<double>();

  // ... (remainder of main() elided)
}
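
Not shown in this excerpt: the double-precision pass is only meaningful on devices that actually support double precision, so the run_benchmark<double>() call is presumably wrapped in a capability check. A minimal sketch using ViennaCL's device query viennacl::ocl::device::double_support():

  if (viennacl::ocl::current_device().double_support())
    run_benchmark<double>();
  else
    std::cout << "# Device does not support double precision; benchmark skipped." << std::endl;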