doc/benchmarks_2scheduler_8cpp_source.html

 /* =========================================================================

    Copyright (c) 2010-2016, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the PDF manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 /*

 *

 *   Benchmark:   Scheduler overhead for a vector operation (scheduler.cpp and scheduler.cu are identical, the latter being required for compilation using CUDA nvcc)

 *

 */


 //#define VIENNACL_DEBUG_ALL

 // Ensure NDEBUG is defined before any header below is included, so that
 // checks keyed on NDEBUG (such as the standard assert()) are compiled out
 // of the benchmark. The guard avoids redefining it when the build already sets it.
 #ifndef NDEBUG

  #define NDEBUG

 #endif


 #include "viennacl/scalar.hpp"

 #include "viennacl/vector.hpp"

 #include "viennacl/matrix.hpp"

 #include "viennacl/linalg/inner_prod.hpp"

 #include "viennacl/linalg/norm_2.hpp"

 #include "viennacl/scheduler/execute.hpp"

 #include "viennacl/tools/timer.hpp"


 #include <iostream>

 #include <vector>


 using std::cout;

 using std::cin;

 using std::endl;


 // Number of entries in each host and device vector used by run_benchmark().
 // NOTE(review): presumably kept this small on purpose so that the measured time
 // is dominated by per-operation overhead rather than by the arithmetic - confirm.
 #define BENCHMARK_VECTOR_SIZE   2

 // Number of repetitions of each timed loop; reported times are divided by this value.
 #define BENCHMARK_RUNS          1000


 /** @brief Measures the per-operation cost of 'vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2' with and without the scheduler.
  *
  * Three variants are timed over BENCHMARK_RUNS repetitions each and reported on std::cout:
  *   1. the expression written directly through the operator overloads,
  *   2. a scheduler::statement that is constructed and executed in every iteration,
  *   3. a single scheduler::statement that is constructed once and executed repeatedly.
  * After each variant the first entry of vcl_vec2 is printed.
  *
  * @tparam ScalarType  Numeric type of the vector entries (main() instantiates float and double)
  * @return Always 0
  */
 template<typename ScalarType>
 int run_benchmark()
 {
   viennacl::tools::timer timer;

   std::vector<ScalarType> std_vec1(BENCHMARK_VECTOR_SIZE);
   std::vector<ScalarType> std_vec2(BENCHMARK_VECTOR_SIZE);
   viennacl::vector<ScalarType> vcl_vec1(BENCHMARK_VECTOR_SIZE);
   viennacl::vector<ScalarType> vcl_vec2(BENCHMARK_VECTOR_SIZE);
   ScalarType alpha = ScalarType(1.1415);
   ScalarType beta  = ScalarType(0.97172);

   // Fill the host vectors; every entry is derived from its predecessor.
   std_vec1[0] = 1.0;
   std_vec2[0] = 1.0;
   for (std::size_t i=1; i<BENCHMARK_VECTOR_SIZE; ++i)
   {
     std_vec1[i] = std_vec1[i-1] * ScalarType(1.000001);
     // NOTE(review): this reads std_vec1[i-1], not std_vec2[i-1]. Kept as-is because it
     // only affects the input values, not what is being timed - confirm whether intended.
     std_vec2[i] = std_vec1[i-1] * ScalarType(0.999999);
   }

   // Transfer the host data to the ViennaCL vectors (one transfer per vector is sufficient).
   viennacl::copy(std_vec1, vcl_vec1);
   viennacl::copy(std_vec2, vcl_vec2);

   //
   // Variant 1: expression written directly, no explicit scheduler use
   //
   // One untimed run enclosed by finish() calls precedes the measurement
   // (presumably a warm-up that keeps one-time setup costs out of the timing).
   viennacl::backend::finish();
   vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2;
   viennacl::backend::finish();

   timer.start();
   for (std::size_t runs=0; runs<BENCHMARK_RUNS; ++runs)
   {
     vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2;
   }
   viennacl::backend::finish();   // wait for all outstanding kernels before reading the timer
   double const direct_time = timer.get();
   std::cout << "Execution time per operation, no scheduler: " << direct_time / BENCHMARK_RUNS << " sec" << std::endl;
   std::cout << "Result: " << vcl_vec2[0] << std::endl;

   //
   // Variant 2: the statement object is rebuilt in every iteration, so its construction is part of the timing
   //
   viennacl::backend::finish();
   timer.start();
   for (std::size_t runs=0; runs<BENCHMARK_RUNS; ++runs)
   {
     viennacl::scheduler::statement   my_statement(vcl_vec2, viennacl::op_assign(), alpha * vcl_vec1 + beta * vcl_vec2); // same as vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2;
     viennacl::scheduler::execute(my_statement);
   }
   viennacl::backend::finish();
   double const generate_and_execute_time = timer.get();
   std::cout << "Execution time per operation, with scheduler including statement generation: " << generate_and_execute_time / BENCHMARK_RUNS << " sec" << std::endl;
   std::cout << "Result: " << vcl_vec2[0] << std::endl;

   //
   // Variant 3: the statement is built once outside the timed region and only executed inside the loop
   //
   viennacl::scheduler::statement   my_statement(vcl_vec2, viennacl::op_assign(), alpha * vcl_vec1 + beta * vcl_vec2); // same as vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2;
   viennacl::backend::finish();
   timer.start();
   for (std::size_t runs=0; runs<BENCHMARK_RUNS; ++runs)
   {
     viennacl::scheduler::execute(my_statement);
   }
   viennacl::backend::finish();
   double const execute_only_time = timer.get();
   std::cout << "Execution time per operation, only execution: " << execute_only_time / BENCHMARK_RUNS << " sec" << std::endl;
   std::cout << "Result: " << vcl_vec2[0] << std::endl;

   return 0;
 }


 /** @brief Entry point: prints the device banner, then runs the benchmark in single and (if available) double precision.
  *
  * With VIENNACL_WITH_OPENCL defined, the info string of the current OpenCL device is printed and the
  * double-precision run is skipped when the device reports no double support. Without it, both runs always execute.
  *
  * @return Always 0
  */
 int main()
 {
   // Header for the device information section
   std::cout << std::endl
             << "----------------------------------------------" << std::endl
             << "               Device Info" << std::endl
             << "----------------------------------------------" << std::endl;

 #ifdef VIENNACL_WITH_OPENCL
   std::cout << viennacl::ocl::current_device().info() << std::endl;
 #endif

   // Benchmark title followed by the single-precision section header
   std::cout << std::endl
             << "----------------------------------------------" << std::endl
             << "----------------------------------------------" << std::endl
             << "## Benchmark :: Vector" << std::endl
             << "----------------------------------------------" << std::endl
             << std::endl
             << "   -------------------------------" << std::endl
             << "   # benchmarking single-precision" << std::endl
             << "   -------------------------------" << std::endl;
   run_benchmark<float>();

   // Double precision always runs unless an OpenCL device without double support is active.
   bool double_available = true;
 #ifdef VIENNACL_WITH_OPENCL
   double_available = viennacl::ocl::current_device().double_support();
 #endif

   if (double_available)
   {
     std::cout << std::endl
               << "   -------------------------------" << std::endl
               << "   # benchmarking double-precision" << std::endl
               << "   -------------------------------" << std::endl;
     run_benchmark<double>();
   }

   return 0;
 }


viennacl::tools::timer
Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
Definition: timer.hpp:90

norm_2.hpp
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...

run_benchmark
int run_benchmark()
Definition: scheduler.cpp:51

matrix.hpp
Implementation of the dense matrix class.

viennacl::op_assign
A tag class representing assignment.
Definition: forwards.h:81

viennacl::backend::finish
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54

viennacl::scheduler::execute
void execute(statement const &s)
Definition: execute.hpp:279

BENCHMARK_RUNS
#define BENCHMARK_RUNS
Definition: scheduler.cpp:47

BENCHMARK_VECTOR_SIZE
#define BENCHMARK_VECTOR_SIZE
Definition: scheduler.cpp:46

viennacl::ocl::current_device
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351

inner_prod.hpp
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.

viennacl::ocl::device::info
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995

viennacl::ocl::device::double_support
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956

viennacl::vector< ScalarType >

timer.hpp
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.

main
int main()
Definition: scheduler.cpp:119

vector.hpp
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...

viennacl::copy
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150

ScalarType
float ScalarType
Definition: fft_1d.cpp:42

execute.hpp
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.

viennacl::tools::timer::get
double get() const
Definition: timer.hpp:104

viennacl::scheduler::statement
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502

viennacl::tools::timer::start
void start()
Definition: timer.hpp:97

scalar.hpp
Implementation of the ViennaCL scalar class.

viennacl::fast_copy
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)