#ifndef VIENNACL_WITH_OPENCL
  #define VIENNACL_WITH_OPENCL
#endif

// ... (standard library and ViennaCL includes elided)

#define BENCHMARK_VECTOR_SIZE   100000
template<typename ScalarType>
int run_benchmark()
{
  Timer timer;        // simple benchmarking timer used throughout this example
  double exec_time;

  // measure how long building the OpenCL kernels takes:
  timer.start();
  // ... (scalar kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building scalar kernels: " << exec_time << std::endl;

  timer.start();
  // ... (vector kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building vector kernels: " << exec_time << std::endl;

  timer.start();
  // ... (matrix kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building matrix kernels: " << exec_time << std::endl;

  timer.start();
  // ... (compressed_matrix kernels built here; lines elided)
  exec_time = timer.get();
  std::cout << "Time for building compressed_matrix kernels: " << exec_time << std::endl;

  // fill the host vector std_vec1 with test data (its declaration, and that of the
  // ViennaCL vector vcl_vec1, are in the elided lines above)
  for (std::size_t i = 1; i < BENCHMARK_VECTOR_SIZE; ++i)
    std_vec1[i] = std_vec1[i-1] * ScalarType(1.000001);

  // ... (std_vec1 is presumably copied over to vcl_vec1 in the elided lines)

  // benchmark per-entry read access on the host:
  double std_accumulate = 0;
  double vcl_accumulate = 0;

  timer.start();
  for (std::size_t i = 0; i < BENCHMARK_VECTOR_SIZE; ++i)
    std_accumulate += std_vec1[i];
  exec_time = timer.get();
  std::cout << "Time for " << BENCHMARK_VECTOR_SIZE << " entry accesses on host: " << exec_time << std::endl;
  std::cout << "Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
  std::cout << "Result of operation on host: " << std_accumulate << std::endl;

  // the same accesses through the viennacl::vector; every entry access goes through OpenCL:
  vcl_accumulate = vcl_vec1[0];          // warm-up access
  viennacl::ocl::get_queue().finish();   // wait for pending OpenCL operations before timing
  vcl_accumulate = 0;
  timer.start();
  for (std::size_t i = 0; i < BENCHMARK_VECTOR_SIZE; ++i)
    vcl_accumulate += vcl_vec1[i];
  exec_time = timer.get();
  std::cout << "Time for " << BENCHMARK_VECTOR_SIZE << " entry accesses via OpenCL: " << exec_time << std::endl;
  std::cout << "Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
  std::cout << "Result of operation via OpenCL: " << vcl_accumulate << std::endl;

  // ... (remainder of run_benchmark() elided)
}

int main()
{
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << " Device Info" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;

  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "## Benchmark :: OpenCL performance" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;
  std::cout << " -------------------------------" << std::endl;
  std::cout << " # benchmarking single-precision" << std::endl;
  std::cout << " -------------------------------" << std::endl;
  run_benchmark<float>();

  std::cout << std::endl;
  std::cout << " -------------------------------" << std::endl;
  std::cout << " # benchmarking double-precision" << std::endl;
  std::cout << " -------------------------------" << std::endl;
  run_benchmark<double>();

  // ... (remainder of main() elided)
}
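
Not shown in this excerpt: the double-precision pass is only meaningful on devices that actually support double precision, so the run_benchmark<double>() call is presumably wrapped in a capability check. A minimal sketch using ViennaCL's device query viennacl::ocl::device::double_support():

  if (viennacl::ocl::current_device().double_support())
    run_benchmark<double>();
  else
    std::cout << "# Device does not support double precision; benchmark skipped." << std::endl;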