Go to the source code of this file.
/**
 * @brief Times OPERATION in a loop and prints one result line to std::cout.
 *
 * Expands to a plain statement sequence (not wrapped in do { } while (0)) that
 * is already terminated by ';'. It uses the following names from the invoking
 * scope: timer (start() and get()), Nruns, time_spent, time_previous,
 * time_per_benchmark and prefix.
 *
 * The loop repeats until the accumulated elapsed time reaches
 * time_per_benchmark. Afterwards time_spent is divided by Nruns, so PERF may
 * refer to time_spent as the mean time of a single run.
 *
 * @param OPERATION Statement or expression to be timed; expanded once per loop iteration.
 * @param NAME      String literal printed after prefix (it is concatenated with " : ").
 * @param PERF      Expression streamed to std::cout; evaluated after the averaging step.
 * @param INDEX     String literal printed after PERF (it is concatenated with " ").
 *
 * NOTE(review): if OPERATION only enqueues asynchronous work, a synchronization
 * call such as finish() probably has to follow it inside the timed region --
 * confirm against the backend in use.
 */
#define BENCHMARK_OP(OPERATION, NAME, PERF, INDEX) \
  timer.start(); \
  Nruns = 0; \
  time_spent = 0; \
  while (time_spent < time_per_benchmark) \
  { \
    time_previous = timer.get(); \
    OPERATION; /* the work being measured */ \
    time_spent += timer.get() - time_previous; \
    Nruns += 1; \
  } \
  time_spent /= static_cast<double>(Nruns); \
  std::cout << prefix << NAME " : " << PERF << " " INDEX << std::endl;
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
template<class T >
void bench(size_t BLAS1_N, size_t BLAS2_M, size_t BLAS2_N, size_t BLAS3_M, size_t BLAS3_N, size_t BLAS3_K, std::string const & prefix)
template<class T , class F >