35 template<
typename ScalarType>
38 if (s1 > s2 || s1 < s2)
39 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
43 template<
typename ScalarType,
typename ViennaCLVectorType>
44 ScalarType diff(std::vector<ScalarType>
const &
v1, ViennaCLVectorType
const & vcl_vec)
46 std::vector<ScalarType> v2_cpu(vcl_vec.size());
51 for (
unsigned int i=0;i<v1.size(); ++i)
53 if (
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
54 v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) /
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
58 if (v2_cpu[i] > inf_norm)
65 template<
typename T,
typename U,
typename EpsilonT>
66 void check(T
const & t, U
const & u, EpsilonT eps)
68 EpsilonT rel_error = std::fabs(static_cast<EpsilonT>(
diff(t,u)));
71 std::cerr <<
"Relative error: " << rel_error << std::endl;
72 std::cerr <<
"Aborting!" << std::endl;
75 std::cout <<
"SUCCESS ";
88 return array[
static_cast<std::size_t
>((j*stride1 +
start1) * cols + (i*stride2 + start2))];
90 return array[
static_cast<std::size_t
>((i*stride1 +
start1) * cols + (j*stride2 + start2))];
94 return array[
static_cast<std::size_t
>((j*stride1 +
start1) + (i*stride2 + start2) * rows)];
95 return array[
static_cast<std::size_t
>((i*stride1 +
start1) + (j*stride2 + start2) * rows)];
100 float eps_float,
double eps_double,
101 std::vector<float> & C_float, std::vector<double> & C_double,
102 std::vector<float> & A_float, std::vector<double> & A_double,
103 std::vector<float> & B_float, std::vector<double> & B_double,
109 #ifdef VIENNACL_WITH_CUDA
122 float eps_float,
double eps_double,
123 std::vector<float> & C_float, std::vector<double> & C_double,
124 std::vector<float> & A_float, std::vector<double> & A_double,
125 std::vector<float> & B_float, std::vector<double> & B_double,
131 #ifdef VIENNACL_WITH_CUDA
149 ViennaCLInt C_rows = C_size1 * C_stride1 + C_start1 + 5;
150 ViennaCLInt C_columns = C_size2 * C_stride2 + C_start2 + 5;
158 ViennaCLInt A_rows = A_size1 * A_stride1 + A_start1 + 5;
159 ViennaCLInt A_columns = A_size2 * A_stride2 + A_start2 + 5;
167 ViennaCLInt B_rows = B_size1 * B_stride1 + B_start1 + 5;
168 ViennaCLInt B_columns = B_size2 * B_stride2 + B_start2 + 5;
176 double val_double = 0;
179 float val_A_float =
get_value(A_float, i, k, A_start1, A_start2, A_stride1, A_stride2, A_rows, A_columns, order_A, trans_A);
180 double val_A_double =
get_value(A_double, i, k, A_start1, A_start2, A_stride1, A_stride2, A_rows, A_columns, order_A, trans_A);
182 float val_B_float =
get_value(B_float, k, j, B_start1, B_start2, B_stride1, B_stride2, B_rows, B_columns, order_B, trans_B);
183 double val_B_double =
get_value(B_double, k, j, B_start1, B_start2, B_stride1, B_stride2, B_rows, B_columns, order_B, trans_B);
185 val_float += val_A_float * val_B_float;
186 val_double += val_A_double * val_B_double;
192 C_float [
static_cast<std::size_t
>((i*C_stride1 + C_start1) * C_columns + (j*C_stride2 + C_start2))] = val_float;
193 C_double[
static_cast<std::size_t
>((i*C_stride1 + C_start1) * C_columns + (j*C_stride2 + C_start2))] = val_double;
197 C_float [
static_cast<std::size_t
>((i*C_stride1 + C_start1) + (j*C_stride2 + C_start2) * C_rows)] = val_float;
198 C_double[
static_cast<std::size_t
>((i*C_stride1 + C_start1) + (j*C_stride2 + C_start2) * C_rows)] = val_double;
204 order_A, trans_A, order_B, trans_B, order_C,
205 C_size1, C_size2, size_k,
207 viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A ==
ViennaCLRowMajor) ? A_columns : A_rows,
208 viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B ==
ViennaCLRowMajor) ? B_columns : B_rows,
210 viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C ==
ViennaCLRowMajor) ? C_columns : C_rows);
211 check(C_float, host_C_float, eps_float);
214 order_A, trans_A, order_B, trans_B, order_C,
215 C_size1, C_size2, size_k,
217 viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A ==
ViennaCLRowMajor) ? A_columns : A_rows,
218 viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B ==
ViennaCLRowMajor) ? B_columns : B_rows,
220 viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C ==
ViennaCLRowMajor) ? C_columns : C_rows);
221 check(C_double, host_C_double, eps_double);
223 #ifdef VIENNACL_WITH_CUDA
225 order_A, trans_A, order_B, trans_B, order_C,
226 C_size1, C_size2, size_k,
232 check(C_float, cuda_C_float, eps_float);
235 order_A, trans_A, order_B, trans_B, order_C,
236 C_size1, C_size2, size_k,
242 check(C_double, cuda_C_double, eps_double);
245 #ifdef VIENNACL_WITH_OPENCL
246 ViennaCLOpenCLSgemm(my_backend,
247 order_A, trans_A, order_B, trans_B, order_C,
248 C_size1, C_size2, size_k,
250 viennacl::traits::opencl_handle(opencl_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A ==
ViennaCLRowMajor) ? A_columns : A_rows,
251 viennacl::traits::opencl_handle(opencl_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B ==
ViennaCLRowMajor) ? B_columns : B_rows,
253 viennacl::traits::opencl_handle(opencl_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C ==
ViennaCLRowMajor) ? C_columns : C_rows);
254 check(C_float, opencl_C_float, eps_float);
256 if (opencl_A_double != NULL && opencl_B_double != NULL && opencl_C_double != NULL)
258 ViennaCLOpenCLDgemm(my_backend,
259 order_A, trans_A, order_B, trans_B, order_C,
260 C_size1, C_size2, size_k,
262 viennacl::traits::opencl_handle(*opencl_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A ==
ViennaCLRowMajor) ? A_columns : A_rows,
263 viennacl::traits::opencl_handle(*opencl_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B ==
ViennaCLRowMajor) ? B_columns : B_rows,
265 viennacl::traits::opencl_handle(*opencl_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C ==
ViennaCLRowMajor) ? C_columns : C_rows);
266 check(C_double, *opencl_C_double, eps_double);
270 std::cout << std::endl;
274 float eps_float,
double eps_double,
275 std::vector<float> & C_float, std::vector<double> & C_double,
276 std::vector<float> & A_float, std::vector<double> & A_double,
277 std::vector<float> & B_float, std::vector<double> & B_double,
282 #ifdef VIENNACL_WITH_CUDA
295 float eps_float,
double eps_double,
296 std::vector<float> & C_float, std::vector<double> & C_double,
297 std::vector<float> & A_float, std::vector<double> & A_double,
298 std::vector<float> & B_float, std::vector<double> & B_double,
303 #ifdef VIENNACL_WITH_CUDA
315 std::cout <<
" -> trans-trans: ";
317 eps_float, eps_double,
318 C_float, C_double, A_float, A_double, B_float, B_double,
319 order_C, order_A, order_B,
321 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
322 #ifdef VIENNACL_WITH_CUDA
323 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
325 #ifdef VIENNACL_WITH_OPENCL
326 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
330 std::cout <<
" -> trans-no: ";
332 eps_float, eps_double,
333 C_float, C_double, A_float, A_double, B_float, B_double,
334 order_C, order_A, order_B,
336 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
337 #ifdef VIENNACL_WITH_CUDA
338 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
340 #ifdef VIENNACL_WITH_OPENCL
341 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
345 std::cout <<
" -> no-trans: ";
347 eps_float, eps_double,
348 C_float, C_double, A_float, A_double, B_float, B_double,
349 order_C, order_A, order_B,
351 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
352 #ifdef VIENNACL_WITH_CUDA
353 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
355 #ifdef VIENNACL_WITH_OPENCL
356 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
360 std::cout <<
" -> no-no: ";
362 eps_float, eps_double,
363 C_float, C_double, A_float, A_double, B_float, B_double,
364 order_C, order_A, order_B,
366 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
367 #ifdef VIENNACL_WITH_CUDA
368 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
370 #ifdef VIENNACL_WITH_OPENCL
371 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
379 float eps_float,
double eps_double,
380 std::vector<float> & C_float, std::vector<double> & C_double,
381 std::vector<float> & A_float, std::vector<double> & A_double,
382 std::vector<float> & B_float, std::vector<double> & B_double,
386 #ifdef VIENNACL_WITH_CUDA
399 float eps_float,
double eps_double,
400 std::vector<float> & C_float, std::vector<double> & C_double,
401 std::vector<float> & A_float, std::vector<double> & A_double,
402 std::vector<float> & B_float, std::vector<double> & B_double,
406 #ifdef VIENNACL_WITH_CUDA
418 std::cout <<
" -> C: row, A: row, B: row" << std::endl;
420 eps_float, eps_double,
421 C_float, C_double, A_float, A_double, B_float, B_double,
423 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
424 #ifdef VIENNACL_WITH_CUDA
425 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
427 #ifdef VIENNACL_WITH_OPENCL
428 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
432 std::cout <<
" -> C: row, A: row, B: col" << std::endl;
434 eps_float, eps_double,
435 C_float, C_double, A_float, A_double, B_float, B_double,
437 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
438 #ifdef VIENNACL_WITH_CUDA
439 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
441 #ifdef VIENNACL_WITH_OPENCL
442 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
446 std::cout <<
" -> C: row, A: col, B: row" << std::endl;
448 eps_float, eps_double,
449 C_float, C_double, A_float, A_double, B_float, B_double,
451 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
452 #ifdef VIENNACL_WITH_CUDA
453 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
455 #ifdef VIENNACL_WITH_OPENCL
456 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
460 std::cout <<
" -> C: row, A: col, B: col" << std::endl;
462 eps_float, eps_double,
463 C_float, C_double, A_float, A_double, B_float, B_double,
465 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
466 #ifdef VIENNACL_WITH_CUDA
467 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
469 #ifdef VIENNACL_WITH_OPENCL
470 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
475 std::cout <<
" -> C: col, A: row, B: row" << std::endl;
477 eps_float, eps_double,
478 C_float, C_double, A_float, A_double, B_float, B_double,
480 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
481 #ifdef VIENNACL_WITH_CUDA
482 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
484 #ifdef VIENNACL_WITH_OPENCL
485 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
489 std::cout <<
" -> C: col, A: row, B: col" << std::endl;
491 eps_float, eps_double,
492 C_float, C_double, A_float, A_double, B_float, B_double,
494 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
495 #ifdef VIENNACL_WITH_CUDA
496 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
498 #ifdef VIENNACL_WITH_OPENCL
499 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
503 std::cout <<
" -> C: col, A: col, B: row" << std::endl;
505 eps_float, eps_double,
506 C_float, C_double, A_float, A_double, B_float, B_double,
508 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
509 #ifdef VIENNACL_WITH_CUDA
510 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
512 #ifdef VIENNACL_WITH_OPENCL
513 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
517 std::cout <<
" -> C: col, A: col, B: col" << std::endl;
519 eps_float, eps_double,
520 C_float, C_double, A_float, A_double, B_float, B_double,
522 host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
523 #ifdef VIENNACL_WITH_CUDA
524 , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
526 #ifdef VIENNACL_WITH_OPENCL
527 , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
541 std::size_t
size = 500*500;
542 float eps_float = 1e-5f;
543 double eps_double = 1e-12;
545 std::vector<float> C_float(size);
546 std::vector<float> A_float(size);
547 std::vector<float> B_float(size);
549 std::vector<double> C_double(size);
550 std::vector<double> A_double(size);
551 std::vector<double> B_double(size);
555 for (std::size_t i = 0; i <
size; ++i)
557 C_float[i] = 0.5f + 0.1f * randomFloat();
558 A_float[i] = 0.5f + 0.1f * randomFloat();
559 B_float[i] = 0.5f + 0.1f * randomFloat();
561 C_double[i] = 0.5 + 0.2 * randomDouble();
562 A_double[i] = 0.5 + 0.2 * randomDouble();
563 B_double[i] = 0.5 + 0.2 * randomDouble();
580 #ifdef VIENNACL_WITH_CUDA
591 #ifdef VIENNACL_WITH_OPENCL
612 check(C_float, host_C_float, eps_float);
613 check(A_float, host_A_float, eps_float);
614 check(B_float, host_B_float, eps_float);
616 check(C_double, host_C_double, eps_double);
617 check(A_double, host_A_double, eps_double);
618 check(B_double, host_B_double, eps_double);
620 #ifdef VIENNACL_WITH_CUDA
621 check(C_float, cuda_C_float, eps_float);
622 check(A_float, cuda_A_float, eps_float);
623 check(B_float, cuda_B_float, eps_float);
625 check(C_double, cuda_C_double, eps_double);
626 check(A_double, cuda_A_double, eps_double);
627 check(B_double, cuda_B_double, eps_double);
629 #ifdef VIENNACL_WITH_OPENCL
630 check(C_float, opencl_C_float, eps_float);
631 check(A_float, opencl_A_float, eps_float);
632 check(B_float, opencl_B_float, eps_float);
636 check(C_double, *opencl_C_double, eps_double);
637 check(A_double, *opencl_A_double, eps_double);
638 check(B_double, *opencl_B_double, eps_double);
642 std::cout << std::endl;
645 eps_float, eps_double,
649 host_C_float, host_C_double,
650 host_A_float, host_A_double,
651 host_B_float, host_B_double
652 #ifdef VIENNACL_WITH_CUDA
653 , cuda_C_float, cuda_C_double
654 , cuda_A_float, cuda_A_double
655 , cuda_B_float, cuda_B_double
658 , opencl_C_float, opencl_C_double
659 , opencl_A_float, opencl_A_double
660 , opencl_B_float, opencl_B_double
665 #ifdef VIENNACL_WITH_OPENCL
669 delete opencl_C_double;
670 delete opencl_A_double;
671 delete opencl_B_double;
681 std::cout << std::endl <<
"!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl;
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend *backend)
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Generic backend for CUDA, OpenCL, host-based stuff.
std::vector< std::vector< NumericT > > trans(std::vector< std::vector< NumericT > > const &A)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id)
result_of::size_type< T >::type start1(T const &obj)
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also suitable for CUDA and OpenMP.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend *backend)
void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, std::vector< float > &C_float, std::vector< double > &C_double, std::vector< float > &A_float, std::vector< double > &A_double, std::vector< float > &B_float, std::vector< double > &B_double, ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B, ViennaCLTranspose trans_A, ViennaCLTranspose trans_B, viennacl::vector< float > &host_C_float, viennacl::vector< double > &host_C_double, viennacl::vector< float > &host_A_float, viennacl::vector< double > &host_A_double, viennacl::vector< float > &host_B_float, viennacl::vector< double > &host_B_double)
void check(T const &t, U const &u, EpsilonT eps)
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations such as norms and inner products are located in linalg/vector_operations.hpp.
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU).
A small collection of sequential random number generators.
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
#define VIENNACL_WITH_OPENCL
T get_value(std::vector< T > &array, ViennaCLInt i, ViennaCLInt j, ViennaCLInt start1, ViennaCLInt start2, ViennaCLInt stride1, ViennaCLInt stride2, ViennaCLInt rows, ViennaCLInt cols, ViennaCLOrder order, ViennaCLTranspose trans)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context.