40 template<
typename NumericT,
typename VCLMatrixType>
41 bool check_for_equality(std::vector<std::vector<NumericT> >
const & std_A, VCLMatrixType
const & vcl_A,
double epsilon)
43 std::vector<std::vector<NumericT> > vcl_A_cpu(vcl_A.size1(), std::vector<NumericT>(vcl_A.size2()));
47 for (std::size_t i=0; i<std_A.size(); ++i)
49 for (std::size_t j=0; j<std_A[i].size(); ++j)
51 if (std::fabs(std_A[i][j] - vcl_A_cpu[i][j]) > 0)
53 if ( (std::abs(std_A[i][j] - vcl_A_cpu[i][j]) /
std::max(std::fabs(std_A[i][j]), std::fabs(vcl_A_cpu[i][j])) > epsilon) || std::fabs(vcl_A_cpu[i][j] - vcl_A_cpu[i][j]) > 0 )
55 std::cout <<
"Error at index (" << i <<
", " << j <<
"): " << std_A[i][j] <<
" vs " << vcl_A_cpu[i][j] << std::endl;
56 std::cout << std::endl <<
"TEST failed!" << std::endl;
63 std::cout <<
"PASSED!" << std::endl;
70 template<
typename STLMatrixType,
71 typename ViennaCLMatrixType1,
typename ViennaCLMatrixType2,
typename ViennaCLMatrixType3>
73 STLMatrixType & std_A, STLMatrixType & std_B, STLMatrixType & std_C,
74 ViennaCLMatrixType1 & vcl_A, ViennaCLMatrixType2 & vcl_B, ViennaCLMatrixType3 vcl_C)
79 cpu_value_type alpha = cpu_value_type(3.1415);
82 cpu_value_type beta = cpu_value_type(2.7182);
89 std::cout <<
"Checking for zero_matrix initializer..." << std::endl;
90 std_A = std::vector<std::vector<cpu_value_type> >(std_A.size(), std::vector<cpu_value_type>(std_A[0].size()));
95 std::cout <<
"Checking for scalar_matrix initializer..." << std::endl;
96 std_A = std::vector<std::vector<cpu_value_type> >(std_A.size(), std::vector<cpu_value_type>(std_A[0].size(), alpha));
101 std_A = std::vector<std::vector<cpu_value_type> >(std_A.size(), std::vector<cpu_value_type>(std_A[0].size(), gpu_beta));
113 std::cout << std::endl;
121 std::cout <<
"Testing matrix assignment... ";
128 if (std_A.size() == std_A[0].size())
130 std::cout <<
"Testing matrix assignment (transposed)... ";
131 for (std::size_t i=0; i<std_A.size(); ++i)
132 for (std::size_t j=0; j<std_A[i].size(); ++j)
133 std_A[i][j] = std_B[j][i];
149 std::cout <<
"Testing upper left copy to GPU... ";
156 std::cout <<
"Testing lower right copy to GPU... ";
166 std::cout <<
"Testing upper left copy to A... ";
170 std::cout <<
"Testing lower right copy to C... ";
181 std::cout <<
"Inplace add: ";
182 for (std::size_t i=0; i<std_C.size(); ++i)
183 for (std::size_t j=0; j<std_C[i].size(); ++j)
184 std_C[i][j] += std_C[i][j];
190 if (std_C.size() == std_C[0].size())
192 std::cout <<
"Inplace add (transposed): ";
193 STLMatrixType std_C2(std_C);
194 for (std::size_t i=0; i<std_C.size(); ++i)
195 for (std::size_t j=0; j<std_C[i].size(); ++j)
196 std_C[i][j] += std_C2[j][i];
202 std::cout <<
"Inplace add (transposed, expression): ";
204 for (std::size_t i=0; i<std_C.size(); ++i)
205 for (std::size_t j=0; j<std_C[i].size(); ++j)
206 std_C[i][j] += std_C2[j][i] + std_C2[j][i];
213 std::cout <<
"Scaled inplace add: ";
214 for (std::size_t i=0; i<std_C.size(); ++i)
215 for (std::size_t j=0; j<std_C[i].size(); ++j)
216 std_C[i][j] += beta * std_A[i][j];
217 vcl_C += gpu_beta * vcl_A;
222 std::cout <<
"Add: ";
223 for (std::size_t i=0; i<std_C.size(); ++i)
224 for (std::size_t j=0; j<std_C[i].size(); ++j)
225 std_C[i][j] = std_A[i][j] + std_B[i][j];
226 vcl_C = vcl_A + vcl_B;
231 std::cout <<
"Add with flipsign: ";
232 for (std::size_t i=0; i<std_C.size(); ++i)
233 for (std::size_t j=0; j<std_C[i].size(); ++j)
234 std_C[i][j] = -std_A[i][j] + std_B[i][j];
235 vcl_C = - vcl_A + vcl_B;
241 std::cout <<
"Scaled add (left): ";
242 for (std::size_t i=0; i<std_C.size(); ++i)
243 for (std::size_t j=0; j<std_C[i].size(); ++j)
244 std_C[i][j] = cpu_value_type(
long(alpha)) * std_A[i][j] + std_B[i][j];
245 vcl_C = long(alpha) * vcl_A + vcl_B;
250 for (std::size_t i=0; i<std_C.size(); ++i)
251 for (std::size_t j=0; j<std_C[i].size(); ++j)
252 std_C[i][j] = cpu_value_type(
float(alpha)) * std_A[i][j] + std_B[i][j];
253 vcl_C = float(alpha) * vcl_A + vcl_B;
258 for (std::size_t i=0; i<std_C.size(); ++i)
259 for (std::size_t j=0; j<std_C[i].size(); ++j)
260 std_C[i][j] = cpu_value_type(
double(alpha)) * std_A[i][j] + std_B[i][j];
261 vcl_C = double(alpha) * vcl_A + vcl_B;
266 std::cout <<
"Scaled add (left): ";
267 vcl_C = gpu_alpha * vcl_A + vcl_B;
272 std::cout <<
"Scaled add (right): ";
273 for (std::size_t i=0; i<std_C.size(); ++i)
274 for (std::size_t j=0; j<std_C[i].size(); ++j)
275 std_C[i][j] = std_A[i][j] + std_B[i][j] * cpu_value_type(
long(beta));
276 vcl_C = vcl_A + vcl_B * long(beta);
281 for (std::size_t i=0; i<std_C.size(); ++i)
282 for (std::size_t j=0; j<std_C[i].size(); ++j)
283 std_C[i][j] = std_A[i][j] + std_B[i][j] * cpu_value_type(
float(beta));
284 vcl_C = vcl_A + vcl_B * float(beta);
289 for (std::size_t i=0; i<std_C.size(); ++i)
290 for (std::size_t j=0; j<std_C[i].size(); ++j)
291 std_C[i][j] = std_A[i][j] + std_B[i][j] * cpu_value_type(
double(beta));
292 vcl_C = vcl_A + vcl_B * double(beta);
297 std::cout <<
"Scaled add (right): ";
298 vcl_C = vcl_A + gpu_beta * vcl_B;
302 std::cout <<
"Scaled add (right, with division): ";
303 for (std::size_t i=0; i<std_C.size(); ++i)
304 for (std::size_t j=0; j<std_C[i].size(); ++j)
305 std_C[i][j] = std_A[i][j] + std_B[i][j] / cpu_value_type(
long(beta));
306 vcl_C = vcl_A + vcl_B / long(beta);
311 for (std::size_t i=0; i<std_C.size(); ++i)
312 for (std::size_t j=0; j<std_C[i].size(); ++j)
313 std_C[i][j] = std_A[i][j] + std_B[i][j] / cpu_value_type(
float(beta));
314 vcl_C = vcl_A + vcl_B / float(beta);
319 for (std::size_t i=0; i<std_C.size(); ++i)
320 for (std::size_t j=0; j<std_C[i].size(); ++j)
321 std_C[i][j] = std_A[i][j] + std_B[i][j] / cpu_value_type(
double(beta));
322 vcl_C = vcl_A + vcl_B / double(beta);
328 std::cout <<
"Scaled add (both): ";
329 for (std::size_t i=0; i<std_C.size(); ++i)
330 for (std::size_t j=0; j<std_C[i].size(); ++j)
331 std_C[i][j] = alpha * std_A[i][j] + beta * std_B[i][j];
332 vcl_C = alpha * vcl_A + beta * vcl_B;
337 std::cout <<
"Scaled add (both): ";
338 vcl_C = gpu_alpha * vcl_A + gpu_beta * vcl_B;
347 std::cout <<
"Inplace sub: ";
348 for (std::size_t i=0; i<std_C.size(); ++i)
349 for (std::size_t j=0; j<std_C[i].size(); ++j)
350 std_C[i][j] -= std_B[i][j];
356 if (std_C.size() == std_C[0].size())
358 std::cout <<
"Inplace sub (transposed): ";
359 STLMatrixType std_C2(std_C);
360 for (std::size_t i=0; i<std_C.size(); ++i)
361 for (std::size_t j=0; j<std_C[i].size(); ++j)
362 std_C[i][j] -= cpu_value_type(2) * std_C2[j][i];
368 std::cout <<
"Inplace sub (transposed, expression): ";
370 for (std::size_t i=0; i<std_C.size(); ++i)
371 for (std::size_t j=0; j<std_C[i].size(); ++j)
372 std_C[i][j] -= std_C2[j][i] + std_C2[j][i];
379 std::cout <<
"Scaled Inplace sub: ";
380 for (std::size_t i=0; i<std_C.size(); ++i)
381 for (std::size_t j=0; j<std_C[i].size(); ++j)
382 std_C[i][j] -= alpha * std_B[i][j];
383 vcl_C -= alpha * vcl_B;
391 std::cout <<
"Sub: ";
392 for (std::size_t i=0; i<std_C.size(); ++i)
393 for (std::size_t j=0; j<std_C[i].size(); ++j)
394 std_C[i][j] = std_A[i][j] - std_B[i][j];
395 vcl_C = vcl_A - vcl_B;
400 std::cout <<
"Scaled sub (left): ";
401 for (std::size_t i=0; i<std_C.size(); ++i)
402 for (std::size_t j=0; j<std_C[i].size(); ++j)
403 std_B[i][j] = alpha * std_A[i][j] - std_C[i][j];
404 vcl_B = alpha * vcl_A - vcl_C;
409 std::cout <<
"Scaled sub (left): ";
410 vcl_B = gpu_alpha * vcl_A - vcl_C;
415 std::cout <<
"Scaled sub (right): ";
416 for (std::size_t i=0; i<std_C.size(); ++i)
417 for (std::size_t j=0; j<std_C[i].size(); ++j)
418 std_B[i][j] = std_A[i][j] - beta * std_C[i][j];
419 vcl_B = vcl_A - vcl_C * beta;
424 std::cout <<
"Scaled sub (right): ";
425 vcl_B = vcl_A - vcl_C * gpu_beta;
430 std::cout <<
"Scaled sub (both): ";
431 for (std::size_t i=0; i<std_C.size(); ++i)
432 for (std::size_t j=0; j<std_C[i].size(); ++j)
433 std_B[i][j] = alpha * std_A[i][j] - beta * std_C[i][j];
434 vcl_B = alpha * vcl_A - vcl_C * beta;
439 std::cout <<
"Scaled sub (both): ";
440 vcl_B = gpu_alpha * vcl_A - vcl_C * gpu_beta;
445 std::cout <<
"Unary operator-: ";
446 for (std::size_t i=0; i<std_C.size(); ++i)
447 for (std::size_t j=0; j<std_C[i].size(); ++j)
448 std_C[i][j] = - std_A[i][j];
461 std::cout <<
"Multiplication with CPU scalar: ";
462 for (std::size_t i=0; i<std_A.size(); ++i)
463 for (std::size_t j=0; j<std_A[i].size(); ++j)
464 std_A[i][j] *= cpu_value_type(
long(alpha));
465 vcl_A *= long(alpha);
470 for (std::size_t i=0; i<std_A.size(); ++i)
471 for (std::size_t j=0; j<std_A[i].size(); ++j)
472 std_A[i][j] *= cpu_value_type(
float(alpha));
473 vcl_A *= float(alpha);
478 for (std::size_t i=0; i<std_A.size(); ++i)
479 for (std::size_t j=0; j<std_A[i].size(); ++j)
480 std_A[i][j] *= cpu_value_type(
double(alpha));
481 vcl_A *= double(alpha);
486 std::cout <<
"Multiplication with GPU scalar: ";
487 for (std::size_t i=0; i<std_A.size(); ++i)
488 for (std::size_t j=0; j<std_A[i].size(); ++j)
496 std::cout <<
"Division with CPU scalar: ";
497 for (std::size_t i=0; i<std_A.size(); ++i)
498 for (std::size_t j=0; j<std_A[i].size(); ++j)
499 std_A[i][j] /= cpu_value_type(
long(alpha));
500 vcl_A /= long(alpha);
505 for (std::size_t i=0; i<std_A.size(); ++i)
506 for (std::size_t j=0; j<std_A[i].size(); ++j)
507 std_A[i][j] /= cpu_value_type(
float(alpha));
508 vcl_A /= float(alpha);
513 for (std::size_t i=0; i<std_A.size(); ++i)
514 for (std::size_t j=0; j<std_A[i].size(); ++j)
515 std_A[i][j] /= cpu_value_type(
double(alpha));
516 vcl_A /= double(alpha);
521 std::cout <<
"Division with GPU scalar: ";
522 for (std::size_t i=0; i<std_A.size(); ++i)
523 for (std::size_t j=0; j<std_A[i].size(); ++j)
532 std::cout <<
"Testing elementwise multiplication..." << std::endl;
533 std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142)));
534 for (std::size_t i=0; i<std_A.size(); ++i)
535 for (std::size_t j=0; j<std_A[i].size(); ++j)
536 std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j];
540 for (std::size_t i=0; i<std_A.size(); ++i)
541 for (std::size_t j=0; j<std_A[i].size(); ++j)
542 std_A[i][j] = std_A[i][j] * std_B[i][j];
548 for (std::size_t i=0; i<std_A.size(); ++i)
549 for (std::size_t j=0; j<std_A[i].size(); ++j)
550 std_A[i][j] += std_A[i][j] * std_B[i][j];
556 for (std::size_t i=0; i<std_A.size(); ++i)
557 for (std::size_t j=0; j<std_A[i].size(); ++j)
558 std_A[i][j] -= std_A[i][j] * std_B[i][j];
565 for (std::size_t i=0; i<std_A.size(); ++i)
566 for (std::size_t j=0; j<std_A[i].size(); ++j)
567 std_A[i][j] = (std_A[i][j] + std_B[i][j]) * std_B[i][j];
573 for (std::size_t i=0; i<std_A.size(); ++i)
574 for (std::size_t j=0; j<std_A[i].size(); ++j)
575 std_A[i][j] += (std_A[i][j] + std_B[i][j]) * std_B[i][j];
581 for (std::size_t i=0; i<std_A.size(); ++i)
582 for (std::size_t j=0; j<std_A[i].size(); ++j)
583 std_A[i][j] -= (std_A[i][j] + std_B[i][j]) * std_B[i][j];
590 for (std::size_t i=0; i<std_A.size(); ++i)
591 for (std::size_t j=0; j<std_A[i].size(); ++j)
592 std_A[i][j] = std_A[i][j] * (std_B[i][j] + std_A[i][j]);
598 for (std::size_t i=0; i<std_A.size(); ++i)
599 for (std::size_t j=0; j<std_A[i].size(); ++j)
600 std_A[i][j] += std_A[i][j] * (std_B[i][j] + std_A[i][j]);
606 for (std::size_t i=0; i<std_A.size(); ++i)
607 for (std::size_t j=0; j<std_A[i].size(); ++j)
608 std_A[i][j] -= std_A[i][j] * (std_B[i][j] + std_A[i][j]);
615 for (std::size_t i=0; i<std_A.size(); ++i)
616 for (std::size_t j=0; j<std_A[i].size(); ++j)
617 std_A[i][j] = (std_A[i][j] + std_B[i][j]) * (std_B[i][j] + std_A[i][j]);
623 for (std::size_t i=0; i<std_A.size(); ++i)
624 for (std::size_t j=0; j<std_A[i].size(); ++j)
625 std_A[i][j] += (std_A[i][j] + std_B[i][j]) * (std_B[i][j] + std_A[i][j]);
631 for (std::size_t i=0; i<std_A.size(); ++i)
632 for (std::size_t j=0; j<std_A[i].size(); ++j)
633 std_A[i][j] -= (std_A[i][j] + std_B[i][j]) * (std_B[i][j] + std_A[i][j]);
640 std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142)));
641 for (std::size_t i=0; i<std_A.size(); ++i)
642 for (std::size_t j=0; j<std_A[i].size(); ++j)
643 std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j];
647 for (std::size_t i=0; i<std_A.size(); ++i)
648 for (std::size_t j=0; j<std_A[i].size(); ++j)
649 std_A[i][j] = std_A[i][j] / std_B[i][j];
655 for (std::size_t i=0; i<std_A.size(); ++i)
656 for (std::size_t j=0; j<std_A[i].size(); ++j)
657 std_A[i][j] += std_A[i][j] / std_B[i][j];
663 for (std::size_t i=0; i<std_A.size(); ++i)
664 for (std::size_t j=0; j<std_A[i].size(); ++j)
665 std_A[i][j] -= std_A[i][j] / std_B[i][j];
672 for (std::size_t i=0; i<std_A.size(); ++i)
673 for (std::size_t j=0; j<std_A[i].size(); ++j)
674 std_A[i][j] = (std_A[i][j] + std_B[i][j]) / std_B[i][j];
680 for (std::size_t i=0; i<std_A.size(); ++i)
681 for (std::size_t j=0; j<std_A[i].size(); ++j)
682 std_A[i][j] += (std_A[i][j] + std_B[i][j]) / std_B[i][j];
688 for (std::size_t i=0; i<std_A.size(); ++i)
689 for (std::size_t j=0; j<std_A[i].size(); ++j)
690 std_A[i][j] -= (std_A[i][j] + std_B[i][j]) / std_B[i][j];
697 for (std::size_t i=0; i<std_A.size(); ++i)
698 for (std::size_t j=0; j<std_A[i].size(); ++j)
699 std_A[i][j] = std_A[i][j] / (std_B[i][j] + std_A[i][j]);
705 for (std::size_t i=0; i<std_A.size(); ++i)
706 for (std::size_t j=0; j<std_A[i].size(); ++j)
707 std_A[i][j] += std_A[i][j] / (std_B[i][j] + std_A[i][j]);
713 for (std::size_t i=0; i<std_A.size(); ++i)
714 for (std::size_t j=0; j<std_A[i].size(); ++j)
715 std_A[i][j] -= std_A[i][j] / (std_B[i][j] + std_A[i][j]);
722 for (std::size_t i=0; i<std_A.size(); ++i)
723 for (std::size_t j=0; j<std_A[i].size(); ++j)
724 std_A[i][j] = (std_A[i][j] + std_B[i][j]) / (std_B[i][j] + std_A[i][j]);
730 for (std::size_t i=0; i<std_A.size(); ++i)
731 for (std::size_t j=0; j<std_A[i].size(); ++j)
732 std_A[i][j] += (std_A[i][j] + std_B[i][j]) / (std_B[i][j] + std_A[i][j]);
738 for (std::size_t i=0; i<std_A.size(); ++i)
739 for (std::size_t j=0; j<std_A[i].size(); ++j)
740 std_A[i][j] -= (std_A[i][j] + std_B[i][j]) / (std_B[i][j] + std_A[i][j]);
747 std::cout <<
"Testing unary element_pow()..." << std::endl;
749 std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142)));
750 for (std::size_t i=0; i<std_A.size(); ++i)
751 for (std::size_t j=0; j<std_A[i].size(); ++j)
752 std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j];
756 for (std::size_t i=0; i<std_C.size(); ++i)
757 for (std::size_t j=0; j<std_C[i].size(); ++j)
758 std_C[i][j] = std::pow(std_A[i][j], std_B[i][j]);
759 vcl_C = viennacl::linalg::element_pow(vcl_A, vcl_B);
764 for (std::size_t i=0; i<std_C.size(); ++i)
765 for (std::size_t j=0; j<std_C[i].size(); ++j)
766 std_C[i][j] += std::pow(std_A[i][j], std_B[i][j]);
767 vcl_C += viennacl::linalg::element_pow(vcl_A, vcl_B);
772 for (std::size_t i=0; i<std_C.size(); ++i)
773 for (std::size_t j=0; j<std_C[i].size(); ++j)
774 std_C[i][j] -= std::pow(std_A[i][j], std_B[i][j]);
775 vcl_C -= viennacl::linalg::element_pow(vcl_A, vcl_B);
781 for (std::size_t i=0; i<std_C.size(); ++i)
782 for (std::size_t j=0; j<std_C[i].size(); ++j)
783 std_C[i][j] = std::pow(std_A[i][j] + std_B[i][j], std_B[i][j]);
784 vcl_C = viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B);
789 for (std::size_t i=0; i<std_C.size(); ++i)
790 for (std::size_t j=0; j<std_C[i].size(); ++j)
791 std_C[i][j] += std::pow(std_A[i][j] + std_B[i][j], std_B[i][j]);
792 vcl_C += viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B);
797 for (std::size_t i=0; i<std_C.size(); ++i)
798 for (std::size_t j=0; j<std_C[i].size(); ++j)
799 std_C[i][j] -= std::pow(std_A[i][j] + std_B[i][j], std_B[i][j]);
800 vcl_C -= viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B);
806 for (std::size_t i=0; i<std_C.size(); ++i)
807 for (std::size_t j=0; j<std_C[i].size(); ++j)
808 std_C[i][j] = std::pow(std_A[i][j], std_B[i][j] + std_A[i][j]);
809 vcl_C = viennacl::linalg::element_pow(vcl_A, vcl_B + vcl_A);
814 for (std::size_t i=0; i<std_C.size(); ++i)
815 for (std::size_t j=0; j<std_C[i].size(); ++j)
816 std_C[i][j] += std::pow(std_A[i][j], std_B[i][j] + std_A[i][j]);
817 vcl_C += viennacl::linalg::element_pow(vcl_A, vcl_B + vcl_A);
822 for (std::size_t i=0; i<std_C.size(); ++i)
823 for (std::size_t j=0; j<std_C[i].size(); ++j)
824 std_C[i][j] -= std::pow(std_A[i][j], std_B[i][j] + std_A[i][j]);
825 vcl_C -= viennacl::linalg::element_pow(vcl_A, vcl_B + vcl_A);
831 for (std::size_t i=0; i<std_C.size(); ++i)
832 for (std::size_t j=0; j<std_C[i].size(); ++j)
833 std_C[i][j] = std::pow(std_A[i][j] + std_B[i][j], std_B[i][j] + std_A[i][j]);
834 vcl_C = viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B + vcl_A);
839 for (std::size_t i=0; i<std_C.size(); ++i)
840 for (std::size_t j=0; j<std_C[i].size(); ++j)
841 std_C[i][j] += std::pow(std_A[i][j] + std_B[i][j], std_B[i][j] + std_A[i][j]);
842 vcl_C += viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B + vcl_A);
847 for (std::size_t i=0; i<std_C.size(); ++i)
848 for (std::size_t j=0; j<std_C[i].size(); ++j)
849 std_C[i][j] -= std::pow(std_A[i][j] + std_B[i][j], std_B[i][j] + std_A[i][j]);
850 vcl_C -= viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B + vcl_A);
856 std::cout <<
"Testing unary elementwise operations..." << std::endl;
858 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \
859 std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142))); \
860 for (std::size_t i=0; i<std_C.size(); ++i) \
861 for (std::size_t j=0; j<std_C[i].size(); ++j) \
862 std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j]; \
863 for (std::size_t i=0; i<std_C.size(); ++i) \
864 for (std::size_t j=0; j<std_C[i].size(); ++j) \
865 std_C[i][j] = cpu_value_type(2.7172) * std_A[i][j]; \
866 viennacl::copy(std_A, vcl_A); \
867 viennacl::copy(std_B, vcl_B); \
868 viennacl::copy(std_C, vcl_C); \
869 viennacl::copy(std_B, vcl_B); \
871 for (std::size_t i=0; i<std_C.size(); ++i) \
872 for (std::size_t j=0; j<std_C[i].size(); ++j) \
873 std_C[i][j] = std::FUNCNAME(std_A[i][j]); \
874 vcl_C = viennacl::linalg::element_##FUNCNAME(vcl_A); \
876 if (!check_for_equality(std_C, vcl_C, epsilon)) \
878 std::cout << "Failure at C = " << #FUNCNAME << "(A)" << std::endl; \
879 return EXIT_FAILURE; \
882 for (std::size_t i=0; i<std_C.size(); ++i) \
883 for (std::size_t j=0; j<std_C[i].size(); ++j) \
884 std_C[i][j] = std::FUNCNAME(std_A[i][j] + std_B[i][j]); \
885 vcl_C = viennacl::linalg::element_##FUNCNAME(vcl_A + vcl_B); \
887 if (!check_for_equality(std_C, vcl_C, epsilon)) \
889 std::cout << "Failure at C = " << #FUNCNAME << "(A + B)" << std::endl; \
890 return EXIT_FAILURE; \
893 for (std::size_t i=0; i<std_C.size(); ++i) \
894 for (std::size_t j=0; j<std_C[i].size(); ++j) \
895 std_C[i][j] += std::FUNCNAME(std_A[i][j]); \
896 vcl_C += viennacl::linalg::element_##FUNCNAME(vcl_A); \
898 if (!check_for_equality(std_C, vcl_C, epsilon)) \
900 std::cout << "Failure at C += " << #FUNCNAME << "(A)" << std::endl; \
901 return EXIT_FAILURE; \
904 for (std::size_t i=0; i<std_C.size(); ++i) \
905 for (std::size_t j=0; j<std_C[i].size(); ++j) \
906 std_C[i][j] += std::FUNCNAME(std_A[i][j] + std_B[i][j]); \
907 vcl_C += viennacl::linalg::element_##FUNCNAME(vcl_A + vcl_B); \
909 if (!check_for_equality(std_C, vcl_C, epsilon)) \
911 std::cout << "Failure at C += " << #FUNCNAME << "(A + B)" << std::endl; \
912 return EXIT_FAILURE; \
915 for (std::size_t i=0; i<std_C.size(); ++i) \
916 for (std::size_t j=0; j<std_C[i].size(); ++j) \
917 std_C[i][j] -= std::FUNCNAME(std_A[i][j]); \
918 vcl_C -= viennacl::linalg::element_##FUNCNAME(vcl_A); \
920 if (!check_for_equality(std_C, vcl_C, epsilon)) \
922 std::cout << "Failure at C -= " << #FUNCNAME << "(A)" << std::endl; \
923 return EXIT_FAILURE; \
926 for (std::size_t i=0; i<std_C.size(); ++i) \
927 for (std::size_t j=0; j<std_C[i].size(); ++j) \
928 std_C[i][j] -= std::FUNCNAME(std_A[i][j] + std_B[i][j]); \
929 vcl_C -= viennacl::linalg::element_##FUNCNAME(vcl_A + vcl_B); \
931 if (!check_for_equality(std_C, vcl_C, epsilon)) \
933 std::cout << "Failure at C -= " << #FUNCNAME << "(A + B)" << std::endl; \
934 return EXIT_FAILURE; \
952 std::cout <<
"Complicated expressions: ";
956 for (std::size_t i=0; i<std_C.size(); ++i)
957 for (std::size_t j=0; j<std_C[i].size(); ++j)
958 std_B[i][j] += alpha * (- std_A[i][j] - beta * std_C[i][j] + std_A[i][j]);
959 vcl_B += gpu_alpha * (- vcl_A - vcl_C * beta + vcl_A);
964 for (std::size_t i=0; i<std_C.size(); ++i)
965 for (std::size_t j=0; j<std_C[i].size(); ++j)
966 std_B[i][j] += (- std_A[i][j] - beta * std_C[i][j] + std_A[i][j] * beta) / gpu_alpha;
967 vcl_B += (- vcl_A - vcl_C * beta + gpu_beta * vcl_A) / gpu_alpha;
973 for (std::size_t i=0; i<std_C.size(); ++i)
974 for (std::size_t j=0; j<std_C[i].size(); ++j)
975 std_B[i][j] -= alpha * (- std_A[i][j] - beta * std_C[i][j] - std_A[i][j]);
976 vcl_B -= gpu_alpha * (- vcl_A - vcl_C * beta - vcl_A);
981 for (std::size_t i=0; i<std_C.size(); ++i)
982 for (std::size_t j=0; j<std_C[i].size(); ++j)
983 std_B[i][j] -= (- std_A[i][j] - beta * std_C[i][j] - std_A[i][j] * beta) / alpha;
984 vcl_B -= (- vcl_A - vcl_C * beta - gpu_beta * vcl_A) / gpu_alpha;
989 std::cout << std::endl;
990 std::cout <<
"----------------------------------------------" << std::endl;
991 std::cout << std::endl;
1000 template<
typename T,
typename ScalarType>
1006 std::size_t dim_rows = 131;
1007 std::size_t dim_cols = 33;
1012 std::vector<std::vector<ScalarType> > std_A(dim_rows, std::vector<ScalarType>(dim_cols));
1013 std::vector<std::vector<ScalarType> > std_B(dim_rows, std::vector<ScalarType>(dim_cols));
1014 std::vector<std::vector<ScalarType> > std_C(dim_rows, std::vector<ScalarType>(dim_cols));
1016 for (std::size_t i=0; i<std_A.size(); ++i)
1017 for (std::size_t j=0; j<std_A[i].size(); ++j)
1019 std_A[i][j] =
ScalarType((i+2) + (j+1)*(i+2));
1020 std_B[i][j] =
ScalarType((j+2) + (j+1)*(j+2));
1021 std_C[i][j] =
ScalarType((i+1) + (i+1)*(i+2));
1024 std::vector<std::vector<ScalarType> > std_A_large(4 * dim_rows, std::vector<ScalarType>(4 * dim_cols));
1025 for (std::size_t i=0; i<std_A_large.size(); ++i)
1026 for (std::size_t j=0; j<std_A_large[i].size(); ++j)
1030 VCLMatrixType vcl_A_full(4 * dim_rows, 4 * dim_cols);
1031 VCLMatrixType vcl_B_full(4 * dim_rows, 4 * dim_cols);
1032 VCLMatrixType vcl_C_full(4 * dim_rows, 4 * dim_cols);
1041 VCLMatrixType vcl_A(dim_rows, dim_cols);
1055 VCLMatrixType vcl_B(dim_rows, dim_cols);
1069 VCLMatrixType vcl_C(dim_rows, dim_cols);
1092 std::cout << std::endl;
1093 std::cout <<
"//" << std::endl;
1094 std::cout <<
"////////// Test: Copy CTOR //////////" << std::endl;
1095 std::cout <<
"//" << std::endl;
1098 std::cout <<
"Testing matrix created from range... ";
1099 VCLMatrixType vcl_temp = vcl_range_A;
1101 std::cout <<
"PASSED!" << std::endl;
1104 std::cout <<
"vcl_temp: " << vcl_temp << std::endl;
1105 std::cout <<
"vcl_range_A: " << vcl_range_A << std::endl;
1106 std::cout <<
"vcl_A: " << vcl_A << std::endl;
1107 std::cout << std::endl <<
"TEST failed!" << std::endl;
1108 return EXIT_FAILURE;
1111 std::cout <<
"Testing matrix created from slice... ";
1112 VCLMatrixType vcl_temp2 = vcl_range_B;
1114 std::cout <<
"PASSED!" << std::endl;
1117 std::cout << std::endl <<
"TEST failed!" << std::endl;
1118 return EXIT_FAILURE;
1122 std::cout <<
"//" << std::endl;
1123 std::cout <<
"////////// Test: Initializer for matrix type //////////" << std::endl;
1124 std::cout <<
"//" << std::endl;
1127 std::vector<std::vector<ScalarType> > std_dummy1(std_A.size(), std::vector<ScalarType>(std_A.size()));
1128 for (std::size_t i=0; i<std_A.size(); ++i) std_dummy1[i][i] =
ScalarType(1);
1129 std::vector<std::vector<ScalarType> > std_dummy2(std_A.size(), std::vector<ScalarType>(std_A.size(),
ScalarType(3)));
1130 std::vector<std::vector<ScalarType> > std_dummy3(std_A.size(), std::vector<ScalarType>(std_A.size()));
1136 std::cout <<
"Testing initializer CTOR... ";
1141 std::cout <<
"PASSED!" << std::endl;
1144 std::cout << std::endl <<
"TEST failed!" << std::endl;
1145 return EXIT_FAILURE;
1148 std_dummy1 = std::vector<std::vector<ScalarType> >(std_A.size(), std::vector<ScalarType>(std_A.size()));
1149 std_dummy2 = std::vector<std::vector<ScalarType> >(std_A.size(), std::vector<ScalarType>(std_A.size()));
1150 for (std::size_t i=0; i<std_A.size(); ++i) std_dummy2[i][i] =
ScalarType(1);
1151 std_dummy3 = std::vector<std::vector<ScalarType> >(std_A.size(), std::vector<ScalarType>(std_A.size(), 3));
1157 std::cout <<
"Testing initializer assignment... ";
1162 std::cout <<
"PASSED!" << std::endl;
1165 std::cout << std::endl <<
"TEST failed!" << std::endl;
1166 return EXIT_FAILURE;
1170 std::cout <<
"//" << std::endl;
1171 std::cout <<
"////////// Test: Norms //////////" << std::endl;
1172 std::cout <<
"//" << std::endl;
1191 for (std::size_t i=0; i<std_C.size(); ++i)
1192 for (std::size_t j=0; j<std_C[i].size(); ++j)
1193 std_norm_frobenius += std_C[i][j] * std_C[i][j];
1194 std_norm_frobenius = std::sqrt(std_norm_frobenius);
1196 if ( std::fabs(std_norm_frobenius - vcl_norm_frobenius) / std_norm_frobenius > epsilon)
1198 std::cerr <<
"Failure at norm_frobenius()" << std::endl;
1199 return EXIT_FAILURE;
1204 if ( std::fabs(device_std_norm_frobenius - device_vcl_norm_frobenius) / device_std_norm_frobenius > epsilon)
1206 std::cerr <<
"Failure at norm_frobenius()" << std::endl;
1207 return EXIT_FAILURE;
1211 if ( std::fabs(std_norm_frobenius - vcl_norm_frobenius) / std_norm_frobenius > epsilon)
1213 std::cerr <<
"Failure at norm_frobenius() with range" << std::endl;
1214 return EXIT_FAILURE;
1218 if ( std::fabs(device_std_norm_frobenius - device_vcl_norm_frobenius) / device_std_norm_frobenius > epsilon)
1220 std::cerr <<
"Failure at norm_frobenius() with range" << std::endl;
1221 return EXIT_FAILURE;
1225 if ( std::fabs(std_norm_frobenius - vcl_norm_frobenius) / std_norm_frobenius > epsilon)
1227 std::cerr <<
"Failure at norm_frobenius() with slice" << std::endl;
1228 return EXIT_FAILURE;
1232 if ( std::fabs(device_std_norm_frobenius - device_vcl_norm_frobenius) / device_std_norm_frobenius > epsilon)
1234 std::cerr <<
"Failure at norm_frobenius() with slice" << std::endl;
1235 return EXIT_FAILURE;
1238 std::cout <<
"PASSED!" << std::endl;
1245 std::cout <<
"//" << std::endl;
1246 std::cout <<
"////////// Test: Operations //////////" << std::endl;
1247 std::cout <<
"//" << std::endl;
1251 std::cout <<
"Testing A=matrix, B=matrix, C=matrix ..." << std::endl;
1256 std_A, std_B, std_C,
1257 vcl_A, vcl_B, vcl_C) != EXIT_SUCCESS)
1259 return EXIT_FAILURE;
1262 std::cout <<
"Testing A=matrix, B=matrix, C=range ..." << std::endl;
1267 std_A, std_B, std_C,
1268 vcl_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
1270 return EXIT_FAILURE;
1273 std::cout <<
"Testing A=matrix, B=matrix, C=slice ..." << std::endl;
1278 std_A, std_B, std_C,
1279 vcl_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
1281 return EXIT_FAILURE;
1284 std::cout <<
"Testing A=matrix, B=range, C=matrix ..." << std::endl;
1289 std_A, std_B, std_C,
1290 vcl_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
1292 return EXIT_FAILURE;
1295 std::cout <<
"Testing A=matrix, B=range, C=range ..." << std::endl;
1300 std_A, std_B, std_C,
1301 vcl_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
1303 return EXIT_FAILURE;
1306 std::cout <<
"Testing A=matrix, B=range, C=slice ..." << std::endl;
1311 std_A, std_B, std_C,
1312 vcl_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
1314 return EXIT_FAILURE;
1318 std::cout <<
"Testing A=matrix, B=slice, C=matrix ..." << std::endl;
1323 std_A, std_B, std_C,
1324 vcl_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
1326 return EXIT_FAILURE;
1329 std::cout <<
"Testing A=matrix, B=slice, C=range ..." << std::endl;
1334 std_A, std_B, std_C,
1335 vcl_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS)
1337 return EXIT_FAILURE;
1340 std::cout <<
"Testing A=matrix, B=slice, C=slice ..." << std::endl;
1345 std_A, std_B, std_C,
1346 vcl_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS)
1348 return EXIT_FAILURE;
1354 std::cout <<
"Testing A=range, B=matrix, C=matrix ..." << std::endl;
1359 std_A, std_B, std_C,
1360 vcl_range_A, vcl_B, vcl_C) != EXIT_SUCCESS)
1362 return EXIT_FAILURE;
1365 std::cout <<
"Testing A=range, B=matrix, C=range ..." << std::endl;
1370 std_A, std_B, std_C,
1371 vcl_range_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
1373 return EXIT_FAILURE;
1376 std::cout <<
"Testing A=range, B=matrix, C=slice ..." << std::endl;
1381 std_A, std_B, std_C,
1382 vcl_range_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
1384 return EXIT_FAILURE;
1389 std::cout <<
"Testing A=range, B=range, C=matrix ..." << std::endl;
1394 std_A, std_B, std_C,
1395 vcl_range_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
1397 return EXIT_FAILURE;
1400 std::cout <<
"Testing A=range, B=range, C=range ..." << std::endl;
1405 std_A, std_B, std_C,
1406 vcl_range_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
1408 return EXIT_FAILURE;
1411 std::cout <<
"Testing A=range, B=range, C=slice ..." << std::endl;
1416 std_A, std_B, std_C,
1417 vcl_range_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
1419 return EXIT_FAILURE;
1424 std::cout <<
"Testing A=range, B=slice, C=matrix ..." << std::endl;
1429 std_A, std_B, std_C,
1430 vcl_range_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
1432 return EXIT_FAILURE;
1435 std::cout <<
"Testing A=range, B=slice, C=range ..." << std::endl;
1440 std_A, std_B, std_C,
1441 vcl_range_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS)
1443 return EXIT_FAILURE;
1446 std::cout <<
"Testing A=range, B=slice, C=slice ..." << std::endl;
1451 std_A, std_B, std_C,
1452 vcl_range_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS)
1454 return EXIT_FAILURE;
1459 std::cout <<
"Testing A=slice, B=matrix, C=matrix ..." << std::endl;
1464 std_A, std_B, std_C,
1465 vcl_slice_A, vcl_B, vcl_C) != EXIT_SUCCESS)
1467 return EXIT_FAILURE;
1470 std::cout <<
"Testing A=slice, B=matrix, C=range ..." << std::endl;
1475 std_A, std_B, std_C,
1476 vcl_slice_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
1478 return EXIT_FAILURE;
1481 std::cout <<
"Testing A=slice, B=matrix, C=slice ..." << std::endl;
1486 std_A, std_B, std_C,
1487 vcl_slice_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
1489 return EXIT_FAILURE;
1494 std::cout <<
"Testing A=slice, B=range, C=matrix ..." << std::endl;
1499 std_A, std_B, std_C,
1500 vcl_slice_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
1502 return EXIT_FAILURE;
1505 std::cout <<
"Testing A=slice, B=range, C=range ..." << std::endl;
1510 std_A, std_B, std_C,
1511 vcl_slice_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
1513 return EXIT_FAILURE;
1516 std::cout <<
"Testing A=slice, B=range, C=slice ..." << std::endl;
1521 std_A, std_B, std_C,
1522 vcl_slice_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
1524 return EXIT_FAILURE;
1529 std::cout <<
"Testing A=slice, B=slice, C=matrix ..." << std::endl;
1534 std_A, std_B, std_C,
1535 vcl_slice_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
1537 return EXIT_FAILURE;
1540 std::cout <<
"Testing A=slice, B=slice, C=range ..." << std::endl;
1545 std_A, std_B, std_C,
1546 vcl_slice_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS)
1548 return EXIT_FAILURE;
1551 std::cout <<
"Testing A=slice, B=slice, C=slice ..." << std::endl;
1556 std_A, std_B, std_C,
1557 vcl_slice_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS)
1559 return EXIT_FAILURE;
1563 return EXIT_SUCCESS;
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Class for representing strided submatrices of a bigger matrix A.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
T max(const T &lhs, const T &rhs)
Maximum.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Generic interface for the Frobenius norm.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Proxy classes for vectors.
Proxy classes for matrices.
bool check_for_equality(std::vector< std::vector< NumericT > > const &std_A, VCLMatrixType const &vcl_A, double epsilon)
scalar_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_norm_frobenius > norm_frobenius(const matrix_base< NumericT > &A)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
Class for representing non-strided submatrices of a bigger matrix A.
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Implementation of the ViennaCL scalar class.
int run_test(double epsilon, STLMatrixType &std_A, STLMatrixType &std_B, STLMatrixType &std_C, ViennaCLMatrixType1 &vcl_A, ViennaCLMatrixType2 &vcl_B, ViennaCLMatrixType3 vcl_C)
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...