45 template<
typename ScalarType>
54 template<
typename ScalarType>
63 template<
typename ScalarType>
// Fragment of a helper templated on ScalarType and VCLVectorType; its signature line
// and several body lines are not present in this listing.
// Visible logic: a host-side std::vector<ScalarType> named v2_cpu is sized to
// v2.size(), then indices 0 .. v1.size()-1 are walked and v2_cpu[i] is compared with
// v1[i] for exact inequality.
// NOTE(review): presumably v2 is copied into v2_cpu before the loop and this is the
// vector overload of diff() that check() calls -- confirm against the full file.
// NOTE(review): the loop bound is v1.size() while the buffer is sized by v2.size();
// this assumes both have the same length -- TODO confirm.
72 template<
typename ScalarType,
typename VCLVectorType>
75 std::vector<ScalarType> v2_cpu(v2.size());
79 for (
unsigned int i=0;i<v1.size(); ++i)
81 if (v2_cpu[i] != v1[i])
// Generic comparison helper: decides pass/fail for a pair (t1, t2) via diff(t1, t2).
// Visible logic: retval starts as EXIT_SUCCESS; when diff(t1, t2) is non-zero, the
// absolute value of the difference is printed to std::cout and retval is set to
// EXIT_FAILURE.
// NOTE(review): this listing omits some lines (the braces and the final return are
// not visible) -- presumably retval is returned; confirm against the full file.
// NOTE(review): diff(t1, t2) is evaluated twice on the failure path (once for the
// test, once for the message).
88 template<
typename T1,
typename T2>
89 int check(T1
const & t1, T2
const & t2)
91 int retval = EXIT_SUCCESS;
// Exact comparison against zero: any non-zero diff is treated as a failure.
93 if (
diff(t1, t2) != 0)
95 std::cout <<
"# Error! Difference: " << std::abs(
diff(t1, t2)) << std::endl;
96 retval = EXIT_FAILURE;
105 template<
typename NumericT,
typename STLVectorType,
typename ViennaCLVectorType1,
typename ViennaCLVectorType2 >
106 int test(STLVectorType & std_v1, STLVectorType & std_v2,
107 ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
109 int retval = EXIT_SUCCESS;
117 std::cout <<
"Checking for zero_vector initializer..." << std::endl;
118 for (std::size_t i=0; i<std_v1.size(); ++i)
121 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
124 std::cout <<
"Checking for scalar_vector initializer..." << std::endl;
125 for (std::size_t i=0; i<std_v1.size(); ++i)
126 std_v1[i] = cpu_result;
128 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
131 for (std::size_t i=0; i<std_v1.size(); ++i)
132 std_v1[i] = cpu_result + 1;
134 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
137 std::cout <<
"Checking for unit_vector initializer..." << std::endl;
138 for (std::size_t i=0; i<std_v1.size(); ++i)
139 std_v1[i] = (i == 5) ? 1 : 0;
141 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
144 for (std::size_t i=0; i<std_v1.size(); ++i)
153 std::cout <<
"Checking for successful copy..." << std::endl;
154 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
156 if (
check(std_v2, vcl_v2) != EXIT_SUCCESS)
164 std::cout <<
"Testing inner_prod..." << std::endl;
166 for (std::size_t i=0; i<std_v1.size(); ++i)
167 cpu_result += std_v1[i] * std_v2[i];
171 if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
173 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
177 for (std::size_t i=0; i<std_v1.size(); ++i)
178 cpu_result += (std_v1[i] + std_v2[i]) * (std_v2[i] - std_v1[i]);
182 if (
check(cpu_result, cpu_result3) != EXIT_SUCCESS)
184 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
189 std::cout <<
"Testing norm_1..." << std::endl;
191 for (std::size_t i=0; i<std_v1.size(); ++i)
192 cpu_result += std::abs(std_v1[i]);
195 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
199 for (std::size_t i=0; i<std_v1.size(); ++i)
200 cpu_result2 += std::abs(std_v1[i]);
203 if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
207 for (std::size_t i=0; i<std_v1.size(); ++i)
208 cpu_result2 += std::abs(std_v1[i] + std_v2[i]);
211 if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
215 std::cout <<
"Testing norm_inf..." << std::endl;
217 for (std::size_t i=0; i<std_v1.size(); ++i)
218 if (std::abs(std_v1[i]) > cpu_result)
219 cpu_result = std::abs(std_v1[i]);
222 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
225 cpu_result2 = cpu_result;
229 if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
233 for (std::size_t i=0; i<std_v1.size(); ++i)
234 if (std_v1[i] + std_v2[i] > cpu_result2)
235 cpu_result2 = std::abs(std_v1[i] + std_v2[i]);
238 if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
242 std::cout <<
"Testing index_norm_inf..." << std::endl;
244 std::size_t cpu_index = 0;
246 for (std::size_t i=0; i<std_v1.size(); ++i)
247 if (std::abs(std_v1[i]) > cpu_result)
249 cpu_result = std::abs(std_v1[i]);
254 if (
check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS)
259 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
264 for (std::size_t i=0; i<std_v1.size(); ++i)
265 if (std::abs(std_v1[i] + std_v2[i]) > cpu_result)
267 cpu_result = std::abs(std_v1[i] + std_v2[i]);
270 cpu_result = std_v1[cpu_index];
273 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
277 std::cout <<
"Testing max..." << std::endl;
278 cpu_result = std_v1[0];
279 for (std::size_t i=0; i<std_v1.size(); ++i)
280 cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
283 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
286 cpu_result = std_v1[0];
287 for (std::size_t i=0; i<std_v1.size(); ++i)
288 cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
289 gpu_result = cpu_result;
293 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
296 cpu_result = std_v1[0] + std_v2[0];
297 for (std::size_t i=0; i<std_v1.size(); ++i)
298 cpu_result = std::max<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
299 gpu_result = cpu_result;
303 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
308 std::cout <<
"Testing min..." << std::endl;
309 cpu_result = std_v1[0];
310 for (std::size_t i=0; i<std_v1.size(); ++i)
311 cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
314 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
317 cpu_result = std_v1[0];
318 for (std::size_t i=0; i<std_v1.size(); ++i)
319 cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
320 gpu_result = cpu_result;
324 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
327 cpu_result = std_v1[0] + std_v2[0];
328 for (std::size_t i=0; i<std_v1.size(); ++i)
329 cpu_result = std::min<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
330 gpu_result = cpu_result;
334 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
338 std::cout <<
"Testing sum..." << std::endl;
340 for (std::size_t i=0; i<std_v1.size(); ++i)
341 cpu_result += std_v1[i];
345 if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
347 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
351 for (std::size_t i=0; i<std_v1.size(); ++i)
352 cpu_result += std_v1[i] + std_v2[i];
356 if (
check(cpu_result, cpu_result3) != EXIT_SUCCESS)
358 if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
368 std::vector<NumericT> x = std_v1;
369 std::vector<NumericT> y = std_v2;
370 for (std::size_t i=0; i<std_v1.size(); ++i)
380 if (
check(x, vcl_v1) != EXIT_SUCCESS)
382 if (
check(y, vcl_v2) != EXIT_SUCCESS)
387 std::cout <<
"Testing assignments..." << std::endl;
389 for (
size_t i=0; i < std_v1.size(); ++i)
392 for (
size_t i=0; i < vcl_v1.size(); ++i)
395 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
402 std::cout <<
"Testing scaling with CPU scalar..." << std::endl;
406 for (std::size_t i=0; i<std_v1.size(); ++i)
410 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
413 std::cout <<
"Testing scaling with GPU scalar..." << std::endl;
414 for (std::size_t i=0; i<std_v1.size(); ++i)
418 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
424 std::cout <<
"Testing shrinking with CPU scalar..." << std::endl;
425 for (std::size_t i=0; i<std_v1.size(); ++i)
429 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
432 std::cout <<
"Testing shrinking with GPU scalar..." << std::endl;
433 for (std::size_t i=0; i<std_v1.size(); ++i)
437 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
444 for (
size_t i=0; i < std_v1.size(); ++i)
446 for (std::size_t i=0; i<std_v1.size(); ++i)
447 std_v2[i] = 3 * std_v1[i];
451 std::cout <<
"Testing add on vector..." << std::endl;
453 std::cout <<
"Checking for successful copy..." << std::endl;
454 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
456 if (
check(std_v2, vcl_v2) != EXIT_SUCCESS)
459 for (std::size_t i=0; i<std_v1.size(); ++i)
460 std_v1[i] = std_v1[i] + std_v2[i];
461 vcl_v1 = vcl_v1 + vcl_v2;
463 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
466 std::cout <<
"Testing add on vector with flipsign..." << std::endl;
467 for (std::size_t i=0; i<std_v1.size(); ++i)
468 std_v1[i] = - std_v1[i] + std_v2[i];
469 vcl_v1 = - vcl_v1 + vcl_v2;
471 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
474 std::cout <<
"Testing inplace-add on vector..." << std::endl;
475 for (std::size_t i=0; i<std_v1.size(); ++i)
476 std_v1[i] += std_v2[i];
479 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
485 std::cout <<
"Testing sub on vector..." << std::endl;
486 for (std::size_t i=0; i<std_v1.size(); ++i)
487 std_v2[i] = 3 * std_v1[i];
491 for (std::size_t i=0; i<std_v1.size(); ++i)
492 std_v1[i] = std_v1[i] - std_v2[i];
493 vcl_v1 = vcl_v1 - vcl_v2;
495 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
498 std::cout <<
"Testing inplace-sub on vector..." << std::endl;
499 for (std::size_t i=0; i<std_v1.size(); ++i)
500 std_v1[i] -= std_v2[i];
503 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
511 std::cout <<
"Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
512 for (
size_t i=0; i < std_v1.size(); ++i)
514 for (std::size_t i=0; i<std_v1.size(); ++i)
515 std_v2[i] = 3 * std_v1[i];
519 for (std::size_t i=0; i<std_v1.size(); ++i)
520 std_v1[i] = std_v1[i] + alpha * std_v2[i];
521 vcl_v1 = vcl_v1 + alpha * vcl_v2;
523 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
526 std::cout <<
"Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
527 for (std::size_t i=0; i<std_v1.size(); ++i)
528 std_v2[i] = 3 * std_v1[i];
532 for (std::size_t i=0; i<std_v1.size(); ++i)
533 std_v1[i] = alpha * std_v1[i] + std_v2[i];
534 vcl_v1 = alpha * vcl_v1 + vcl_v2;
536 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
539 std::cout <<
"Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
540 for (std::size_t i=0; i<std_v1.size(); ++i)
541 std_v2[i] = 3 * std_v1[i];
545 for (std::size_t i=0; i<std_v1.size(); ++i)
546 std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
547 vcl_v1 = alpha * vcl_v1 + beta * vcl_v2;
549 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
553 std::cout <<
"Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
554 for (std::size_t i=0; i<std_v1.size(); ++i)
555 std_v2[i] = 3 * std_v1[i];
559 for (std::size_t i=0; i<std_v1.size(); ++i)
560 std_v1[i] += alpha * std_v2[i];
561 vcl_v1 += alpha * vcl_v2;
563 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
567 std::cout <<
"Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
568 for (std::size_t i=0; i<std_v1.size(); ++i)
569 std_v2[i] = 3 * std_v1[i];
573 for (std::size_t i=0; i<std_v1.size(); ++i)
574 std_v1[i] = std_v1[i] + alpha * std_v2[i];
575 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
577 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
580 std::cout <<
"Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
581 for (std::size_t i=0; i<std_v1.size(); ++i)
582 std_v2[i] = 3 * std_v1[i];
586 for (std::size_t i=0; i<std_v1.size(); ++i)
587 std_v1[i] = std_v1[i] + alpha * std_v2[i];
588 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
590 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
593 std::cout <<
"Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
594 for (std::size_t i=0; i<std_v1.size(); ++i)
595 std_v2[i] = 3 * std_v1[i];
599 for (std::size_t i=0; i<std_v1.size(); ++i)
600 std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
601 vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
603 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
607 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
608 for (std::size_t i=0; i<std_v1.size(); ++i)
609 std_v2[i] = 3 * std_v1[i];
613 for (std::size_t i=0; i<std_v1.size(); ++i)
614 std_v1[i] += alpha * std_v1[i] + beta * std_v2[i];
615 vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
617 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
620 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
621 for (std::size_t i=0; i<std_v1.size(); ++i)
622 std_v2[i] = 3 * std_v1[i];
626 for (std::size_t i=0; i<std_v1.size(); ++i)
627 std_v1[i] += alpha * std_v1[i] - beta * std_v2[i];
628 vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
630 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
635 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
636 for (std::size_t i=0; i<std_v1.size(); ++i)
637 std_v2[i] = 3 * std_v1[i];
641 for (std::size_t i=0; i<std_v1.size(); ++i)
642 std_v1[i] += alpha * std_v2[i];
643 vcl_v1 += gpu_alpha * vcl_v2;
645 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
652 std::cout <<
"Testing division-add on vector with CPU scalar (right)..." << std::endl;
653 for (
size_t i=0; i < std_v1.size(); ++i)
655 for (std::size_t i=0; i<std_v1.size(); ++i)
656 std_v2[i] = 3 * std_v1[i];
660 for (std::size_t i=0; i<std_v1.size(); ++i)
661 std_v1[i] = std_v1[i] + std_v2[i] / alpha;
662 vcl_v1 = vcl_v1 + vcl_v2 / alpha;
664 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
668 std::cout <<
"Testing division-add on vector with CPU scalar (left)..." << std::endl;
669 for (std::size_t i=0; i<std_v1.size(); ++i)
670 std_v2[i] = 3 * std_v1[i];
674 for (std::size_t i=0; i<std_v1.size(); ++i)
675 std_v1[i] = std_v1[i] / alpha + std_v2[i];
676 vcl_v1 = vcl_v1 / alpha + vcl_v2;
678 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
681 std::cout <<
"Testing division-add on vector with CPU scalar (both)..." << std::endl;
682 for (std::size_t i=0; i<std_v1.size(); ++i)
683 std_v2[i] = 3 * std_v1[i];
687 for (std::size_t i=0; i<std_v1.size(); ++i)
688 std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
689 vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta;
691 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
694 std::cout <<
"Testing division-multiply-add on vector with CPU scalar..." << std::endl;
695 for (std::size_t i=0; i<std_v1.size(); ++i)
696 std_v2[i] = 3 * std_v1[i];
700 for (std::size_t i=0; i<std_v1.size(); ++i)
701 std_v1[i] = std_v1[i] / alpha + std_v2[i] * beta;
702 vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
704 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
708 std::cout <<
"Testing multiply-division-add on vector with CPU scalar..." << std::endl;
709 for (std::size_t i=0; i<std_v1.size(); ++i)
710 std_v2[i] = 3 * std_v1[i];
714 for (std::size_t i=0; i<std_v1.size(); ++i)
715 std_v1[i] = std_v1[i] * alpha + std_v2[i] / beta;
716 vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
718 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
723 std::cout <<
"Testing inplace division-add on vector with CPU scalar..." << std::endl;
724 for (std::size_t i=0; i<std_v1.size(); ++i)
725 std_v2[i] = 3 * std_v1[i];
729 for (std::size_t i=0; i<std_v1.size(); ++i)
730 std_v1[i] += std_v2[i] / alpha;
731 vcl_v1 += vcl_v2 / alpha;
733 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
737 std::cout <<
"Testing division-add on vector with GPU scalar (right)..." << std::endl;
738 for (std::size_t i=0; i<std_v1.size(); ++i)
739 std_v2[i] = 3 * std_v1[i];
743 for (std::size_t i=0; i<std_v1.size(); ++i)
744 std_v1[i] = std_v1[i] + std_v2[i] / alpha;
745 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
747 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
750 std::cout <<
"Testing division-add on vector with GPU scalar (left)..." << std::endl;
751 for (std::size_t i=0; i<std_v1.size(); ++i)
752 std_v2[i] = 3 * std_v1[i];
756 for (std::size_t i=0; i<std_v1.size(); ++i)
757 std_v1[i] = std_v1[i] + std_v2[i] / alpha;
758 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
760 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
763 std::cout <<
"Testing division-add on vector with GPU scalar (both)..." << std::endl;
764 for (std::size_t i=0; i<std_v1.size(); ++i)
765 std_v2[i] = 3 * std_v1[i];
769 for (std::size_t i=0; i<std_v1.size(); ++i)
770 std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
771 vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
773 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
777 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
778 for (std::size_t i=0; i<std_v1.size(); ++i)
779 std_v2[i] = 3 * std_v1[i];
783 for (std::size_t i=0; i<std_v1.size(); ++i)
784 std_v1[i] += std_v1[i] / alpha + std_v2[i] / beta;
785 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
787 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
790 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
791 for (std::size_t i=0; i<std_v1.size(); ++i)
792 std_v2[i] = 3 * std_v1[i];
796 for (std::size_t i=0; i<std_v1.size(); ++i)
797 std_v1[i] += std_v1[i] / alpha - std_v2[i] / beta;
798 vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
800 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
803 std::cout <<
"Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
804 for (std::size_t i=0; i<std_v1.size(); ++i)
805 std_v2[i] = 3 * std_v1[i];
809 for (std::size_t i=0; i<std_v1.size(); ++i)
810 std_v1[i] += std_v1[i] / alpha + std_v2[i] * beta;
811 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
813 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
816 std::cout <<
"Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
817 for (std::size_t i=0; i<std_v1.size(); ++i)
818 std_v2[i] = 3 * std_v1[i];
822 for (std::size_t i=0; i<std_v1.size(); ++i)
823 std_v1[i] += std_v1[i] * alpha - std_v2[i] / beta;
824 vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
826 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
831 std::cout <<
"Testing inplace division-add on vector with GPU scalar..." << std::endl;
832 for (std::size_t i=0; i<std_v1.size(); ++i)
833 std_v2[i] = 3 * std_v1[i];
837 for (std::size_t i=0; i<std_v1.size(); ++i)
838 std_v1[i] += std_v2[i] * alpha;
839 vcl_v1 += vcl_v2 * gpu_alpha;
841 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
847 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
848 for (
size_t i=0; i < std_v1.size(); ++i)
850 for (std::size_t i=0; i<std_v1.size(); ++i)
851 std_v2[i] = 3 * std_v1[i];
855 for (std::size_t i=0; i<std_v1.size(); ++i)
856 std_v1[i] = std_v1[i] - alpha * std_v2[i];
857 vcl_v1 = vcl_v1 - alpha * vcl_v2;
859 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
863 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
864 for (std::size_t i=0; i<std_v1.size(); ++i)
865 std_v2[i] = 3 * std_v1[i];
869 for (std::size_t i=0; i<std_v1.size(); ++i)
870 std_v1[i] = alpha * std_v1[i] - std_v2[i];
871 vcl_v1 = alpha * vcl_v1 - vcl_v2;
873 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
876 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
877 for (std::size_t i=0; i<std_v1.size(); ++i)
878 std_v2[i] = 3 * std_v1[i];
882 for (std::size_t i=0; i<std_v1.size(); ++i)
883 std_v1[i] = alpha * std_v1[i] - beta * std_v2[i];
884 vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
886 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
890 std::cout <<
"Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
891 for (std::size_t i=0; i<std_v1.size(); ++i)
892 std_v2[i] = 3 * std_v1[i];
896 for (std::size_t i=0; i<std_v1.size(); ++i)
897 std_v1[i] -= alpha * std_v2[i];
898 vcl_v1 -= alpha * vcl_v2;
900 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
904 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
905 for (std::size_t i=0; i<std_v1.size(); ++i)
906 std_v2[i] = 3 * std_v1[i];
910 for (std::size_t i=0; i<std_v1.size(); ++i)
911 std_v1[i] = std_v1[i] - alpha * std_v2[i];
912 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
914 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
917 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
918 for (std::size_t i=0; i<std_v1.size(); ++i)
919 std_v2[i] = 3 * std_v1[i];
923 for (std::size_t i=0; i<std_v1.size(); ++i)
924 std_v1[i] = std_v1[i] - alpha * std_v2[i];
925 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
927 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
930 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
931 for (std::size_t i=0; i<std_v1.size(); ++i)
932 std_v2[i] = 3 * std_v1[i];
936 for (std::size_t i=0; i<std_v1.size(); ++i)
937 std_v1[i] = alpha * std_v1[i] - beta * std_v2[i];
938 vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
940 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
943 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
944 for (std::size_t i=0; i<std_v1.size(); ++i)
945 std_v2[i] = 3 * std_v1[i];
949 for (std::size_t i=0; i<std_v1.size(); ++i)
950 std_v1[i] -= alpha * std_v1[i] + beta * std_v2[i];
951 vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
953 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
956 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
957 for (std::size_t i=0; i<std_v1.size(); ++i)
958 std_v2[i] = 3 * std_v1[i];
962 for (std::size_t i=0; i<std_v1.size(); ++i)
963 std_v1[i] -= alpha * std_v1[i] - beta * std_v2[i];
964 vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
966 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
970 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
971 for (std::size_t i=0; i<std_v1.size(); ++i)
972 std_v2[i] = 3 * std_v1[i];
976 for (std::size_t i=0; i<std_v1.size(); ++i)
977 std_v1[i] -= alpha * std_v2[i];
978 vcl_v1 -= gpu_alpha * vcl_v2;
980 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
988 std::cout <<
"Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
989 for (
size_t i=0; i < std_v1.size(); ++i)
991 for (std::size_t i=0; i<std_v1.size(); ++i)
992 std_v2[i] = 3 * std_v1[i];
996 for (std::size_t i=0; i<std_v1.size(); ++i)
997 std_v1[i] = std_v1[i] - std_v2[i] / alpha;
998 vcl_v1 = vcl_v1 - vcl_v2 / alpha;
1000 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1001 return EXIT_FAILURE;
1004 std::cout <<
"Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1005 for (std::size_t i=0; i<std_v1.size(); ++i)
1006 std_v2[i] = 3 * std_v1[i];
1010 for (std::size_t i=0; i<std_v1.size(); ++i)
1011 std_v1[i] = std_v1[i] / alpha - std_v2[i];
1012 vcl_v1 = vcl_v1 / alpha - vcl_v2;
1014 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1015 return EXIT_FAILURE;
1017 std::cout <<
"Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1018 for (std::size_t i=0; i<std_v1.size(); ++i)
1019 std_v2[i] = 3 * std_v1[i];
1023 for (std::size_t i=0; i<std_v1.size(); ++i)
1024 std_v1[i] = std_v1[i] / alpha - std_v2[i] / alpha;
1025 vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1027 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1028 return EXIT_FAILURE;
1031 std::cout <<
"Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1032 for (std::size_t i=0; i<std_v1.size(); ++i)
1033 std_v2[i] = 3 * std_v1[i];
1037 for (std::size_t i=0; i<std_v1.size(); ++i)
1038 std_v1[i] -= std_v2[i] / alpha;
1039 vcl_v1 -= vcl_v2 / alpha;
1041 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1042 return EXIT_FAILURE;
1044 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1045 for (std::size_t i=0; i<std_v1.size(); ++i)
1046 std_v2[i] = 3 * std_v1[i];
1050 for (std::size_t i=0; i<std_v1.size(); ++i)
1051 std_v1[i] -= std_v2[i] / alpha;
1052 vcl_v1 -= vcl_v2 / gpu_alpha;
1054 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1055 return EXIT_FAILURE;
1058 std::cout <<
"Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1059 for (std::size_t i=0; i<std_v1.size(); ++i)
1060 std_v2[i] = 3 * std_v1[i];
1064 for (std::size_t i=0; i<std_v1.size(); ++i)
1065 std_v1[i] = std_v1[i] - std_v2[i] / alpha;
1066 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1068 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1069 return EXIT_FAILURE;
1071 std::cout <<
"Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1072 for (std::size_t i=0; i<std_v1.size(); ++i)
1073 std_v2[i] = 3 * std_v1[i];
1077 for (std::size_t i=0; i<std_v1.size(); ++i)
1078 std_v1[i] = std_v1[i] - std_v2[i] / alpha;
1079 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1081 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1082 return EXIT_FAILURE;
1084 std::cout <<
"Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1085 for (std::size_t i=0; i<std_v1.size(); ++i)
1086 std_v2[i] = 3 * std_v1[i];
1090 for (std::size_t i=0; i<std_v1.size(); ++i)
1091 std_v1[i] = std_v1[i] / alpha - std_v2[i] / beta;
1092 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1094 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1095 return EXIT_FAILURE;
1097 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1098 for (std::size_t i=0; i<std_v1.size(); ++i)
1099 std_v2[i] = 3 * std_v1[i];
1103 for (std::size_t i=0; i<std_v1.size(); ++i)
1104 std_v1[i] -= std_v1[i] / alpha + std_v2[i] / beta;
1105 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1107 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1108 return EXIT_FAILURE;
1110 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1111 for (std::size_t i=0; i<std_v1.size(); ++i)
1112 std_v2[i] = 3 * std_v1[i];
1116 for (std::size_t i=0; i<std_v1.size(); ++i)
1117 std_v1[i] -= std_v1[i] / alpha - std_v2[i] / beta;
1118 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1120 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1121 return EXIT_FAILURE;
1123 std::cout <<
"Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1124 for (std::size_t i=0; i<std_v1.size(); ++i)
1125 std_v2[i] = 3 * std_v1[i];
1129 for (std::size_t i=0; i<std_v1.size(); ++i)
1130 std_v1[i] = std_v1[i] * alpha - std_v2[i] / beta;
1131 vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1133 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1134 return EXIT_FAILURE;
1136 std::cout <<
"Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1137 for (std::size_t i=0; i<std_v1.size(); ++i)
1138 std_v2[i] = 3 * std_v1[i];
1142 for (std::size_t i=0; i<std_v1.size(); ++i)
1143 std_v1[i] = std_v1[i] / alpha - std_v2[i] * beta;
1144 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1146 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1147 return EXIT_FAILURE;
1149 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1150 for (std::size_t i=0; i<std_v1.size(); ++i)
1151 std_v2[i] = 3 * std_v1[i];
1155 for (std::size_t i=0; i<std_v1.size(); ++i)
1156 std_v1[i] -= std_v1[i] * alpha + std_v2[i] / beta;
1157 vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1159 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1160 return EXIT_FAILURE;
1162 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1163 for (std::size_t i=0; i<std_v1.size(); ++i)
1164 std_v2[i] = 3 * std_v1[i];
1168 for (std::size_t i=0; i<std_v1.size(); ++i)
1169 std_v1[i] -= std_v1[i] / alpha + std_v2[i] * beta;
1170 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1172 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1173 return EXIT_FAILURE;
1175 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1176 for (std::size_t i=0; i<std_v1.size(); ++i)
1177 std_v2[i] = 3 * std_v1[i];
1181 for (std::size_t i=0; i<std_v1.size(); ++i)
1182 std_v1[i] -= std_v1[i] * alpha - std_v2[i] / beta;
1183 vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1185 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1186 return EXIT_FAILURE;
1188 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1189 for (std::size_t i=0; i<std_v1.size(); ++i)
1190 std_v2[i] = 3 * std_v1[i];
1194 for (std::size_t i=0; i<std_v1.size(); ++i)
1195 std_v1[i] -= std_v1[i] / alpha - std_v2[i] * beta;
1196 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1198 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1199 return EXIT_FAILURE;
1202 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1203 for (std::size_t i=0; i<std_v1.size(); ++i)
1204 std_v2[i] = 3 * std_v1[i];
1208 for (std::size_t i=0; i<std_v1.size(); ++i)
1209 std_v1[i] -= alpha * std_v2[i];
1210 vcl_v1 -= gpu_alpha * vcl_v2;
1212 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1213 return EXIT_FAILURE;
1220 for (
size_t i=0; i < std_v1.size(); ++i)
1222 for (std::size_t i=0; i<std_v1.size(); ++i)
1223 std_v2[i] = 3 * std_v1[i];
1227 std::cout <<
"Testing three vector additions..." << std::endl;
1228 for (std::size_t i=0; i<std_v1.size(); ++i)
1229 std_v1[i] = std_v2[i] + std_v1[i] + std_v2[i];
1230 vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1232 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1233 return EXIT_FAILURE;
1236 for (std::size_t i=0; i<std_v1.size(); ++i)
1237 std_v2[i] = 3 * std_v1[i];
1241 std::cout <<
"Testing complicated vector expression with CPU scalar..." << std::endl;
1242 for (std::size_t i=0; i<std_v1.size(); ++i)
1243 std_v1[i] = beta * (std_v1[i] - alpha * std_v2[i]);
1244 vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1246 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1247 return EXIT_FAILURE;
1249 std::cout <<
"Testing complicated vector expression with GPU scalar..." << std::endl;
1250 for (std::size_t i=0; i<std_v1.size(); ++i)
1251 std_v1[i] = beta * (std_v1[i] - alpha * std_v2[i]);
1252 vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1254 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1255 return EXIT_FAILURE;
1258 for (std::size_t i=0; i<std_v1.size(); ++i)
1259 std_v2[i] = 3 * std_v1[i];
1263 std::cout <<
"Testing swap..." << std::endl;
1264 swap(std_v1, std_v2);
1265 swap(vcl_v1, vcl_v2);
1267 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1268 return EXIT_FAILURE;
1271 for (std::size_t i=0; i<std_v1.size(); ++i)
1280 std::cout <<
"Testing unary operator-..." << std::endl;
1281 for (std::size_t i=0; i<std_v1.size(); ++i)
1282 std_v1[i] = -std_v2[i];
1285 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1286 return EXIT_FAILURE;
1289 std::cout <<
"Testing elementwise multiplication..." << std::endl;
1290 std::cout <<
" v1 = element_prod(v1, v2);" << std::endl;
1291 for (std::size_t i=0; i<std_v1.size(); ++i)
1292 std_v1[i] = std_v1[i] * std_v2[i];
1295 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1296 return EXIT_FAILURE;
1298 std::cout <<
" v1 += element_prod(v1, v2);" << std::endl;
1299 for (std::size_t i=0; i<std_v1.size(); ++i)
1300 std_v1[i] += std_v1[i] * std_v2[i];
1303 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1304 return EXIT_FAILURE;
1306 std::cout <<
" v1 -= element_prod(v1, v2);" << std::endl;
1307 for (std::size_t i=0; i<std_v1.size(); ++i)
1308 std_v1[i] -= std_v1[i] * std_v2[i];
1311 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1312 return EXIT_FAILURE;
1315 std::cout <<
" v1 = element_prod(v1 + v2, v2);" << std::endl;
1316 for (std::size_t i=0; i<std_v1.size(); ++i)
1317 std_v1[i] = (std_v1[i] + std_v2[i]) * std_v2[i];
1320 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1321 return EXIT_FAILURE;
1323 std::cout <<
" v1 += element_prod(v1 + v2, v2);" << std::endl;
1324 for (std::size_t i=0; i<std_v1.size(); ++i)
1325 std_v1[i] += (std_v1[i] + std_v2[i]) * std_v2[i];
1328 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1329 return EXIT_FAILURE;
1331 std::cout <<
" v1 -= element_prod(v1 + v2, v2);" << std::endl;
1332 for (std::size_t i=0; i<std_v1.size(); ++i)
1333 std_v1[i] -= (std_v1[i] + std_v2[i]) * std_v2[i];
1336 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1337 return EXIT_FAILURE;
1340 std::cout <<
" v1 = element_prod(v1, v2 + v1);" << std::endl;
1341 for (std::size_t i=0; i<std_v1.size(); ++i)
1342 std_v1[i] = std_v1[i] * (std_v2[i] + std_v1[i]);
1345 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1346 return EXIT_FAILURE;
1348 std::cout <<
" v1 += element_prod(v1, v2 + v1);" << std::endl;
1349 for (std::size_t i=0; i<std_v1.size(); ++i)
1350 std_v1[i] += std_v1[i] * (std_v2[i] + std_v1[i]);
1353 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1354 return EXIT_FAILURE;
1356 std::cout <<
" v1 -= element_prod(v1, v2 + v1);" << std::endl;
1357 for (std::size_t i=0; i<std_v1.size(); ++i)
1358 std_v1[i] -= std_v1[i] * (std_v2[i] + std_v1[i]);
1361 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1362 return EXIT_FAILURE;
1365 std::cout <<
" v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1366 for (std::size_t i=0; i<std_v1.size(); ++i)
1367 std_v1[i] = (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
1370 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1371 return EXIT_FAILURE;
1373 std::cout <<
" v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1374 for (std::size_t i=0; i<std_v1.size(); ++i)
1375 std_v1[i] += (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
1378 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1379 return EXIT_FAILURE;
1381 std::cout <<
" v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1382 for (std::size_t i=0; i<std_v1.size(); ++i)
1383 std_v1[i] -= (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
1386 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1387 return EXIT_FAILURE;
1390 std::cout <<
"Testing elementwise division..." << std::endl;
1391 for (std::size_t i=0; i<std_v1.size(); ++i)
1400 for (std::size_t i=0; i<std_v1.size(); ++i)
1401 std_v1[i] = std_v1[i] / std_v2[i];
1404 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1405 return EXIT_FAILURE;
1407 for (std::size_t i=0; i<std_v1.size(); ++i)
1408 std_v1[i] += std_v1[i] / std_v2[i];
1411 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1412 return EXIT_FAILURE;
1414 for (std::size_t i=0; i<std_v1.size(); ++i)
1415 std_v1[i] -= std_v1[i] / std_v2[i];
1418 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1419 return EXIT_FAILURE;
1422 for (std::size_t i=0; i<std_v1.size(); ++i)
1423 std_v1[i] = (std_v1[i] + std_v2[i]) / std_v2[i];
1426 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1427 return EXIT_FAILURE;
1429 for (std::size_t i=0; i<std_v1.size(); ++i)
1430 std_v1[i] += (std_v1[i] + std_v2[i]) / std_v2[i];
1433 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1434 return EXIT_FAILURE;
1436 for (std::size_t i=0; i<std_v1.size(); ++i)
1437 std_v1[i] -= (std_v1[i] + std_v2[i]) / std_v2[i];
1440 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1441 return EXIT_FAILURE;
1444 for (std::size_t i=0; i<std_v1.size(); ++i)
1445 std_v1[i] = std_v1[i] / (std_v2[i] + std_v1[i]);
1448 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1449 return EXIT_FAILURE;
1451 for (std::size_t i=0; i<std_v1.size(); ++i)
1452 std_v1[i] += std_v1[i] / (std_v2[i] + std_v1[i]);
1455 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1456 return EXIT_FAILURE;
1458 for (std::size_t i=0; i<std_v1.size(); ++i)
1459 std_v1[i] -= std_v1[i] / (std_v2[i] + std_v1[i]);
1462 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1463 return EXIT_FAILURE;
1466 for (std::size_t i=0; i<std_v1.size(); ++i)
1467 std_v1[i] = (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
1470 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1471 return EXIT_FAILURE;
1473 for (std::size_t i=0; i<std_v1.size(); ++i)
1474 std_v1[i] += (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
1477 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1478 return EXIT_FAILURE;
1480 for (std::size_t i=0; i<std_v1.size(); ++i)
1481 std_v1[i] -= (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
1484 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1485 return EXIT_FAILURE;
1487 std::cout <<
"Testing unary elementwise operations..." << std::endl;
// GENERATE_UNARY_OP_TEST(FUNCNAME) expands to a test sequence for one unary
// elementwise function. It expects std_v1, std_v2, vcl_v1 and vcl_v2 to be in
// scope and may return EXIT_FAILURE from the enclosing function.
//
// Steps performed by the expansion:
//   1. Set std_v2 = 3 * std_v1 on the host and copy both host vectors into
//      vcl_v1 and vcl_v2 via viennacl::copy().
//   2. Run six variants, each computing the reference with std::FUNCNAME on
//      the host and the result with viennacl::linalg::element_FUNCNAME on the
//      ViennaCL vectors, then comparing both through check():
//        v1  = f(v2),  v1  = f(v1 + v2),
//        v1 += f(v1),  v1 += f(v1 + v2),
//        v1 -= f(v2),  v1 -= f(v1 + v2)
//
// NOTE(review): the third variant applies FUNCNAME to v1, yet its failure
// message reads "v1 += FUNCNAME(v2)" - the message and the operation disagree.
// NOTE(review): this listing shows no braces around the failure branches; they
// are presumably on lines the listing omits - confirm against the real file.
1489 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \
1490 for (std::size_t i=0; i<std_v1.size(); ++i) \
1491 std_v2[i] = 3 * std_v1[i]; \
1492 viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); \
1493 viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); \
1495 for (std::size_t i=0; i<std_v1.size(); ++i) \
1496 std_v1[i] = std::FUNCNAME(std_v2[i]); \
1497 vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1499 if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1501 std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
1502 return EXIT_FAILURE; \
1505 for (std::size_t i=0; i<std_v1.size(); ++i) \
1506 std_v1[i] = std::FUNCNAME(std_v1[i] + std_v2[i]); \
1507 vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1509 if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1511 std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1512 return EXIT_FAILURE; \
1515 for (std::size_t i=0; i<std_v1.size(); ++i) \
1516 std_v1[i] += std::FUNCNAME(std_v1[i]); \
1517 vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
1519 if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1521 std::cout << "Failure at v1 += " << #FUNCNAME << "(v2)" << std::endl; \
1522 return EXIT_FAILURE; \
1525 for (std::size_t i=0; i<std_v1.size(); ++i) \
1526 std_v1[i] += std::FUNCNAME(std_v1[i] + std_v2[i]); \
1527 vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1529 if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1531 std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1532 return EXIT_FAILURE; \
1535 for (std::size_t i=0; i<std_v1.size(); ++i) \
1536 std_v1[i] -= std::FUNCNAME(std_v2[i]); \
1537 vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1539 if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1541 std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
1542 return EXIT_FAILURE; \
1545 for (std::size_t i=0; i<std_v1.size(); ++i) \
1546 std_v1[i] -= std::FUNCNAME(std_v1[i] + std_v2[i]); \
1547 vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1549 if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1551 std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1552 return EXIT_FAILURE; \
1570 std::cout <<
"Testing lenghty sum of scaled vectors..." << std::endl;
1571 for (std::size_t i=0; i<std_v1.size(); ++i)
1572 std_v2[i] = 3 * std_v1[i];
1576 for (std::size_t i=0; i<std_v1.size(); ++i)
1577 std_v1[i] = std_v2[i] / alpha + beta * std_v1[i] - alpha * std_v2[i] + beta * std_v1[i] - alpha * std_v1[i];
1578 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
1580 if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
1581 return EXIT_FAILURE;
// Driver for one numeric type: builds host reference vectors plus the matching
// ViennaCL vectors, ranges and slices, then runs the four-argument test() on
// every vector/range/slice combination for (vcl_v1, vcl_v2).
// Returns EXIT_SUCCESS if all combinations pass, EXIT_FAILURE on the first
// failing check.
// NOTE(review): this listing omits several original lines (the function
// signature, scope braces, the ViennaCL vector/proxy declarations and one loop
// body) - consult the real source file for those.
1588 template<
typename NumericT >
1591 int retval = EXIT_SUCCESS;
1592 std::size_t
size = 12345;
1594 std::cout <<
"Running tests for vector of size " << size << std::endl;
// Host-side full-length reference vectors.
1599 std::vector<NumericT> std_full_vec(size);
1600 std::vector<NumericT> std_full_vec2(std_full_vec.size());
// Initialization loop; its body is not shown in this listing.
1602 for (std::size_t i=0; i<std_full_vec.size(); ++i)
// Host copies of the sub-ranges: both have length 2*N/4 - N/4 (N = full size).
1608 std::vector<NumericT> std_range_vec (2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
1609 std::vector<NumericT> std_range_vec2(2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
// std_range_vec starts at offset N/4 of std_full_vec,
// std_range_vec2 starts at offset 2*N/4 of std_full_vec2.
1611 for (std::size_t i=0; i<std_range_vec.size(); ++i)
1612 std_range_vec[i] = std_full_vec[i + std_full_vec.size() / 4];
1613 for (std::size_t i=0; i<std_range_vec2.size(); ++i)
1614 std_range_vec2[i] = std_full_vec2[i + 2 * std_full_vec2.size() / 4];
// Host copies of the strided sub-vectors, N/4 entries each.
1616 std::vector<NumericT> std_slice_vec (std_full_vec.size() / 4);
1617 std::vector<NumericT> std_slice_vec2(std_full_vec.size() / 4);
// std_slice_vec takes every 3rd entry of std_full_vec starting at N/4,
// std_slice_vec2 takes every 2nd entry of std_full_vec2 starting at 2*N/4.
1619 for (std::size_t i=0; i<std_slice_vec.size(); ++i)
1620 std_slice_vec[i] = std_full_vec[3*i + std_full_vec.size() / 4];
1621 for (std::size_t i=0; i<std_slice_vec2.size(); ++i)
1622 std_slice_vec2[i] = std_full_vec2[2*i + 2 * std_full_vec2.size() / 4];
// Transfer the second full vector to its ViennaCL counterpart. The declaration
// of vcl_full_vec / vcl_full_vec2 and the copy of the first vector are
// presumably on lines this listing omits.
1631 viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
// Index ranges using the same offsets as the host-side range copies above:
// N/4 up to 2*N/4 for the first vector, 2*N/4 up to 3*N/4 for the second.
1633 viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
1634 viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
// Check that vectors created from the ranges match the host range copies.
// (vcl_short_vec / vcl_short_vec2 are declared on lines not shown here.)
1642 std::vector<NumericT> std_short_vec(std_range_vec);
1643 std::vector<NumericT> std_short_vec2(std_range_vec2);
1645 std::cout <<
"Testing creation of vectors from range..." << std::endl;
1646 if (
check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
1647 return EXIT_FAILURE;
1648 if (
check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
1649 return EXIT_FAILURE;
// Slices matching the host-side strided copies above; the arguments appear to
// be (start, stride, size) - TODO confirm against the viennacl::slice API.
1652 viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
1653 viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
// Check that vectors created from the slices match the host slice copies.
// std_short_vec / std_short_vec2 are declared a second time here; presumably
// each declaration lives in its own brace scope that this listing omits.
1660 std::vector<NumericT> std_short_vec(std_slice_vec);
1661 std::vector<NumericT> std_short_vec2(std_slice_vec2);
1663 std::cout <<
"Testing creation of vectors from slice..." << std::endl;
1664 if (
check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
1665 return EXIT_FAILURE;
1666 if (
check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
1667 return EXIT_FAILURE;
// Run the operation tests for all nine (vcl_v1, vcl_v2) combinations of
// vector, range and slice. The host operands are always std_short_vec and
// std_short_vec2; the first failing combination aborts with EXIT_FAILURE.
1674 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
1675 retval = test<NumericT>(std_short_vec, std_short_vec2,
1676 vcl_short_vec, vcl_short_vec2);
1677 if (retval != EXIT_SUCCESS)
1678 return EXIT_FAILURE;
1680 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
1681 retval = test<NumericT>(std_short_vec, std_short_vec2,
1682 vcl_short_vec, vcl_range_vec2);
1683 if (retval != EXIT_SUCCESS)
1684 return EXIT_FAILURE;
1686 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
1687 retval = test<NumericT>(std_short_vec, std_short_vec2,
1688 vcl_short_vec, vcl_slice_vec2);
1689 if (retval != EXIT_SUCCESS)
1690 return EXIT_FAILURE;
// vcl_v1 is a range proxy.
1694 std::cout <<
" ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
1695 retval = test<NumericT>(std_short_vec, std_short_vec2,
1696 vcl_range_vec, vcl_short_vec2);
1697 if (retval != EXIT_SUCCESS)
1698 return EXIT_FAILURE;
1700 std::cout <<
" ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
1701 retval = test<NumericT>(std_short_vec, std_short_vec2,
1702 vcl_range_vec, vcl_range_vec2);
1703 if (retval != EXIT_SUCCESS)
1704 return EXIT_FAILURE;
1706 std::cout <<
" ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
1707 retval = test<NumericT>(std_short_vec, std_short_vec2,
1708 vcl_range_vec, vcl_slice_vec2);
1709 if (retval != EXIT_SUCCESS)
1710 return EXIT_FAILURE;
// vcl_v1 is a slice proxy.
1714 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
1715 retval = test<NumericT>(std_short_vec, std_short_vec2,
1716 vcl_slice_vec, vcl_short_vec2);
1717 if (retval != EXIT_SUCCESS)
1718 return EXIT_FAILURE;
1720 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
1721 retval = test<NumericT>(std_short_vec, std_short_vec2,
1722 vcl_slice_vec, vcl_range_vec2);
1723 if (retval != EXIT_SUCCESS)
1724 return EXIT_FAILURE;
1726 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
1727 retval = test<NumericT>(std_short_vec, std_short_vec2,
1728 vcl_slice_vec, vcl_slice_vec2);
1729 if (retval != EXIT_SUCCESS)
1730 return EXIT_FAILURE;
// Every combination passed.
1732 return EXIT_SUCCESS;
// Program entry: prints a banner, then runs the parameterless test<T>() driver
// for int and for long, printing "# Test passed" after each successful run.
// NOTE(review): the function signature, the branches taken when a run fails
// and the final return statement are not shown in this listing.
1742 std::cout << std::endl;
1743 std::cout <<
"----------------------------------------------" << std::endl;
1744 std::cout <<
"----------------------------------------------" << std::endl;
1745 std::cout <<
"## Test :: Vector with Integer types" << std::endl;
1746 std::cout <<
"----------------------------------------------" << std::endl;
1747 std::cout <<
"----------------------------------------------" << std::endl;
1748 std::cout << std::endl;
1750 int retval = EXIT_SUCCESS;
1752 std::cout << std::endl;
1753 std::cout <<
"----------------------------------------------" << std::endl;
1754 std::cout << std::endl;
// Run the full suite with NumericT = int.
1756 std::cout <<
"# Testing setup:" << std::endl;
1757 std::cout <<
" numeric: int" << std::endl;
1758 retval = test<int>();
1759 if ( retval == EXIT_SUCCESS )
1760 std::cout <<
"# Test passed" << std::endl;
1764 std::cout << std::endl;
1765 std::cout <<
"----------------------------------------------" << std::endl;
1766 std::cout << std::endl;
// Run the full suite with NumericT = long.
1768 std::cout <<
"# Testing setup:" << std::endl;
1769 std::cout <<
" numeric: long" << std::endl;
1770 retval = test<long>();
1771 if ( retval == EXIT_SUCCESS )
1772 std::cout <<
"# Test passed" << std::endl;
1776 std::cout << std::endl;
1777 std::cout <<
"----------------------------------------------" << std::endl;
1778 std::cout << std::endl;
// Closing banner.
1780 std::cout << std::endl;
1781 std::cout <<
"------- Test completed --------" << std::endl;
1782 std::cout << std::endl;
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
int check(T1 const &t1, T2 const &t2)
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
int test(STLVectorType &std_v1, STLVectorType &std_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
Class for representing non-strided subvectors of a bigger vector x.
Class for representing strided subvectors of a bigger vector x.
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
Proxy classes for vectors.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
Stub routines for the summation of elements in a vector, or all elements in either a row or column of...
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
NumericT max(std::vector< NumericT > const &v1)
T norm_inf(std::vector< T, A > const &v1)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
A slice class that refers to a strided set of indices, described by a start index, a stride, and the number of entries.
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)