// Fragment of a class template over NumericT; the class header and the member
// declarations fall outside this excerpt.
47 template<
typename NumericT>
// Constructor initializer list: stores the arguments p_values, start_idx,
// increment and num_elements in values_, start_, inc_ and size_ unchanged.
52 : values_(p_values), start_(start_idx), inc_(increment), size_(num_elements) {}
// Accessor returning the element count held in size_.
57 std::size_t
size()
const {
return size_; }
// Function template over NumericT; its signature is not visible in this excerpt.
66 template<
typename NumericT>
// Temporary std::vector with one slot per element of host_vec.
69 std::vector<NumericT> std_vec(host_vec.
size());
// Element-wise copy from host_vec into the temporary.
71 for (std::size_t i=0; i<host_vec.
size(); ++i)
72 std_vec[i] = host_vec[i];
// Function template over NumericT; its signature is not visible in this excerpt.
77 template<
typename NumericT>
// Temporary std::vector sized to match vcl_vec.
80 std::vector<NumericT> std_vec(vcl_vec.
size());
// Writes the temporary into host_vec element by element. The step that fills
// std_vec is not shown in this excerpt.
// NOTE(review): the loop bound is host_vec.size() while std_vec was sized from
// vcl_vec.size() -- presumably both sizes agree; confirm at the call sites.
84 for (std::size_t i=0; i<host_vec.
size(); ++i)
85 host_vec[i] = std_vec[i];
// Function template over ScalarType; its signature is not visible in this excerpt.
92 template<
typename ScalarType>
// When s1 and s2 differ, return their signed difference scaled by the larger
// of |s1| and |s2|. The guard also keeps the divisor non-zero, because two
// unequal values cannot both be zero.
// NOTE(review): the value returned when s1 == s2 is not visible here.
96 if (std::fabs(s1 - s2) > 0 )
97 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Function template over ScalarType; its signature is not visible in this excerpt.
103 template<
typename ScalarType>
// For unequal s1 and s2, yields (s1 - s2) divided by max(|s1|, |s2|); the sign
// of the difference is preserved. The divisor cannot be zero inside this
// branch since the two values differ.
// NOTE(review): the result for s1 == s2 lies outside the visible lines.
107 if (std::fabs(s1 - s2) > 0 )
108 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Function template over ScalarType; its signature is not visible in this excerpt.
114 template<
typename ScalarType>
// Signed relative difference: taken only when s1 != s2, and normalised by the
// larger absolute value of the two operands (non-zero whenever they differ).
// NOTE(review): the fall-through result for equal operands is not shown here.
118 if (std::fabs(s1 - s2) > 0 )
119 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Function template over ScalarType and ViennaCLVectorType; its signature is
// not visible in this excerpt.
125 template<
typename ScalarType,
typename ViennaCLVectorType>
// Host-side buffer with one slot per element of vcl_vec. The step that fills
// it is not shown in this excerpt.
128 std::vector<ScalarType> v2_cpu(vcl_vec.size());
// Overwrite each v2_cpu[i] with the relative error against v1[i].
// NOTE(review): the index is unsigned int while the other loops in this file
// use std::size_t; the bound is v1.size() although v2_cpu was sized from
// vcl_vec.size() -- presumably the two sizes match; confirm with callers.
132 for (
unsigned int i=0;i<v1.
size(); ++i)
// Divide only when at least one of the two entries is non-zero, so the divisor
// is never zero. The branch for two zero entries is not visible here.
134 if (
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
135 v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) /
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
// Reduce to the largest absolute entry of v2_cpu, accumulated in ret.
141 for (std::size_t i=0; i<v2_cpu.size(); ++i)
142 ret =
std::max(ret, std::fabs(v2_cpu[i]));
// Compares t1 and t2 via diff() and records the outcome as an exit code.
// T1/T2 are whatever operand types diff() accepts; epsilon is passed as double.
147 template<
typename T1,
typename T2>
148 int check(T1
const & t1, T2
const & t2,
double epsilon)
// Start from success; only the error branch below changes this.
150 int retval = EXIT_SUCCESS;
// Magnitude of the difference reported by diff(); any sign is discarded.
152 double temp = std::fabs(
diff(t1, t2));
// Error branch: print the relative difference and mark the check as failed.
// NOTE(review): the condition guarding this branch is not visible in this
// excerpt -- presumably temp is compared against epsilon; confirm.
155 std::cout <<
"# Error! Relative difference: " << temp << std::endl;
156 retval = EXIT_FAILURE;
165 template<
typename NumericT,
typename Epsilon,
typename HostVectorType,
typename ViennaCLVectorType1,
typename ViennaCLVectorType2 >
166 int test(Epsilon
const& epsilon,
167 HostVectorType & host_v1, HostVectorType & host_v2,
168 ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
170 int retval = EXIT_SUCCESS;
180 std::cout <<
"Checking for zero_vector initializer..." << std::endl;
181 for (std::size_t i=0; i<host_v1.size(); ++i)
184 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
187 std::cout <<
"Checking for scalar_vector initializer..." << std::endl;
188 for (std::size_t i=0; i<host_v1.size(); ++i)
191 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
194 for (std::size_t i=0; i<host_v1.size(); ++i)
197 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
200 std::cout <<
"Checking for unit_vector initializer..." << std::endl;
201 for (std::size_t i=0; i<host_v1.size(); ++i)
205 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
209 for (std::size_t i=0; i<host_v1.size(); ++i)
211 host_v1[i] =
NumericT(1.0) + randomNumber();
212 host_v2[i] =
NumericT(1.0) + randomNumber();
218 std::cout <<
"Checking for successful copy..." << std::endl;
219 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
221 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
229 std::cout <<
"Testing inner_prod..." << std::endl;
231 for (std::size_t i=0; i<host_v1.size(); ++i)
232 cpu_result += host_v1[i] * host_v2[i];
236 std::cout <<
"Reference: " << cpu_result << std::endl;
237 std::cout << cpu_result2 << std::endl;
238 std::cout << gpu_result << std::endl;
239 if (
check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
241 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
245 for (std::size_t i=0; i<host_v1.size(); ++i)
246 cpu_result += (host_v1[i] + host_v2[i]) * (host_v2[i] - host_v1[i]);
250 std::cout <<
"Reference: " << cpu_result << std::endl;
251 std::cout << cpu_result3 << std::endl;
252 std::cout << gpu_result << std::endl;
253 if (
check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
255 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
259 std::cout <<
"Testing norm_1..." << std::endl;
261 for (std::size_t i=0; i<host_v1.size(); ++i)
262 cpu_result += std::fabs(host_v1[i]);
265 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
268 gpu_result = 2 * cpu_result;
270 for (std::size_t i=0; i<host_v1.size(); ++i)
271 cpu_result += std::fabs(host_v1[i]);
272 gpu_result = cpu_result;
276 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
280 for (std::size_t i=0; i<host_v1.size(); ++i)
281 cpu_result += std::fabs(host_v1[i] + host_v2[i]);
282 gpu_result = cpu_result;
286 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
290 std::cout <<
"Testing norm_2..." << std::endl;
292 for (std::size_t i=0; i<host_v1.size(); ++i)
293 cpu_result += host_v1[i] * host_v1[i];
294 cpu_result = std::sqrt(cpu_result);
297 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
300 gpu_result = 2 * cpu_result;
302 for (std::size_t i=0; i<host_v1.size(); ++i)
303 cpu_result += host_v1[i] * host_v1[i];
304 gpu_result = std::sqrt(cpu_result);
307 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
311 for (std::size_t i=0; i<host_v1.size(); ++i)
312 cpu_result += (host_v1[i] + host_v2[i]) * (host_v1[i] + host_v2[i]);
313 gpu_result = std::sqrt(cpu_result);
316 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
320 std::cout <<
"Testing norm_inf..." << std::endl;
321 cpu_result = std::fabs(host_v1[0]);
322 for (std::size_t i=0; i<host_v1.size(); ++i)
323 cpu_result =
std::max(std::fabs(host_v1[i]), cpu_result);
326 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
329 gpu_result = 2 * cpu_result;
330 cpu_result = std::fabs(host_v1[0]);
331 for (std::size_t i=0; i<host_v1.size(); ++i)
332 cpu_result =
std::max(std::fabs(host_v1[i]), cpu_result);
333 gpu_result = cpu_result;
337 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
340 cpu_result = std::fabs(host_v1[0]);
341 for (std::size_t i=0; i<host_v1.size(); ++i)
342 cpu_result =
std::max(std::fabs(host_v1[i] + host_v2[i]), cpu_result);
343 gpu_result = cpu_result;
347 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
351 std::cout <<
"Testing index_norm_inf..." << std::endl;
352 std::size_t cpu_index = 0;
353 cpu_result = std::fabs(host_v1[0]);
354 for (std::size_t i=0; i<host_v1.size(); ++i)
356 if (std::fabs(host_v1[i]) > cpu_result)
358 cpu_result = std::fabs(host_v1[i]);
364 if (
check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
367 cpu_result = host_v1[cpu_index];
370 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
373 cpu_result = std::fabs(host_v1[0] + host_v2[0]);
374 for (std::size_t i=0; i<host_v1.size(); ++i)
376 if (std::fabs(host_v1[i] + host_v2[i]) > cpu_result)
378 cpu_result = std::fabs(host_v1[i] + host_v2[i]);
382 cpu_result = host_v1[cpu_index];
385 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
390 std::cout <<
"Testing max..." << std::endl;
391 cpu_result = host_v1[0];
392 for (std::size_t i=0; i<host_v1.size(); ++i)
393 cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
396 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
399 cpu_result = host_v1[0];
400 for (std::size_t i=0; i<host_v1.size(); ++i)
401 cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
402 gpu_result = cpu_result;
406 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
409 cpu_result = host_v1[0] + host_v2[0];
410 for (std::size_t i=0; i<host_v1.size(); ++i)
411 cpu_result = std::max<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
412 gpu_result = cpu_result;
416 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
421 std::cout <<
"Testing min..." << std::endl;
422 cpu_result = host_v1[0];
423 for (std::size_t i=0; i<host_v1.size(); ++i)
424 cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
427 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
430 cpu_result = host_v1[0];
431 for (std::size_t i=0; i<host_v1.size(); ++i)
432 cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
433 gpu_result = cpu_result;
437 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
440 cpu_result = host_v1[0] + host_v2[0];
441 for (std::size_t i=0; i<host_v1.size(); ++i)
442 cpu_result = std::min<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
443 gpu_result = cpu_result;
447 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
451 std::cout <<
"Testing sum..." << std::endl;
453 for (std::size_t i=0; i<host_v1.size(); ++i)
454 cpu_result += host_v1[i];
458 if (
check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
460 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
464 for (std::size_t i=0; i<host_v1.size(); ++i)
465 cpu_result += host_v1[i] + host_v2[i];
469 if (
check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
471 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
481 for (std::size_t i=0; i<host_v1.size(); ++i)
489 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
491 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
496 std::cout <<
"Testing assignments..." << std::endl;
498 for (
size_t i=0; i < host_v1.size(); ++i)
501 for (
size_t i=0; i < vcl_v1.size(); ++i)
504 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
507 std::cout <<
"Testing assignments via iterators..." << std::endl;
509 host_v1[2] =
static_cast<NumericT>(1.9);
510 vcl_v1[2] =
static_cast<NumericT>(1.9);
512 host_v1[2] =
static_cast<NumericT>(1.5);
513 typename ViennaCLVectorType1::iterator vcl_v1_it = vcl_v1.begin();
516 *vcl_v1_it =
static_cast<NumericT>(1.5);
518 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
524 for (std::size_t i=0; i < host_v1.size(); ++i)
526 host_v1[i] =
NumericT(1.0) + randomNumber();
527 host_v2[i] =
NumericT(3.1415) * host_v1[i];
532 std::cout <<
"Testing scaling with CPU scalar..." << std::endl;
536 for (std::size_t i=0; i < host_v1.size(); ++i)
537 host_v1[i] *=
NumericT(
long(alpha));
538 vcl_v1 *= long(alpha);
540 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
543 for (std::size_t i=0; i < host_v1.size(); ++i)
544 host_v1[i] *=
NumericT(
float(alpha));
545 vcl_v1 *= float(alpha);
547 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
550 for (std::size_t i=0; i < host_v1.size(); ++i)
551 host_v1[i] *=
NumericT(
double(alpha));
552 vcl_v1 *= double(alpha);
554 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
558 std::cout <<
"Testing scaling with GPU scalar..." << std::endl;
559 for (std::size_t i=0; i < host_v1.size(); ++i)
563 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
566 std::cout <<
"Testing scaling with scalar expression..." << std::endl;
568 for (std::size_t i=0; i < host_v1.size(); ++i)
569 cpu_result += host_v1[i] * host_v2[i];
570 for (std::size_t i=0; i < host_v1.size(); ++i)
571 host_v1[i] *= cpu_result;
574 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
580 std::cout <<
"Testing shrinking with CPU scalar..." << std::endl;
581 for (std::size_t i=0; i < host_v1.size(); ++i)
583 vcl_v1 /= long(beta);
585 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
588 for (std::size_t i=0; i < host_v1.size(); ++i)
589 host_v1[i] /=
NumericT(
float(beta));
590 vcl_v1 /= float(beta);
592 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
595 for (std::size_t i=0; i < host_v1.size(); ++i)
596 host_v1[i] /=
NumericT(
double(beta));
597 vcl_v1 /= double(beta);
599 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
603 std::cout <<
"Testing shrinking with GPU scalar..." << std::endl;
604 for (std::size_t i=0; i < host_v1.size(); ++i)
608 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
616 for (
size_t i=0; i < host_v1.size(); ++i)
618 host_v1[i] =
NumericT(1.0) + randomNumber();
619 host_v2[i] =
NumericT(3.1415) * host_v1[i];
624 std::cout <<
"Testing add on vector..." << std::endl;
626 std::cout <<
"Checking for successful copy..." << std::endl;
627 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
629 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
632 for (
size_t i=0; i < host_v1.size(); ++i)
633 host_v1[i] = host_v1[i] + host_v2[i];
634 vcl_v1 = vcl_v1 + vcl_v2;
636 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
639 std::cout <<
"Testing add on vector with flipsign..." << std::endl;
640 for (
size_t i=0; i < host_v1.size(); ++i)
641 host_v1[i] = - host_v1[i] + host_v2[i];
642 vcl_v1 = - vcl_v1 + vcl_v2;
644 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
647 std::cout <<
"Testing inplace-add on vector..." << std::endl;
648 for (
size_t i=0; i < host_v1.size(); ++i)
649 host_v1[i] += host_v2[i];
652 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
655 std::cout <<
"Testing assignment to vector with vector multiplied by scalar expression..." << std::endl;
657 for (std::size_t i=0; i < host_v1.size(); ++i)
658 cpu_result += host_v1[i] * host_v2[i];
659 for (std::size_t i=0; i < host_v1.size(); ++i)
660 host_v1[i] = cpu_result * host_v2[i];
667 std::cout <<
"Testing sub on vector..." << std::endl;
668 for (std::size_t i=0; i < host_v1.size(); ++i)
669 host_v2[i] =
NumericT(3.1415) * host_v1[i];
673 for (std::size_t i=0; i < host_v1.size(); ++i)
674 host_v1[i] = host_v1[i] - host_v2[i];
675 vcl_v1 = vcl_v1 - vcl_v2;
677 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
680 std::cout <<
"Testing inplace-sub on vector..." << std::endl;
681 for (std::size_t i=0; i < host_v1.size(); ++i)
682 host_v1[i] -= host_v2[i];
685 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
693 std::cout <<
"Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
694 for (
size_t i=0; i < host_v1.size(); ++i)
696 host_v1[i] =
NumericT(1.0) + randomNumber();
697 host_v2[i] =
NumericT(3.1415) * host_v1[i];
702 for (std::size_t i=0; i < host_v1.size(); ++i)
703 host_v1[i] = host_v1[i] + host_v2[i] *
NumericT(
float(alpha));
704 vcl_v1 = vcl_v1 + vcl_v2 * float(alpha);
706 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
709 for (std::size_t i=0; i < host_v1.size(); ++i)
710 host_v1[i] = host_v1[i] + host_v2[i] *
NumericT(
double(alpha));
711 vcl_v1 = vcl_v1 + vcl_v2 * double(alpha);
713 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
717 std::cout <<
"Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
718 for (std::size_t i=0; i < host_v1.size(); ++i)
719 host_v2[i] =
NumericT(3.1415) * host_v1[i];
723 for (std::size_t i=0; i < host_v1.size(); ++i)
724 host_v1[i] =
NumericT(
long(alpha)) * host_v1[i] + host_v2[i];
725 vcl_v1 = long(alpha) * vcl_v1 + vcl_v2;
727 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
730 for (std::size_t i=0; i < host_v1.size(); ++i)
731 host_v1[i] =
NumericT(
float(alpha)) * host_v1[i] + host_v2[i];
732 vcl_v1 = float(alpha) * vcl_v1 + vcl_v2;
734 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
737 for (std::size_t i=0; i < host_v1.size(); ++i)
738 host_v1[i] =
NumericT(
double(alpha)) * host_v1[i] + host_v2[i];
739 vcl_v1 = double(alpha) * vcl_v1 + vcl_v2;
741 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
745 std::cout <<
"Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
746 for (std::size_t i=0; i < host_v1.size(); ++i)
747 host_v2[i] =
NumericT(3.1415) * host_v1[i];
751 for (std::size_t i=0; i < host_v1.size(); ++i)
752 host_v1[i] =
NumericT(
long(alpha)) * host_v1[i] +
NumericT(
long(beta)) * host_v2[i];
753 vcl_v1 = long(alpha) * vcl_v1 + long(beta) * vcl_v2;
755 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
758 for (std::size_t i=0; i < host_v1.size(); ++i)
759 host_v1[i] =
NumericT(
float(alpha)) * host_v1[i] +
NumericT(
float(beta)) * host_v2[i];
760 vcl_v1 = float(alpha) * vcl_v1 + float(beta) * vcl_v2;
762 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
765 for (std::size_t i=0; i < host_v1.size(); ++i)
766 host_v1[i] =
NumericT(
double(alpha)) * host_v1[i] +
NumericT(
double(beta)) * host_v2[i];
767 vcl_v1 = double(alpha) * vcl_v1 + double(beta) * vcl_v2;
769 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
773 std::cout <<
"Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
774 for (std::size_t i=0; i < host_v1.size(); ++i)
775 host_v2[i] =
NumericT(3.1415) * host_v1[i];
779 for (std::size_t i=0; i < host_v1.size(); ++i)
780 host_v1[i] += host_v2[i] *
NumericT(
long(alpha));
781 vcl_v1 += vcl_v2 * long(alpha);
783 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
786 for (std::size_t i=0; i < host_v1.size(); ++i)
787 host_v1[i] += host_v2[i] *
NumericT(
float(alpha));
788 vcl_v1 += vcl_v2 * float(alpha);
790 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
793 for (std::size_t i=0; i < host_v1.size(); ++i)
794 host_v1[i] +=
NumericT(
double(alpha)) * host_v2[i];
795 vcl_v1 += double(alpha) * vcl_v2;
797 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
801 std::cout <<
"Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
802 for (std::size_t i=0; i < host_v1.size(); ++i)
803 host_v2[i] =
NumericT(3.1415) * host_v1[i];
807 for (std::size_t i=0; i < host_v1.size(); ++i)
808 host_v1[i] = host_v1[i] + alpha * host_v2[i];
809 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
811 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
814 std::cout <<
"Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
815 for (std::size_t i=0; i < host_v1.size(); ++i)
816 host_v2[i] =
NumericT(3.1415) * host_v1[i];
820 for (std::size_t i=0; i < host_v1.size(); ++i)
821 host_v1[i] = host_v1[i] + alpha * host_v2[i];
822 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
824 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
827 std::cout <<
"Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
828 for (std::size_t i=0; i < host_v1.size(); ++i)
829 host_v2[i] =
NumericT(3.1415) * host_v1[i];
833 for (std::size_t i=0; i < host_v1.size(); ++i)
834 host_v1[i] = alpha * host_v1[i] + beta * host_v2[i];
835 vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
837 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
841 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
842 for (std::size_t i=0; i < host_v1.size(); ++i)
843 host_v2[i] =
NumericT(3.1415) * host_v1[i];
847 for (std::size_t i=0; i < host_v1.size(); ++i)
848 host_v1[i] += alpha * host_v1[i] + beta * host_v2[i];
849 vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
851 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
854 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
855 for (std::size_t i=0; i < host_v1.size(); ++i)
856 host_v2[i] =
NumericT(3.1415) * host_v1[i];
860 for (std::size_t i=0; i < host_v1.size(); ++i)
861 host_v1[i] += alpha * host_v1[i] - beta * host_v2[i];
862 vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
864 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
869 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
870 for (std::size_t i=0; i < host_v1.size(); ++i)
871 host_v2[i] =
NumericT(3.1415) * host_v1[i];
875 for (std::size_t i=0; i < host_v1.size(); ++i)
876 host_v1[i] += alpha * host_v2[i];
877 vcl_v1 += gpu_alpha * vcl_v2;
879 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
886 std::cout <<
"Testing division-add on vector with CPU scalar (right)..." << std::endl;
887 for (
size_t i=0; i < host_v1.size(); ++i)
889 host_v1[i] =
NumericT(1.0) + randomNumber();
890 host_v2[i] =
NumericT(3.1415) * host_v1[i];
895 for (std::size_t i=0; i < host_v1.size(); ++i)
896 host_v1[i] = host_v1[i] + host_v2[i] /
NumericT(
long(alpha));
897 vcl_v1 = vcl_v1 + vcl_v2 / long(alpha);
899 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
902 for (std::size_t i=0; i < host_v1.size(); ++i)
903 host_v1[i] = host_v1[i] + host_v2[i] /
NumericT(
float(alpha));
904 vcl_v1 = vcl_v1 + vcl_v2 / float(alpha);
906 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
909 for (std::size_t i=0; i < host_v1.size(); ++i)
910 host_v1[i] = host_v1[i] + host_v2[i] /
NumericT(
double(alpha));
911 vcl_v1 = vcl_v1 + vcl_v2 / double(alpha);
913 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
917 std::cout <<
"Testing division-add on vector with CPU scalar (left)..." << std::endl;
918 for (std::size_t i=0; i < host_v1.size(); ++i)
919 host_v2[i] =
NumericT(3.1415) * host_v1[i];
923 for (std::size_t i=0; i < host_v1.size(); ++i)
924 host_v1[i] = host_v1[i] /
NumericT(
float(alpha)) + host_v2[i];
925 vcl_v1 = vcl_v1 / float(alpha) + vcl_v2;
927 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
930 for (std::size_t i=0; i < host_v1.size(); ++i)
931 host_v1[i] = host_v1[i] /
NumericT(
double(alpha)) + host_v2[i];
932 vcl_v1 = vcl_v1 / double(alpha) + vcl_v2;
934 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
938 std::cout <<
"Testing division-add on vector with CPU scalar (both)..." << std::endl;
939 for (std::size_t i=0; i < host_v1.size(); ++i)
940 host_v2[i] =
NumericT(3.1415) * host_v1[i];
944 for (std::size_t i=0; i < host_v1.size(); ++i)
945 host_v1[i] = host_v1[i] /
NumericT(
float(alpha)) + host_v2[i] /
NumericT(
float(beta));
946 vcl_v1 = vcl_v1 / float(alpha) + vcl_v2 / float(beta);
948 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
951 for (std::size_t i=0; i < host_v1.size(); ++i)
952 host_v1[i] = host_v1[i] /
NumericT(
double(alpha)) + host_v2[i] /
NumericT(
double(beta));
953 vcl_v1 = vcl_v1 / double(alpha) + vcl_v2 / double(beta);
955 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
958 std::cout <<
"Testing division-multiply-add on vector with CPU scalar..." << std::endl;
959 for (std::size_t i=0; i < host_v1.size(); ++i)
960 host_v2[i] =
NumericT(3.1415) * host_v1[i];
964 for (std::size_t i=0; i < host_v1.size(); ++i)
965 host_v1[i] = host_v1[i] / alpha + host_v2[i] * beta;
966 vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
968 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
972 std::cout <<
"Testing multiply-division-add on vector with CPU scalar..." << std::endl;
973 for (std::size_t i=0; i < host_v1.size(); ++i)
974 host_v2[i] =
NumericT(3.1415) * host_v1[i];
978 for (std::size_t i=0; i < host_v1.size(); ++i)
979 host_v1[i] = host_v1[i] * alpha + host_v2[i] / beta;
980 vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
982 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
987 std::cout <<
"Testing inplace division-add on vector with CPU scalar..." << std::endl;
988 for (std::size_t i=0; i < host_v1.size(); ++i)
989 host_v2[i] =
NumericT(3.1415) * host_v1[i];
993 for (std::size_t i=0; i < host_v1.size(); ++i)
994 host_v1[i] += host_v2[i] / alpha;
995 vcl_v1 += vcl_v2 / alpha;
997 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1001 std::cout <<
"Testing division-add on vector with GPU scalar (right)..." << std::endl;
1002 for (std::size_t i=0; i < host_v1.size(); ++i)
1003 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1007 for (std::size_t i=0; i < host_v1.size(); ++i)
1008 host_v1[i] = host_v1[i] + host_v2[i] / alpha;
1009 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
1011 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1012 return EXIT_FAILURE;
1014 std::cout <<
"Testing division-add on vector with GPU scalar (left)..." << std::endl;
1015 for (std::size_t i=0; i < host_v1.size(); ++i)
1016 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1020 for (std::size_t i=0; i < host_v1.size(); ++i)
1021 host_v1[i] = host_v1[i] + host_v2[i] / alpha;
1022 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
1024 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1025 return EXIT_FAILURE;
1027 std::cout <<
"Testing division-add on vector with GPU scalar (both)..." << std::endl;
1028 for (std::size_t i=0; i < host_v1.size(); ++i)
1029 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1033 for (std::size_t i=0; i < host_v1.size(); ++i)
1034 host_v1[i] = host_v1[i] / alpha + host_v2[i] / beta;
1035 vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1037 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1038 return EXIT_FAILURE;
1041 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
1042 for (std::size_t i=0; i < host_v1.size(); ++i)
1043 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1047 for (std::size_t i=0; i < host_v1.size(); ++i)
1048 host_v1[i] += host_v1[i] / alpha + host_v2[i] / beta;
1049 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1051 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1052 return EXIT_FAILURE;
1054 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
1055 for (std::size_t i=0; i < host_v1.size(); ++i)
1056 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1060 for (std::size_t i=0; i < host_v1.size(); ++i)
1061 host_v1[i] += host_v1[i] / alpha - host_v2[i] / beta;
1062 vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1064 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1065 return EXIT_FAILURE;
1067 std::cout <<
"Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
1068 for (std::size_t i=0; i < host_v1.size(); ++i)
1069 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1073 for (std::size_t i=0; i < host_v1.size(); ++i)
1074 host_v1[i] += host_v1[i] / alpha + host_v2[i] * beta;
1075 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1077 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1078 return EXIT_FAILURE;
1080 std::cout <<
"Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
1081 for (std::size_t i=0; i < host_v1.size(); ++i)
1082 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1086 for (std::size_t i=0; i < host_v1.size(); ++i)
1087 host_v1[i] += host_v1[i] * alpha - host_v2[i] / beta;
1088 vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1090 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1091 return EXIT_FAILURE;
1095 std::cout <<
"Testing inplace division-add on vector with GPU scalar..." << std::endl;
1096 for (std::size_t i=0; i < host_v1.size(); ++i)
1097 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1101 for (std::size_t i=0; i < host_v1.size(); ++i)
1102 host_v1[i] += host_v2[i] * alpha;
1103 vcl_v1 += vcl_v2 * gpu_alpha;
1105 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1106 return EXIT_FAILURE;
1113 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
1114 for (
size_t i=0; i < host_v1.size(); ++i)
1116 host_v1[i] =
NumericT(1.0) + randomNumber();
1117 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1122 for (std::size_t i=0; i < host_v1.size(); ++i)
1123 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1124 vcl_v1 = vcl_v1 - alpha * vcl_v2;
1126 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1127 return EXIT_FAILURE;
1130 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
1131 for (std::size_t i=0; i < host_v1.size(); ++i)
1132 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1136 for (std::size_t i=0; i < host_v1.size(); ++i)
1137 host_v1[i] = alpha * host_v1[i] - host_v2[i];
1138 vcl_v1 = alpha * vcl_v1 - vcl_v2;
1140 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1141 return EXIT_FAILURE;
1143 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
1144 for (std::size_t i=0; i < host_v1.size(); ++i)
1145 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1149 for (std::size_t i=0; i < host_v1.size(); ++i)
1150 host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1151 vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
1153 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1154 return EXIT_FAILURE;
1157 std::cout <<
"Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
1158 for (std::size_t i=0; i < host_v1.size(); ++i)
1159 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1163 for (std::size_t i=0; i < host_v1.size(); ++i)
1164 host_v1[i] -= alpha * host_v2[i];
1165 vcl_v1 -= alpha * vcl_v2;
1167 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1168 return EXIT_FAILURE;
1171 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
1172 for (std::size_t i=0; i < host_v1.size(); ++i)
1173 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1177 for (std::size_t i=0; i < host_v1.size(); ++i)
1178 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1179 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1181 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1182 return EXIT_FAILURE;
1184 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
1185 for (std::size_t i=0; i < host_v1.size(); ++i)
1186 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1190 for (std::size_t i=0; i < host_v1.size(); ++i)
1191 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1192 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1194 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1195 return EXIT_FAILURE;
1197 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
1198 for (std::size_t i=0; i < host_v1.size(); ++i)
1199 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1203 for (std::size_t i=0; i < host_v1.size(); ++i)
1204 host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1205 vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1207 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1208 return EXIT_FAILURE;
1210 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1211 for (std::size_t i=0; i < host_v1.size(); ++i)
1212 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1216 for (std::size_t i=0; i < host_v1.size(); ++i)
1217 host_v1[i] -= alpha * host_v1[i] + beta * host_v2[i];
1218 vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
1220 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1221 return EXIT_FAILURE;
1223 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1224 for (std::size_t i=0; i < host_v1.size(); ++i)
1225 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1229 for (std::size_t i=0; i < host_v1.size(); ++i)
1230 host_v1[i] -= alpha * host_v1[i] - beta * host_v2[i];
1231 vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1233 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1234 return EXIT_FAILURE;
1237 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
1238 for (std::size_t i=0; i < host_v1.size(); ++i)
1239 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1243 for (std::size_t i=0; i < host_v1.size(); ++i)
1244 host_v1[i] -= alpha * host_v2[i];
1245 vcl_v1 -= gpu_alpha * vcl_v2;
1247 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1248 return EXIT_FAILURE;
1255 std::cout <<
"Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
1256 for (
size_t i=0; i < host_v1.size(); ++i)
1258 host_v1[i] =
NumericT(1.0) + randomNumber();
1259 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1264 for (std::size_t i=0; i < host_v1.size(); ++i)
1265 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1266 vcl_v1 = vcl_v1 - vcl_v2 / alpha;
1268 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1269 return EXIT_FAILURE;
1272 std::cout <<
"Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1273 for (std::size_t i=0; i < host_v1.size(); ++i)
1274 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1278 for (std::size_t i=0; i < host_v1.size(); ++i)
1279 host_v1[i] = host_v1[i] / alpha - host_v2[i];
1280 vcl_v1 = vcl_v1 / alpha - vcl_v2;
1282 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1283 return EXIT_FAILURE;
1285 std::cout <<
"Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1286 for (std::size_t i=0; i < host_v1.size(); ++i)
1287 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1291 for (std::size_t i=0; i < host_v1.size(); ++i)
1292 host_v1[i] = host_v1[i] / alpha - host_v2[i] / alpha;
1293 vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1295 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1296 return EXIT_FAILURE;
1299 std::cout <<
"Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1300 for (std::size_t i=0; i < host_v1.size(); ++i)
1301 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1305 for (std::size_t i=0; i < host_v1.size(); ++i)
1306 host_v1[i] -= host_v2[i] / alpha;
1307 vcl_v1 -= vcl_v2 / alpha;
1309 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1310 return EXIT_FAILURE;
1312 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1313 for (std::size_t i=0; i < host_v1.size(); ++i)
1314 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1318 for (std::size_t i=0; i < host_v1.size(); ++i)
1319 host_v1[i] -= host_v2[i] / alpha;
1320 vcl_v1 -= vcl_v2 / gpu_alpha;
1322 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1323 return EXIT_FAILURE;
1326 std::cout <<
"Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1327 for (std::size_t i=0; i < host_v1.size(); ++i)
1328 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1332 for (std::size_t i=0; i < host_v1.size(); ++i)
1333 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1334 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1336 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1337 return EXIT_FAILURE;
1339 std::cout <<
"Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1340 for (std::size_t i=0; i < host_v1.size(); ++i)
1341 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1345 for (std::size_t i=0; i < host_v1.size(); ++i)
1346 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1347 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1349 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1350 return EXIT_FAILURE;
1352 std::cout <<
"Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1353 for (std::size_t i=0; i < host_v1.size(); ++i)
1354 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1358 for (std::size_t i=0; i < host_v1.size(); ++i)
1359 host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta;
1360 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1362 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1363 return EXIT_FAILURE;
1365 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1366 for (std::size_t i=0; i < host_v1.size(); ++i)
1367 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1371 for (std::size_t i=0; i < host_v1.size(); ++i)
1372 host_v1[i] -= host_v1[i] / alpha + host_v2[i] / beta;
1373 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1375 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1376 return EXIT_FAILURE;
1378 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1379 for (std::size_t i=0; i < host_v1.size(); ++i)
1380 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1384 for (std::size_t i=0; i < host_v1.size(); ++i)
1385 host_v1[i] -= host_v1[i] / alpha - host_v2[i] / beta;
1386 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1388 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1389 return EXIT_FAILURE;
1391 std::cout <<
"Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1392 for (std::size_t i=0; i < host_v1.size(); ++i)
1393 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1397 for (std::size_t i=0; i < host_v1.size(); ++i)
1398 host_v1[i] = host_v1[i] * alpha - host_v2[i] / beta;
1399 vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1401 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1402 return EXIT_FAILURE;
1404 std::cout <<
"Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1405 for (std::size_t i=0; i < host_v1.size(); ++i)
1406 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1410 for (std::size_t i=0; i < host_v1.size(); ++i)
1411 host_v1[i] = host_v1[i] / alpha - host_v2[i] * beta;
1412 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1414 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1415 return EXIT_FAILURE;
1417 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1418 for (std::size_t i=0; i < host_v1.size(); ++i)
1419 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1423 for (std::size_t i=0; i < host_v1.size(); ++i)
1424 host_v1[i] -= host_v1[i] * alpha + host_v2[i] / beta;
1425 vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1427 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1428 return EXIT_FAILURE;
1430 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1431 for (std::size_t i=0; i < host_v1.size(); ++i)
1432 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1436 for (std::size_t i=0; i < host_v1.size(); ++i)
1437 host_v1[i] -= host_v1[i] / alpha + host_v2[i] * beta;
1438 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1440 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1441 return EXIT_FAILURE;
1443 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1444 for (std::size_t i=0; i < host_v1.size(); ++i)
1445 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1449 for (std::size_t i=0; i < host_v1.size(); ++i)
1450 host_v1[i] -= host_v1[i] * alpha - host_v2[i] / beta;
1451 vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1453 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1454 return EXIT_FAILURE;
1456 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1457 for (std::size_t i=0; i < host_v1.size(); ++i)
1458 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1462 for (std::size_t i=0; i < host_v1.size(); ++i)
1463 host_v1[i] -= host_v1[i] / alpha - host_v2[i] * beta;
1464 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1466 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1467 return EXIT_FAILURE;
1470 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1471 for (std::size_t i=0; i < host_v1.size(); ++i)
1472 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1476 for (std::size_t i=0; i < host_v1.size(); ++i)
1477 host_v1[i] -= alpha * host_v2[i];
1478 vcl_v1 -= gpu_alpha * vcl_v2;
1480 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1481 return EXIT_FAILURE;
1488 for (std::size_t i=0; i < host_v1.size(); ++i)
1490 host_v1[i] =
NumericT(1.0) + randomNumber();
1491 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1496 std::cout <<
"Testing three vector additions..." << std::endl;
1497 for (std::size_t i=0; i < host_v1.size(); ++i)
1498 host_v1[i] = host_v2[i] + host_v1[i] + host_v2[i];
1499 vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1501 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1502 return EXIT_FAILURE;
1505 for (std::size_t i=0; i < host_v1.size(); ++i)
1506 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1510 std::cout <<
"Testing complicated vector expression with CPU scalar..." << std::endl;
1511 for (std::size_t i=0; i < host_v1.size(); ++i)
1512 host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1513 vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1515 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1516 return EXIT_FAILURE;
1518 std::cout <<
"Testing complicated vector expression with GPU scalar..." << std::endl;
1519 for (std::size_t i=0; i < host_v1.size(); ++i)
1520 host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1521 vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1523 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1524 return EXIT_FAILURE;
1527 for (std::size_t i=0; i < host_v1.size(); ++i)
1528 host_v2[i] =
NumericT(3.1415) * host_v1[i];
1532 std::cout <<
"Testing swap..." << std::endl;
1533 for (std::size_t i=0; i < host_v1.size(); ++i)
1536 host_v1[i] = host_v2[i];
1539 swap(vcl_v1, vcl_v2);
1541 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1542 return EXIT_FAILURE;
1545 for (std::size_t i=0; i<host_v1.size(); ++i)
1547 host_v1[i] =
NumericT(1.0) + randomNumber();
1548 host_v2[i] =
NumericT(5.0) + randomNumber();
1554 std::cout <<
"Testing unary operator-..." << std::endl;
1555 for (std::size_t i=0; i < host_v1.size(); ++i)
1556 host_v1[i] = - host_v2[i];
1559 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1560 return EXIT_FAILURE;
1563 std::cout <<
"Testing elementwise multiplication..." << std::endl;
1564 std::cout <<
" v1 = element_prod(v1, v2);" << std::endl;
1565 for (std::size_t i=0; i < host_v1.size(); ++i)
1566 host_v1[i] = host_v1[i] * host_v2[i];
1569 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1570 return EXIT_FAILURE;
1572 std::cout <<
" v1 += element_prod(v1, v2);" << std::endl;
1573 for (std::size_t i=0; i < host_v1.size(); ++i)
1574 host_v1[i] += host_v1[i] * host_v2[i];
1577 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1578 return EXIT_FAILURE;
1580 std::cout <<
" v1 -= element_prod(v1, v2);" << std::endl;
1581 for (std::size_t i=0; i < host_v1.size(); ++i)
1582 host_v1[i] -= host_v1[i] * host_v2[i];
1585 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1586 return EXIT_FAILURE;
1589 std::cout <<
" v1 = element_prod(v1 + v2, v2);" << std::endl;
1590 for (std::size_t i=0; i < host_v1.size(); ++i)
1591 host_v1[i] = (host_v1[i] + host_v2[i]) * host_v2[i];
1594 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1595 return EXIT_FAILURE;
1597 std::cout <<
" v1 += element_prod(v1 + v2, v2);" << std::endl;
1598 for (std::size_t i=0; i < host_v1.size(); ++i)
1599 host_v1[i] += (host_v1[i] + host_v2[i]) * host_v2[i];
1602 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1603 return EXIT_FAILURE;
1605 std::cout <<
" v1 -= element_prod(v1 + v2, v2);" << std::endl;
1606 for (std::size_t i=0; i < host_v1.size(); ++i)
1607 host_v1[i] -= (host_v1[i] + host_v2[i]) * host_v2[i];
1610 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1611 return EXIT_FAILURE;
1614 std::cout <<
" v1 = element_prod(v1, v2 + v1);" << std::endl;
1615 for (std::size_t i=0; i < host_v1.size(); ++i)
1616 host_v1[i] = host_v1[i] * (host_v2[i] + host_v1[i]);
1619 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1620 return EXIT_FAILURE;
1622 std::cout <<
" v1 += element_prod(v1, v2 + v1);" << std::endl;
1623 for (std::size_t i=0; i < host_v1.size(); ++i)
1624 host_v1[i] += host_v1[i] * (host_v2[i] + host_v1[i]);
1627 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1628 return EXIT_FAILURE;
1630 std::cout <<
" v1 -= element_prod(v1, v2 + v1);" << std::endl;
1631 for (std::size_t i=0; i < host_v1.size(); ++i)
1632 host_v1[i] -= host_v1[i] * (host_v2[i] + host_v1[i]);
1635 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1636 return EXIT_FAILURE;
1639 std::cout <<
" v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1640 for (std::size_t i=0; i < host_v1.size(); ++i)
1641 host_v1[i] = (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1644 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1645 return EXIT_FAILURE;
1647 std::cout <<
" v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1648 for (std::size_t i=0; i < host_v1.size(); ++i)
1649 host_v1[i] += (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1652 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1653 return EXIT_FAILURE;
1655 std::cout <<
" v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1656 for (std::size_t i=0; i < host_v1.size(); ++i)
1657 host_v1[i] -= (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1660 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1661 return EXIT_FAILURE;
1664 std::cout <<
"Testing elementwise division..." << std::endl;
1665 for (std::size_t i=0; i<host_v1.size(); ++i)
1667 host_v1[i] =
NumericT(1.0) + randomNumber();
1668 host_v2[i] =
NumericT(5.0) + randomNumber();
1674 for (std::size_t i=0; i < host_v1.size(); ++i)
1675 host_v1[i] = host_v1[i] / host_v2[i];
1678 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1679 return EXIT_FAILURE;
1681 for (std::size_t i=0; i < host_v1.size(); ++i)
1682 host_v1[i] += host_v1[i] / host_v2[i];
1685 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1686 return EXIT_FAILURE;
1688 for (std::size_t i=0; i < host_v1.size(); ++i)
1689 host_v1[i] -= host_v1[i] / host_v2[i];
1692 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1693 return EXIT_FAILURE;
1696 for (std::size_t i=0; i < host_v1.size(); ++i)
1697 host_v1[i] = (host_v1[i] + host_v2[i]) / host_v2[i];
1700 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1701 return EXIT_FAILURE;
1703 for (std::size_t i=0; i < host_v1.size(); ++i)
1704 host_v1[i] += (host_v1[i] + host_v2[i]) / host_v2[i];
1707 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1708 return EXIT_FAILURE;
1710 for (std::size_t i=0; i < host_v1.size(); ++i)
1711 host_v1[i] -= (host_v1[i] + host_v2[i]) / host_v2[i];
1714 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1715 return EXIT_FAILURE;
1718 for (std::size_t i=0; i < host_v1.size(); ++i)
1719 host_v1[i] = host_v1[i] / (host_v2[i] + host_v1[i]);
1722 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1723 return EXIT_FAILURE;
1725 for (std::size_t i=0; i < host_v1.size(); ++i)
1726 host_v1[i] += host_v1[i] / (host_v2[i] + host_v1[i]);
1729 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1730 return EXIT_FAILURE;
1732 for (std::size_t i=0; i < host_v1.size(); ++i)
1733 host_v1[i] -= host_v1[i] / (host_v2[i] + host_v1[i]);
1736 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1737 return EXIT_FAILURE;
1740 for (std::size_t i=0; i < host_v1.size(); ++i)
1741 host_v1[i] = (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1744 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1745 return EXIT_FAILURE;
1747 for (std::size_t i=0; i < host_v1.size(); ++i)
1748 host_v1[i] += (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1751 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1752 return EXIT_FAILURE;
1754 for (std::size_t i=0; i < host_v1.size(); ++i)
1755 host_v1[i] -= (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1758 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1759 return EXIT_FAILURE;
1762 std::cout <<
"Testing elementwise power function..." << std::endl;
1763 for (std::size_t i=0; i<host_v1.size(); ++i)
1768 std::vector<NumericT> std_v3(host_v1.size());
1774 for (std::size_t i=0; i<host_v3.size(); ++i)
1775 host_v3[i] = std::pow(host_v1[i], host_v2[i]);
1776 vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1778 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1780 std::cerr <<
"** Failure in v1 = pow(v1, v2);" << std::endl;
1781 return EXIT_FAILURE;
1785 for (std::size_t i=0; i<host_v3.size(); ++i)
1786 host_v3[i] = host_v1[i];
1787 for (std::size_t i=0; i<host_v3.size(); ++i)
1788 host_v3[i] += std::pow(host_v1[i], host_v2[i]);
1789 vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1791 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1793 std::cerr <<
"** Failure in v1 += pow(v1, v2);" << std::endl;
1794 return EXIT_FAILURE;
1798 for (std::size_t i=0; i<host_v3.size(); ++i)
1799 host_v3[i] = host_v1[i];
1800 for (std::size_t i=0; i<host_v3.size(); ++i)
1801 host_v3[i] -= std::pow(host_v1[i], host_v2[i]);
1802 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1804 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1806 std::cerr <<
"** Failure in v1 -= pow(v1, v2);" << std::endl;
1807 return EXIT_FAILURE;
1812 for (std::size_t i=0; i<host_v3.size(); ++i)
1813 host_v3[i] = host_v1[i];
1814 for (std::size_t i=0; i<host_v3.size(); ++i)
1815 host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1816 vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1818 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1820 std::cerr <<
"** Failure in v1 = pow(v1 + v2, v2);" << std::endl;
1821 return EXIT_FAILURE;
1825 for (std::size_t i=0; i<host_v3.size(); ++i)
1826 host_v3[i] = host_v1[i];
1827 for (std::size_t i=0; i<host_v3.size(); ++i)
1828 host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1829 vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1831 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1833 std::cerr <<
"** Failure in v1 += pow(v1 + v2, v2);" << std::endl;
1834 return EXIT_FAILURE;
1838 for (std::size_t i=0; i<host_v3.size(); ++i)
1839 host_v3[i] = host_v1[i];
1840 for (std::size_t i=0; i<host_v3.size(); ++i)
1841 host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1842 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1844 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1846 std::cerr <<
"** Failure in v1 -= pow(v1 + v2, v2);" << std::endl;
1847 return EXIT_FAILURE;
1852 for (std::size_t i=0; i<host_v3.size(); ++i)
1853 host_v3[i] = host_v1[i];
1854 for (std::size_t i=0; i<host_v3.size(); ++i)
1855 host_v3[i] = std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1856 vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1858 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1860 std::cerr <<
"** Failure in v1 = pow(v1, v2 + v1);" << std::endl;
1861 return EXIT_FAILURE;
1865 for (std::size_t i=0; i<host_v3.size(); ++i)
1866 host_v3[i] = host_v1[i];
1867 for (std::size_t i=0; i<host_v3.size(); ++i)
1868 host_v3[i] += std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1869 vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1871 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1873 std::cerr <<
"** Failure in v1 += pow(v1, v2 + v1);" << std::endl;
1874 return EXIT_FAILURE;
1878 for (std::size_t i=0; i<host_v3.size(); ++i)
1879 host_v3[i] = host_v1[i];
1880 for (std::size_t i=0; i<host_v3.size(); ++i)
1881 host_v3[i] -= std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1882 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1884 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1886 std::cerr <<
"** Failure in v1 -= pow(v1, v2 + v1);" << std::endl;
1887 return EXIT_FAILURE;
1892 for (std::size_t i=0; i<host_v3.size(); ++i)
1893 host_v3[i] = host_v1[i];
1894 for (std::size_t i=0; i<host_v3.size(); ++i)
1895 host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1896 vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1898 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1900 std::cerr <<
"** Failure in v1 = pow(v1 + v2, v2 + v1);" << std::endl;
1901 return EXIT_FAILURE;
1905 for (std::size_t i=0; i<host_v3.size(); ++i)
1906 host_v3[i] = host_v1[i];
1907 for (std::size_t i=0; i<host_v3.size(); ++i)
1908 host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1909 vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1911 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1913 std::cerr <<
"** Failure in v1 += pow(v1 + v2, v2 + v1);" << std::endl;
1914 return EXIT_FAILURE;
1918 for (std::size_t i=0; i<host_v3.size(); ++i)
1919 host_v3[i] = host_v1[i];
1920 for (std::size_t i=0; i<host_v3.size(); ++i)
1921 host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1922 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1924 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1926 std::cerr <<
"** Failure in v1 -= pow(v1 + v2, v2 + v1);" << std::endl;
1927 return EXIT_FAILURE;
1930 std::cout <<
"Testing unary elementwise operations..." << std::endl;
1931 for (
size_t i=0; i < host_v1.size(); ++i)
1932 host_v1[i] = randomNumber() /
NumericT(4);
// GENERATE_UNARY_OP_TEST(FUNCNAME): expands to one self-contained test stanza
// for a unary element-wise function. It must be expanded where host_v1,
// host_v2, vcl_v1, vcl_v2, epsilon and NumericT are in scope, because the
// expansion refers to all of them by name and contains `return EXIT_FAILURE`.
//
// Steps of the expansion:
//   1. host_v2 = 3.1415 * host_v1, then both host vectors are handed to
//      proxy_copy together with vcl_v1 / vcl_v2.
//   2. Six comparisons of std::FUNCNAME on the host data against
//      viennacl::linalg::element_FUNCNAME on the ViennaCL data, in this order:
//        v1  = f(v2),  v1  = f(v1 + v2),
//        v1 += f(v1),  v1 += f(v1 + v2),
//        v1 -= f(v2),  v1 -= f(v1 + v2)
//      Each comparison goes through check(host_v1, vcl_v1, epsilon); a
//      mismatch prints "Failure at ..." to std::cout and returns EXIT_FAILURE.
//
// NOTE(review): the third comparison applies FUNCNAME to v1, but its failure
// message prints "(v2)" - the message looks copy-pasted; confirm and correct.
// NOTE(review): the embedded line numbers jump (1938 -> 1940, 1942 -> 1944, ...),
// so brace-only lines of the macro are not shown in this listing.
1934 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \
1935 for (std::size_t i=0; i<host_v1.size(); ++i) \
1936 host_v2[i] = NumericT(3.1415) * host_v1[i]; \
1937 proxy_copy(host_v1, vcl_v1); \
1938 proxy_copy(host_v2, vcl_v2); \
1940 for (std::size_t i=0; i<host_v1.size(); ++i) \
1941 host_v1[i] = std::FUNCNAME(host_v2[i]); \
1942 vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1944 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1946 std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
1947 return EXIT_FAILURE; \
1950 for (std::size_t i=0; i<host_v1.size(); ++i) \
1951 host_v1[i] = std::FUNCNAME(host_v1[i] + host_v2[i]); \
1952 vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1954 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1956 std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1957 return EXIT_FAILURE; \
1960 for (std::size_t i=0; i<host_v1.size(); ++i) \
1961 host_v1[i] += std::FUNCNAME(host_v1[i]); \
1962 vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
1964 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1966 std::cout << "Failure at v1 += " << #FUNCNAME << "(v2)" << std::endl; \
1967 return EXIT_FAILURE; \
1970 for (std::size_t i=0; i<host_v1.size(); ++i) \
1971 host_v1[i] += std::FUNCNAME(host_v1[i] + host_v2[i]); \
1972 vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1974 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1976 std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1977 return EXIT_FAILURE; \
1980 for (std::size_t i=0; i<host_v1.size(); ++i) \
1981 host_v1[i] -= std::FUNCNAME(host_v2[i]); \
1982 vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1984 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1986 std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
1987 return EXIT_FAILURE; \
1990 for (std::size_t i=0; i<host_v1.size(); ++i) \
1991 host_v1[i] -= std::FUNCNAME(host_v1[i] + host_v2[i]); \
1992 vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1994 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1996 std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1997 return EXIT_FAILURE; \
2002 for (std::size_t i=0; i < host_v1.size(); ++i)
2003 host_v1[i] = randomNumber() /
NumericT(4);
2018 for (std::size_t i=0; i<host_v1.size(); ++i)
2019 host_v2[i] =
NumericT(3.1415) * host_v1[i];
2023 std::cout <<
"Testing another complicated vector expression with CPU scalars..." << std::endl;
2024 for (std::size_t i=0; i<host_v1.size(); ++i)
2025 host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2026 vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2);
2028 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2029 return EXIT_FAILURE;
2031 std::cout <<
"Testing another complicated vector expression with GPU scalars..." << std::endl;
2032 for (std::size_t i=0; i<host_v1.size(); ++i)
2033 host_v2[i] =
NumericT(3.1415) * host_v1[i];
2037 for (std::size_t i=0; i<host_v1.size(); ++i)
2038 host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2039 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2);
2041 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2042 return EXIT_FAILURE;
2045 std::cout <<
"Testing lenghty sum of scaled vectors..." << std::endl;
2046 for (std::size_t i=0; i<host_v1.size(); ++i)
2047 host_v2[i] =
NumericT(3.1415) * host_v1[i];
2051 for (std::size_t i=0; i<host_v1.size(); ++i)
2052 host_v1[i] = host_v2[i] / alpha + beta * host_v1[i] - alpha * host_v2[i] + beta * host_v1[i] - alpha * host_v1[i];
2053 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
2055 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2056 return EXIT_FAILURE;
// Test driver for one numeric type: builds the host and ViennaCL operands and
// runs the five-argument test<NumericT>(...) on every pairing of
// {plain vector, range proxy, slice proxy} for vcl_v1 and vcl_v2.
// Returns EXIT_FAILURE as soon as one check or one pairing fails, otherwise
// EXIT_SUCCESS.
// NOTE(review): the signature line and several declarations (vcl_full_vec,
// host_range_vec, host_short_vec, vcl_short_vec, the range/slice proxies, ...)
// are not shown in this listing; presumably this is the overload that main
// invokes as test<NumericT>(epsilon) - confirm against the original file.
2063 template<
typename NumericT,
typename Epsilon >
2066 int retval = EXIT_SUCCESS;
2067 std::size_t
size = 24656;
2071 std::cout <<
"Running tests for vector of size " << size << std::endl;
// Host reference data: two full-length vectors with entries 1.0 + randomNumber().
2076 std::vector<NumericT> std_full_vec(size);
2077 std::vector<NumericT> std_full_vec2(std_full_vec.size());
2079 for (std::size_t i=0; i<std_full_vec.size(); ++i)
2081 std_full_vec[i] =
NumericT(1.0) + randomNumber();
2082 std_full_vec2[i] =
NumericT(1.0) + randomNumber();
// Host-side range bounds: [size/4, 2*size/4) of the first vector and
// [2*size/4, 3*size/4) of the second.
2085 std::size_t r1_start = std_full_vec.size() / 4;
2086 std::size_t r1_stop = 2 * std_full_vec.size() / 4;
2087 std::size_t r2_start = 2 * std_full_vec2.size() / 4;
2088 std::size_t r2_stop = 3 * std_full_vec2.size() / 4;
// Host-side slice parameters (start, increment, element count). With
// size = 24656 the last touched index is start + inc * (count - 1):
// 6164 + 3 * 6163 = 24653 for s1 and 12328 + 2 * 6163 = 24654 for s2, so both
// slices stay inside the full vectors.
2092 std::size_t s1_start = std_full_vec.size() / 4;
2093 std::size_t s1_inc = 3;
2094 std::size_t s1_size = std_full_vec.size() / 4;
2095 std::size_t s2_start = 2 * std_full_vec2.size() / 4;
2096 std::size_t s2_inc = 2;
2097 std::size_t s2_size = std_full_vec2.size() / 4;
2108 viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
// ViennaCL ranges built from the same quarter boundaries as r1_* / r2_* above.
2110 viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
2111 viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
// Element-wise copy of the host range proxies into temporary std::vectors.
// NOTE(review): std_short_vec / std_short_vec2 are declared again further down
// for the slice case; presumably each pair lives in its own brace scope that
// this listing dropped.
2119 std::vector<NumericT> std_short_vec(host_range_vec.
size());
2120 for (std::size_t i=0; i<std_short_vec.size(); ++i)
2121 std_short_vec[i] = host_range_vec[i];
2124 std::vector<NumericT> std_short_vec2(host_range_vec2.
size());
2125 for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2126 std_short_vec2[i] = host_range_vec2[i];
2129 std::cout <<
"Testing creation of vectors from range..." << std::endl;
2130 if (
check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2131 return EXIT_FAILURE;
2132 if (
check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2133 return EXIT_FAILURE;
// ViennaCL slices given as (start, stride, size), matching s1_* / s2_* above.
2136 viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
2137 viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
2144 std::vector<NumericT> std_short_vec(host_slice_vec.
size());
2145 for (std::size_t i=0; i<std_short_vec.size(); ++i)
2146 std_short_vec[i] = host_slice_vec[i];
2149 std::vector<NumericT> std_short_vec2(host_slice_vec2.
size());
2150 for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2151 std_short_vec2[i] = host_slice_vec2[i];
2154 std::cout <<
"Testing creation of vectors from slice..." << std::endl;
2155 if (
check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2156 return EXIT_FAILURE;
2157 if (
check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2158 return EXIT_FAILURE;
// Run the full operation suite on all nine pairings. The host operands are
// always host_short_vec / host_short_vec2; only the ViennaCL operand kinds vary.
// --- vcl_v1 is a plain vector ---
2165 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
2166 retval = test<NumericT>(epsilon,
2167 host_short_vec, host_short_vec2,
2168 vcl_short_vec, vcl_short_vec2);
2169 if (retval != EXIT_SUCCESS)
2170 return EXIT_FAILURE;
2172 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
2173 retval = test<NumericT>(epsilon,
2174 host_short_vec, host_short_vec2,
2175 vcl_short_vec, vcl_range_vec2);
2176 if (retval != EXIT_SUCCESS)
2177 return EXIT_FAILURE;
2179 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
2180 retval = test<NumericT>(epsilon,
2181 host_short_vec, host_short_vec2,
2182 vcl_short_vec, vcl_slice_vec2);
2183 if (retval != EXIT_SUCCESS)
2184 return EXIT_FAILURE;
// --- vcl_v1 is a range proxy ---
2188 std::cout <<
" ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
2189 retval = test<NumericT>(epsilon,
2190 host_short_vec, host_short_vec2,
2191 vcl_range_vec, vcl_short_vec2);
2192 if (retval != EXIT_SUCCESS)
2193 return EXIT_FAILURE;
2195 std::cout <<
" ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
2196 retval = test<NumericT>(epsilon,
2197 host_short_vec, host_short_vec2,
2198 vcl_range_vec, vcl_range_vec2);
2199 if (retval != EXIT_SUCCESS)
2200 return EXIT_FAILURE;
2202 std::cout <<
" ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
2203 retval = test<NumericT>(epsilon,
2204 host_short_vec, host_short_vec2,
2205 vcl_range_vec, vcl_slice_vec2);
2206 if (retval != EXIT_SUCCESS)
2207 return EXIT_FAILURE;
// --- vcl_v1 is a slice proxy ---
2211 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
2212 retval = test<NumericT>(epsilon,
2213 host_short_vec, host_short_vec2,
2214 vcl_slice_vec, vcl_short_vec2);
2215 if (retval != EXIT_SUCCESS)
2216 return EXIT_FAILURE;
2218 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
2219 retval = test<NumericT>(epsilon,
2220 host_short_vec, host_short_vec2,
2221 vcl_slice_vec, vcl_range_vec2);
2222 if (retval != EXIT_SUCCESS)
2223 return EXIT_FAILURE;
2225 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
2226 retval = test<NumericT>(epsilon,
2227 host_short_vec, host_short_vec2,
2228 vcl_slice_vec, vcl_slice_vec2);
2229 if (retval != EXIT_SUCCESS)
2230 return EXIT_FAILURE;
2232 return EXIT_SUCCESS;
// Program entry: prints a banner, then runs test<NumericT>(epsilon) once with
// tolerance 1.0E-2 (labelled "float") and once with tolerance 1.0E-10
// (labelled "double"), printing "# Test passed" after each successful run.
// NOTE(review): the function header, the NumericT typedefs, the failure
// branches and the closing #endif are not shown in this listing - presumably
// this is main(); confirm against the original file.
2241 std::cout << std::endl;
2242 std::cout <<
"----------------------------------------------" << std::endl;
2243 std::cout <<
"----------------------------------------------" << std::endl;
2244 std::cout <<
"## Test :: Vector" << std::endl;
2245 std::cout <<
"----------------------------------------------" << std::endl;
2246 std::cout <<
"----------------------------------------------" << std::endl;
2247 std::cout << std::endl;
2249 int retval = EXIT_SUCCESS;
2251 std::cout << std::endl;
2252 std::cout <<
"----------------------------------------------" << std::endl;
2253 std::cout << std::endl;
// Single-precision run: relative tolerance of 1.0E-2.
2256 NumericT epsilon =
static_cast<NumericT
>(1.0E-2);
2257 std::cout <<
"# Testing setup:" << std::endl;
2258 std::cout <<
" eps: " << epsilon << std::endl;
2259 std::cout <<
" numeric: float" << std::endl;
2260 retval = test<NumericT>(epsilon);
2261 if ( retval == EXIT_SUCCESS )
2262 std::cout <<
"# Test passed" << std::endl;
2266 std::cout << std::endl;
2267 std::cout <<
"----------------------------------------------" << std::endl;
2268 std::cout << std::endl;
// NOTE(review): the lines guarded by this #ifdef are not shown; presumably they
// query the OpenCL device for double-precision support before the run below.
2269 #ifdef VIENNACL_WITH_OPENCL
// Double-precision run: much tighter tolerance of 1.0E-10.
2275 NumericT epsilon = 1.0E-10;
2276 std::cout <<
"# Testing setup:" << std::endl;
2277 std::cout <<
" eps: " << epsilon << std::endl;
2278 std::cout <<
" numeric: double" << std::endl;
2279 retval = test<NumericT>(epsilon);
2280 if ( retval == EXIT_SUCCESS )
2281 std::cout <<
"# Test passed" << std::endl;
2285 std::cout << std::endl;
2286 std::cout <<
"----------------------------------------------" << std::endl;
2287 std::cout << std::endl;
2290 std::cout << std::endl;
2291 std::cout <<
"------- Test completed --------" << std::endl;
2292 std::cout << std::endl;
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
T norm_2(std::vector< T, A > const &v1)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
vector_proxy(NumericT *p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
NumericT & operator[](std::size_t index)
int test(Epsilon const &epsilon, HostVectorType &host_v1, HostVectorType &host_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Class for representing non-strided subvectors of a bigger vector x.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
int check(T1 const &t1, T2 const &t2, double epsilon)
void proxy_copy(vector_proxy< NumericT > const &host_vec, viennacl::vector_base< NumericT > &vcl_vec)
Class for representing strided subvectors of a bigger vector x.
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
NumericT const & operator[](std::size_t index) const
Proxy classes for vectors.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
Stub routines for the summation of elements in a vector, or all elements in either a row or column of...
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
NumericT max(std::vector< NumericT > const &v1)
T norm_inf(std::vector< T, A > const &v1)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from a std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
T norm_1(std::vector< T, A > const &v1)
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-users...
Generic interface for the $\ell^\infty$-norm. See viennacl/linalg/vector_operations.hpp for implementations.
NumericT min(std::vector< NumericT > const &v1)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)