#ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_

#ifdef VIENNACL_WITH_OPENMP
#include <omp.h>
#endif
// Minimum vector size for which the OpenMP-parallel code paths are used:
#ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
  #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000
#endif
namespace detail
{
// Negate a scaling coefficient. For unsigned integer types a sign flip is not
// meaningful, so the value is returned unchanged.
template<typename NumericT>
NumericT flip_sign(NumericT val) { return -val; }
inline unsigned long  flip_sign(unsigned long  val) { return val; }
inline unsigned int   flip_sign(unsigned int   val) { return val; }
inline unsigned short flip_sign(unsigned short val) { return val; }
inline unsigned char  flip_sign(unsigned char  val) { return val; }
} // namespace detail
template<typename DestNumericT, typename SrcNumericT>
void convert(vector_base<DestNumericT> & dest, vector_base<SrcNumericT> const & src)
{
  DestNumericT      * data_dest = detail::extract_raw_pointer<DestNumericT>(dest);
  SrcNumericT const * data_src  = detail::extract_raw_pointer<SrcNumericT>(src);

  // start_*, inc_*, and size_* denote offset, stride, and length of the vectors as
  // reported by viennacl::traits::start(), ::stride(), and ::size().
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size_dest > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size_dest); ++i)
    data_dest[static_cast<vcl_size_t>(i)*inc_dest+start_dest] = static_cast<DestNumericT>(data_src[static_cast<vcl_size_t>(i)*inc_src+start_src]);
}
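// Illustrative usage sketch (not part of the original header): the dispatcher of the
// same name in viennacl::linalg forwards to this kernel for vectors held in main memory.
// Assumes viennacl/vector.hpp is included.
//
//   viennacl::vector<double> vd(1000);
//   viennacl::vector<float>  vf(1000);
//   viennacl::linalg::convert(vf, vd);   // per-entry cast: vf[i] = float(vd[i])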
template<typename NumericT, typename ScalarT1>
void av(vector_base<NumericT> & vec1,
        vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha)
{
  typedef NumericT value_type;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = detail::flip_sign(data_alpha);

  if (reciprocal_alpha)
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(size1); ++i)
      data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(size1); ++i)
      data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
  }
}
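// Illustrative sketch (not from the original header) of how the flags select the
// arithmetic: reciprocal_alpha turns the multiplication into a division, and
// flip_sign_alpha negates the coefficient via detail::flip_sign() first.
//
//   viennacl::vector<float> v1(100), v2(100);
//   float a = 2.0f;
//   v1 = v2 * a;   // host backend: v1[i] = v2[i] * a
//   v1 = v2 / a;   // host backend: v1[i] = v2[i] / a   (reciprocal_alpha == true)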
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void avbv(vector_base<NumericT> & vec1,
          vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
          vector_base<NumericT> const & vec3, ScalarT2 const & beta,  vcl_size_t /* len_beta */,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT value_type;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = detail::flip_sign(data_alpha);

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = detail::flip_sign(data_beta);

  if (reciprocal_alpha)
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
  else
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
}
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void avbv_v(vector_base<NumericT> & vec1,
            vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
            vector_base<NumericT> const & vec3, ScalarT2 const & beta,  vcl_size_t /* len_beta */,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT value_type;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = detail::flip_sign(data_alpha);

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = detail::flip_sign(data_beta);

  if (reciprocal_alpha)
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
  else
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
}
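// Illustrative sketch (not from the original header): the expression-template frontend
// maps three-operand linear combinations onto avbv() / avbv_v(), e.g.
//
//   viennacl::vector<double> x(100), y(100), z(100);
//   double a = 2.0, b = 3.0;
//   x  = a * y + z / b;   // avbv():   x[i]  = a * y[i] + z[i] / b
//   x += a * y + b * z;   // avbv_v(): x[i] += a * y[i] + b * z[i]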
template<typename NumericT>
void vector_assign(vector_base<NumericT> & vec1, const NumericT & alpha, bool up_to_internal_size = false)
{
  typedef NumericT value_type;

  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  // Optionally overwrite the padding entries as well, i.e. loop_bound is then
  // internal_size() rather than the logical size1:
  vcl_size_t loop_bound = up_to_internal_size ? vec1.internal_size() : size1;

  value_type data_alpha = static_cast<value_type>(alpha);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(loop_bound); ++i)
    data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha;
}
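// Illustrative usage sketch (not from the original header):
//
//   viennacl::vector<float> v(64);
//   viennacl::linalg::vector_assign(v, 1.0f);   // v[i] = 1 for all logical entries
//
// Passing up_to_internal_size == true additionally overwrites the padding entries up
// to internal_size(), which keeps padded storage consistent for aligned kernels.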
template<typename NumericT>
void vector_swap(vector_base<NumericT> & vec1, vector_base<NumericT> & vec2)
{
  typedef NumericT value_type;

  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
  {
    value_type temp = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
    data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
    data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = temp;
  }
}
// Element-wise binary operations vec1 = vec2 .* vec3 and vec1 = vec2 ./ vec3 (MATLAB notation):
template<typename NumericT, typename OpT>
void element_op(vector_base<NumericT> & vec1,
                vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_binary<OpT> > const & proxy)
{
  typedef NumericT value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_binary<OpT> > OpFunctor;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
    OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
}
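// Illustrative usage sketch (not from the original header):
//
//   viennacl::vector<float> a(32), b(32), c(32);
//   a = viennacl::linalg::element_prod(b, c);   // a[i] = b[i] * c[i]
//   a = viennacl::linalg::element_div(b, c);    // a[i] = b[i] / c[i]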
// Element-wise unary operations, e.g. vec1 = sin(vec2) applied entry by entry:
template<typename NumericT, typename OpT>
void element_op(vector_base<NumericT> & vec1,
                vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_unary<OpT> > const & proxy)
{
  typedef NumericT value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_unary<OpT> > OpFunctor;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
    OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]);
}
namespace detail
{
// An OpenMP reduction clause requires the accumulation variable to have a concrete
// (non-template) type, so the kernel is generated per element type via two macro halves
// with the reduction pragma placed between them (see the instantiations below).
#define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART) \
  inline RESULTSCALART inner_prod_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1, \
                                       RESULTSCALART const * data_vec2, vcl_size_t start2, vcl_size_t inc2) { \
    TEMPSCALART temp = 0;

#define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART) \
    for (long i = 0; i < static_cast<long>(size1); ++i) \
      temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]; \
    return static_cast<RESULTSCALART>(temp); \
  }
// Instantiations for float and double; analogous pairs exist for char, unsigned char,
// short, unsigned short, int, unsigned int, long, and unsigned long:
VIENNACL_INNER_PROD_IMPL_1(float, float)
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(float)

VIENNACL_INNER_PROD_IMPL_1(double, double)
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(double)

// ... (remaining integer-type instantiations follow the same pattern) ...

#undef VIENNACL_INNER_PROD_IMPL_1
#undef VIENNACL_INNER_PROD_IMPL_2
} // namespace detail
template<typename NumericT, typename ScalarT>
void inner_prod_impl(vector_base<NumericT> const & vec1,
                     vector_base<NumericT> const & vec2,
                     ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  // Dispatch to the type-specific overload generated by the macros above:
  result = detail::inner_prod_impl(data_vec1, start1, inc1, size1,
                                   data_vec2, start2, inc2);
}
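// Illustrative usage sketch (not from the original header): library users call the
// viennacl::linalg::inner_prod() frontend rather than this _impl function directly.
//
//   viennacl::vector<double> u(1000), w(1000);
//   double s = viennacl::linalg::inner_prod(u, w);   // s = sum_i u[i] * w[i]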
// Simultaneous inner products <x, y_1>, <x, y_2>, ..., written to a (sub-)vector:
template<typename NumericT>
void inner_prod_impl(vector_base<NumericT> const & x,
                     vector_tuple<NumericT> const & vec_tuple,
                     vector_base<NumericT> & result)
{
  typedef NumericT value_type;

  value_type const * data_x = detail::extract_raw_pointer<value_type>(x);

  std::vector<value_type>         temp(vec_tuple.const_size());
  std::vector<value_type const *> data_y(vec_tuple.const_size());
  std::vector<vcl_size_t>         start_y(vec_tuple.const_size());
  std::vector<vcl_size_t>         stride_y(vec_tuple.const_size());

  for (vcl_size_t j = 0; j < vec_tuple.const_size(); ++j)
  {
    data_y[j]   = detail::extract_raw_pointer<value_type>(vec_tuple.const_at(j));
    start_y[j]  = viennacl::traits::start(vec_tuple.const_at(j));
    stride_y[j] = viennacl::traits::stride(vec_tuple.const_at(j));
  }

  // x is traversed only once; all inner products are accumulated in the same pass:
  for (vcl_size_t i = 0; i < size_x; ++i)
  {
    value_type entry_x = data_x[i*inc_x+start_x];
    for (vcl_size_t j = 0; j < vec_tuple.const_size(); ++j)
      temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
  }

  for (vcl_size_t j = 0; j < vec_tuple.const_size(); ++j)
    result[j] = temp[j];
}
namespace detail
{
// Same macro technique as for the inner product (concrete accumulator type for the
// OpenMP reduction clause):
#define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART) \
  inline RESULTSCALART norm_1_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
    TEMPSCALART temp = 0;

#define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART) \
    for (long i = 0; i < static_cast<long>(size1); ++i) \
      temp += static_cast<TEMPSCALART>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]))); \
    return static_cast<RESULTSCALART>(temp); \
  }
// Instantiations for float and double; analogous pairs exist for the integer types:
VIENNACL_NORM_1_IMPL_1(float, float)
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(float, float)

VIENNACL_NORM_1_IMPL_1(double, double)
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(double, double)

// ... (remaining integer-type instantiations follow the same pattern) ...

#undef VIENNACL_NORM_1_IMPL_1
#undef VIENNACL_NORM_1_IMPL_2
} // namespace detail
template<typename NumericT, typename ScalarT>
void norm_1_impl(vector_base<NumericT> const & vec1, ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  result = detail::norm_1_impl(data_vec1, start1, inc1, size1);
}
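// Illustrative usage sketch (not from the original header):
//
//   viennacl::vector<float> v(500);
//   float l1 = viennacl::linalg::norm_1(v);   // sum of |v[i]|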
namespace detail
{
#define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART) \
  inline RESULTSCALART norm_2_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
    TEMPSCALART temp = 0;

#define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART) \
    for (long i = 0; i < static_cast<long>(size1); ++i) { \
      RESULTSCALART data = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]; \
      temp += static_cast<TEMPSCALART>(data * data); \
    } \
    return static_cast<RESULTSCALART>(temp); \
  }
// Instantiations for float and double; analogous pairs exist for the integer types:
VIENNACL_NORM_2_IMPL_1(float, float)
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(float, float)

VIENNACL_NORM_2_IMPL_1(double, double)
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(double, double)

// ... (remaining integer-type instantiations follow the same pattern) ...

#undef VIENNACL_NORM_2_IMPL_1
#undef VIENNACL_NORM_2_IMPL_2
} // namespace detail
template<typename NumericT, typename ScalarT>
void norm_2_impl(vector_base<NumericT> const & vec1, ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  // The macro-generated helper accumulates the sum of squares; take the root here:
  result = std::sqrt(detail::norm_2_impl(data_vec1, start1, inc1, size1));
}
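// Illustrative usage sketch (not from the original header):
//
//   viennacl::vector<float> v(500);
//   float l2 = viennacl::linalg::norm_2(v);   // sqrt(sum_i v[i]^2)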
template<typename NumericT, typename ScalarT>
void norm_inf_impl(vector_base<NumericT> const & vec1, ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
    thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);   // one partial maximum modulus per thread

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;

    for (vcl_size_t i = begin; i < end; ++i)
      temp[id] = std::max<value_type>(temp[id], static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))));
  }

  // Reduce the per-thread maxima sequentially:
  for (vcl_size_t i = 1; i < thread_count; ++i)
    temp[0] = std::max<value_type>(temp[0], temp[i]);

  result = temp[0];
}
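// Illustrative usage sketch (not from the original header): each OpenMP thread scans a
// contiguous chunk and the per-thread maxima are reduced sequentially afterwards.
//
//   viennacl::vector<double> v(10000);
//   double m = viennacl::linalg::norm_inf(v);   // max_i |v[i]|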
template<typename NumericT>
vcl_size_t index_norm_inf(vector_base<NumericT> const & vec1)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
    thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);    // per-thread maximum modulus
  std::vector<vcl_size_t> index(thread_count);   // per-thread index of that maximum

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    value_type data;
    for (vcl_size_t i = begin; i < end; ++i)
    {
      data = static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));
      if (data > temp[id]) { temp[id] = data; index[id] = i; }
    }
  }

  // Pick the overall winner among the per-thread results:
  for (vcl_size_t i = 1; i < thread_count; ++i)
    if (temp[i] > temp[0]) { temp[0] = temp[i]; index[0] = index[i]; }

  return index[0];
}
template<typename NumericT, typename ScalarT>
void max_impl(vector_base<NumericT> const & vec1, ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
    thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);   // one partial maximum per thread

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    // Each thread processes its contiguous chunk [begin, end):
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    temp[id] = data_vec1[start1];   // seed with the first entry

    for (vcl_size_t i = begin; i < end; ++i)
    {
      value_type v = data_vec1[i*inc1+start1];
      temp[id] = std::max<value_type>(temp[id], v);
    }
  }

  for (vcl_size_t i = 1; i < thread_count; ++i)
    temp[0] = std::max<value_type>(temp[0], temp[i]);

  result = temp[0];
}
template<typename NumericT, typename ScalarT>
void min_impl(vector_base<NumericT> const & vec1, ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
    thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);   // one partial minimum per thread

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    // Each thread processes its contiguous chunk [begin, end):
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    temp[id] = data_vec1[start1];   // seed with the first entry

    for (vcl_size_t i = begin; i < end; ++i)
    {
      value_type v = data_vec1[i*inc1+start1];
      temp[id] = std::min<value_type>(temp[id], v);
    }
  }

  for (vcl_size_t i = 1; i < thread_count; ++i)
    temp[0] = std::min<value_type>(temp[0], temp[i]);

  result = temp[0];
}
template<typename NumericT, typename ScalarT>
void sum_impl(vector_base<NumericT> const & vec1, ScalarT & result)
{
  typedef NumericT value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  value_type temp = 0;
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+:temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
    temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];

  result = temp;
}
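// Illustrative usage sketch (not from the original header): viennacl::linalg::sum()
// returns an expression object that can be assigned to a CPU scalar.
//
//   viennacl::vector<int> v(256);
//   int total = viennacl::linalg::sum(v);   // OpenMP reduction over all entries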
template<typename NumericT>
void plane_rotation(vector_base<NumericT> & vec1,
                    vector_base<NumericT> & vec2,
                    NumericT alpha, NumericT beta)
{
  typedef NumericT value_type;

  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  value_type data_alpha = alpha;
  value_type data_beta  = beta;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
  {
    value_type temp1 = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
    value_type temp2 = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];

    data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
    data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
  }
}
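// Illustrative usage sketch (not from the original header): the per-entry update is
// (x, y) <- (alpha*x + beta*y, alpha*y - beta*x), i.e. a Givens-style rotation.
//
//   viennacl::vector<float> x(128), y(128);
//   float c = std::cos(0.3f), s = std::sin(0.3f);
//   viennacl::linalg::plane_rotation(x, y, c, s);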
namespace detail
{
// Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend.
template<typename NumericT>
void vector_scan_impl(vector_base<NumericT> const & vec1,
                      vector_base<NumericT>       & vec2,
                      bool is_inclusive)
{
  NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1);
  NumericT       * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2);

#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
  {
    std::vector<NumericT> thread_results(omp_get_max_threads());

    // Step 1: each thread sums its contiguous chunk of vec1.
    #pragma omp parallel
    {
      vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
      vcl_size_t thread_start    = work_per_thread * omp_get_thread_num();
      vcl_size_t thread_stop     = std::min<vcl_size_t>(thread_start + work_per_thread, size1);

      NumericT thread_sum = 0;
      for (vcl_size_t i = thread_start; i < thread_stop; i++)
        thread_sum += data_vec1[i * inc1 + start1];

      thread_results[omp_get_thread_num()] = thread_sum;
    }

    // Step 2: an exclusive scan over the per-thread sums yields each thread's offset.
    NumericT current_offset = 0;
    for (vcl_size_t i = 0; i < thread_results.size(); ++i)
    {
      NumericT tmp = thread_results[i];
      thread_results[i] = current_offset;
      current_offset += tmp;
    }

    // Step 3: each thread scans its chunk again, starting from its offset.
    #pragma omp parallel
    {
      vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
      vcl_size_t thread_start    = work_per_thread * omp_get_thread_num();
      vcl_size_t thread_stop     = std::min<vcl_size_t>(thread_start + work_per_thread, size1);

      NumericT thread_sum = thread_results[omp_get_thread_num()];
      if (is_inclusive)
        for (vcl_size_t i = thread_start; i < thread_stop; i++)
        { thread_sum += data_vec1[i * inc1 + start1]; data_vec2[i * inc2 + start2] = thread_sum; }
      else
        for (vcl_size_t i = thread_start; i < thread_stop; i++)
        { data_vec2[i * inc2 + start2] = thread_sum; thread_sum += data_vec1[i * inc1 + start1]; }
    }
  }
  else
#endif
  {
    // Sequential fallback (small vectors or builds without OpenMP):
    NumericT sum = 0;
    for (vcl_size_t i = 0; i < size1; i++)
    {
      if (is_inclusive) { sum += data_vec1[i * inc1 + start1]; data_vec2[i * inc2 + start2] = sum; }
      else              { data_vec2[i * inc2 + start2] = sum;  sum += data_vec1[i * inc1 + start1]; }
    }
  }
}
} // namespace detail
// Inclusive and exclusive scan dispatch to the common implementation above:
template<typename NumericT>
void inclusive_scan(vector_base<NumericT> const & vec1, vector_base<NumericT> & vec2) { detail::vector_scan_impl(vec1, vec2, true); }

template<typename NumericT>
void exclusive_scan(vector_base<NumericT> const & vec1, vector_base<NumericT> & vec2) { detail::vector_scan_impl(vec1, vec2, false); }
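// Illustrative usage sketch (not from the original header), with v = (1, 2, 3, 4):
//
//   viennacl::vector<int> v(4), r(4);
//   viennacl::linalg::inclusive_scan(v, r);   // r = (1, 3, 6, 10)
//   viennacl::linalg::exclusive_scan(v, r);   // r = (0, 1, 3, 6)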