#ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_

// ... (license header and includes elided)
// Minimum matrix size (size1*size2) for OpenMP to be used on matrix operations:
#ifndef VIENNACL_OPENMP_MATRIX_MIN_SIZE
  #define VIENNACL_OPENMP_MATRIX_MIN_SIZE 5000
#endif

namespace viennacl
{
namespace linalg
{
namespace host_based
{
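/** @brief Converts a dense matrix from one numeric type to another (mat1 <- mat2), casting entry by entry. */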
template<typename DestNumericT, typename SrcNumericT>
void convert(matrix_base<DestNumericT> & mat1, matrix_base<SrcNumericT> const & mat2)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  DestNumericT      * data_A = detail::extract_raw_pointer<DestNumericT>(mat1);
  SrcNumericT const * data_B = detail::extract_raw_pointer<SrcNumericT>(mat2);

  // ... (offsets, strides, and sizes of mat1/mat2 extracted via viennacl::traits)

  if (mat1.row_major())
  {
    // ... (row-major matrix_array_wrapper objects wrapper_A, wrapper_B set up here)
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
  }
  else
  {
    // ... (column-major wrappers; the loop order is swapped so that the stride-1 index runs innermost)
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
  }
}
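/** @brief Writes the transpose of the matrix behind the op_trans proxy into temp_trans, processing square sub-blocks for cache efficiency. */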
template<typename NumericT, typename SizeT, typename DistanceT>
void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,
                                   const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
           matrix_base<NumericT> & temp_trans)
{
  typedef NumericT value_type;

  const value_type * data_A = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type       * data_B = detail::extract_raw_pointer<value_type>(temp_trans);

  // ... (offsets, strides, and sizes elided; sub_mat_size is the edge length of the square
  //      blocks in which the matrix is processed so that reads and writes stay cache-friendly)

  vcl_size_t row_count = A_size1 / sub_mat_size;
  vcl_size_t col_count = A_size2 / sub_mat_size;

  vcl_size_t row_count_remainder = A_size1 % sub_mat_size;
  vcl_size_t col_count_remainder = A_size2 % sub_mat_size;

  if (proxy.lhs().row_major())
  {
    // Step 1: transpose all full sub_mat_size x sub_mat_size blocks.
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(row_count*col_count); ++i)
    {
      vcl_size_t row = vcl_size_t(i) / col_count;
      vcl_size_t col = vcl_size_t(i) % col_count;

      detail::matrix_array_wrapper<value_type const, row_major, false>
        wrapper_A(data_A, A_start1 + A_inc1 * (row * sub_mat_size)
                        , A_start2 + A_inc2 * (col * sub_mat_size), A_inc1
                        , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type, row_major, false>
        wrapper_B(data_B, B_start1 + B_inc1 * (col * sub_mat_size)
                        , B_start2 + B_inc2 * (row * sub_mat_size), B_inc1
                        , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < sub_mat_size; ++j)
        for (vcl_size_t k = 0; k < sub_mat_size; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }

    // Step 2: transpose the remainder columns on the right edge (all rows).
    {
      detail::matrix_array_wrapper<value_type const, row_major, false>
        wrapper_A(data_A, A_start1
                        , A_start2 + A_inc2 * (col_count * sub_mat_size), A_inc1
                        , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type, row_major, false>
        wrapper_B(data_B, B_start1 + B_inc1 * (col_count * sub_mat_size), B_start2, B_inc1
                        , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < col_count_remainder; ++j)
        for (vcl_size_t k = 0; k < A_size1; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }

    // Step 3: transpose the remainder rows at the bottom (excluding the corner done in step 2).
    {
      detail::matrix_array_wrapper<value_type const, row_major, false>
        wrapper_A(data_A, A_start1 + A_inc1 * (row_count * sub_mat_size), A_start2, A_inc1
                        , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type, row_major, false>
        wrapper_B(data_B, B_start1
                        , B_start2 + B_inc2 * (row_count * sub_mat_size), B_inc1
                        , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < row_count_remainder; ++j)
        for (vcl_size_t k = 0; k < (A_size2 - col_count_remainder); ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }
  }
  else
  {
    // Same three steps with column_major wrappers; only the index order of the innermost
    // assignments differs so that the writes remain contiguous.
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(row_count*col_count); ++i)
    {
      vcl_size_t row = vcl_size_t(i) / col_count;
      vcl_size_t col = vcl_size_t(i) % col_count;

      detail::matrix_array_wrapper<value_type const, column_major, false>
        wrapper_A(data_A, A_start1 + A_inc1 * (row * sub_mat_size)
                        , A_start2 + A_inc2 * (col * sub_mat_size), A_inc1
                        , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type, column_major, false>
        wrapper_B(data_B, B_start1 + B_inc1 * (col * sub_mat_size)
                        , B_start2 + B_inc2 * (row * sub_mat_size), B_inc1
                        , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < sub_mat_size; ++j)
        for (vcl_size_t k = 0; k < sub_mat_size; ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }

    {
      detail::matrix_array_wrapper<value_type const, column_major, false>
        wrapper_A(data_A, A_start1
                        , A_start2 + A_inc2 * (col_count * sub_mat_size), A_inc1
                        , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type, column_major, false>
        wrapper_B(data_B, B_start1 + B_inc1 * (col_count * sub_mat_size), B_start2, B_inc1
                        , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < col_count_remainder; ++j)
        for (vcl_size_t k = 0; k < A_size1; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }

    {
      detail::matrix_array_wrapper<value_type const, column_major, false>
        wrapper_A(data_A, A_start1 + A_inc1 * (row_count * sub_mat_size), A_start2, A_inc1
                        , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type, column_major, false>
        wrapper_B(data_B, B_start1
                        , B_start2 + B_inc2 * (row_count * sub_mat_size), B_inc1
                        , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < row_count_remainder; ++j)
        for (vcl_size_t k = 0; k < (A_size2 - col_count_remainder); ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }
  }
}
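/** @brief Implements mat1 = alpha * mat2; alpha may be sign-flipped (flip_sign_alpha) or applied as a divisor (reciprocal_alpha). */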
template<typename NumericT, typename ScalarT1>
void am(matrix_base<NumericT> & mat1,
        matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  // ... (offsets, strides, sizes, and wrappers wrapper_A/wrapper_B elided)

  if (mat1.row_major())
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
  else
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
}
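/** @brief Implements mat1 = alpha * mat2 + beta * mat3, with optional sign flips and reciprocal application of alpha and beta. */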
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm(matrix_base<NumericT> & mat1,
          matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
          matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;

  // ... (offsets, strides, sizes, and wrappers wrapper_A/wrapper_B/wrapper_C elided)

  if (mat1.row_major())
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
  else
  {
    // column-major: outer loop over columns, inner over rows
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
}
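/** @brief Implements mat1 += alpha * mat2 + beta * mat3, the in-place accumulating variant of ambm(). */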
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm_m(matrix_base<NumericT> & mat1,
            matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
            matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;

  // ... (offsets, strides, sizes, and wrappers wrapper_A/wrapper_B/wrapper_C elided)

  if (mat1.row_major())
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
  else
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
}
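/** @brief Assigns the scalar s to all entries of mat; with clear set, the padded internal buffer is overwritten as well. */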
template<typename NumericT>
void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type alpha = static_cast<value_type>(s);

  // ... (offsets, strides, sizes, and wrapper_A elided; with clear == true the loop bounds
  //      cover the internal (padded) size so padding entries are overwritten too)

  if (mat.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(static_cast<vcl_size_t>(row), col) = alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, static_cast<vcl_size_t>(col)) = alpha;
  }
}
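/** @brief Assigns the scalar s to all diagonal entries of mat. */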
template<typename NumericT>
void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type alpha = static_cast<value_type>(s);

  // ... (offsets, strides, sizes, and wrapper_A elided)

  if (mat.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
}
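/** @brief Writes vec to the k-th diagonal of mat (k = 0: main diagonal, k > 0: superdiagonal, k < 0: subdiagonal). */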
template<typename NumericT>
void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT> & mat)
{
  typedef NumericT value_type;

  value_type       * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (offsets, strides, the diagonal length 'size', and row_start/col_start derived from k; wrapper_A elided)

  if (mat.row_major())
  {
    for (vcl_size_t i = 0; i < size; ++i)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  }
  else
  {
    for (vcl_size_t i = 0; i < size; ++i)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  }
}
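/** @brief Reads the k-th diagonal of mat into vec. */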
717 template<
typename NumericT>
722 value_type
const * data_A = detail::extract_raw_pointer<value_type>(mat);
723 value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
751 data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
758 data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
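/** @brief Copies row i of mat into vec. */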
template<typename NumericT>
void matrix_row(const matrix_base<NumericT> & mat, unsigned int i, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (offsets, strides, sizes, and wrapper_A elided)

  if (mat.row_major())
  {
    for (vcl_size_t j = 0; j < A_size2; ++j)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  }
  else
  {
    for (vcl_size_t j = 0; j < A_size2; ++j)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  }
}
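/** @brief Copies column j of mat into vec. */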
template<typename NumericT>
void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (offsets, strides, sizes, and wrapper_A elided)

  if (mat.row_major())
  {
    for (vcl_size_t i = 0; i < A_size1; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  }
  else
  {
    for (vcl_size_t i = 0; i < A_size1; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  }
}
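/** @brief Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax). */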
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_binary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());

  // ... (offsets, strides, sizes, wrappers, and the OpFunctor type decomposing OpT elided)

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
}
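/** @brief Implementation of unary element-wise operations A = OP(B), applied entry by entry. */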
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());

  // ... (offsets, strides, sizes, wrappers, and the OpFunctor type decomposing OpT elided)

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
}
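/** @brief Carries out matrix-vector multiplication: result = mat * vec, or result = trans(mat) * vec if trans is set. */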
template<typename NumericT>
void prod_impl(const matrix_base<NumericT> & mat, bool trans,
               const vector_base<NumericT> & vec,
               vector_base<NumericT> & result)
{
  typedef NumericT value_type;

  value_type const * data_A      = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_x      = detail::extract_raw_pointer<value_type>(vec);
  value_type       * data_result = detail::extract_raw_pointer<value_type>(result);

  // ... (offsets, strides, and sizes of mat, vec, and result elided)

  if (mat.row_major())
  {
    if (trans)
    {
      // result = A^T * x: rows of A are split among threads; each thread accumulates into
      // its own slice of temp_array to avoid write conflicts on the result vector.
      vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
      thread_count = omp_get_max_threads();
#endif
      std::vector<value_type> temp_array(A_size2*thread_count, 0);
      // ... (wrapper_res around data_result elided)

      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_res(col) = 0;

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      {
        vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
        id = omp_get_thread_num();
#endif
        vcl_size_t begin = (A_size1 * id) / thread_count;
        vcl_size_t end   = (A_size1 * (id + 1)) / thread_count;

        // ... (wrapper_mat positioned at row offset begin, wrapper_vec elided)

        for (vcl_size_t row = 0; row < (end - begin); ++row)
        {
          value_type temp = wrapper_vec(row);
          for (vcl_size_t col = 0; col < A_size2; ++col)
            temp_array[A_size2 * id + col] += wrapper_mat(row, col) * temp;
        }
      }

      for (vcl_size_t id = 0; id < thread_count; ++id)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_res(col) += temp_array[A_size2 * id + col];
    }
    else
    {
      // result = A * x: one independent dot product per row.
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          temp += data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];

        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
  else
  {
    if (!trans)
    {
      // result = A * x for column-major A: columns are split among threads, again with
      // per-thread partial results to avoid races on the result vector.
      vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
      thread_count = omp_get_max_threads();
#endif
      std::vector<value_type> temp_array(A_size1*thread_count, 0);
      // ... (wrapper_res elided)

      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_res(row) = 0;

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      {
        vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
        id = omp_get_thread_num();
#endif
        vcl_size_t begin = (A_size2 * id) / thread_count;
        vcl_size_t end   = (A_size2 * (id + 1)) / thread_count;

        // ... (wrapper_mat positioned at column offset begin, wrapper_vec elided)

        for (vcl_size_t col = 0; col < (end - begin); ++col)
        {
          value_type temp = wrapper_vec(col);
          for (vcl_size_t row = 0; row < A_size1; ++row)
            temp_array[A_size1 * id + row] += wrapper_mat(row, col) * temp;
        }
      }

      for (vcl_size_t id = 0; id < thread_count; ++id)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_res(row) += temp_array[A_size1 * id + row];
    }
    else
    {
      // result = A^T * x for column-major A: one contiguous dot product per column of A.
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size2); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size1; ++col)
          temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];

        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
}
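/** @brief Blocked kernel computing C = alpha * A * B + beta * C; sub-blocks are staged in thread-local buffers so that the innermost loop runs over contiguous memory. */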
namespace detail
{
  template<typename MatrixAccT1, typename MatrixAccT2, typename MatrixAccT3, typename NumericT>
  void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
            vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
            NumericT alpha, NumericT beta)
  {
    if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
      return;

    // ... (blocksize: fixed edge length of the square blocks; definition elided)

    vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
    vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
    vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((C_size1*C_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long block_idx_i2 = 0; block_idx_i2 < static_cast<long>(num_blocks_C1); ++block_idx_i2)
    {
      // thread-local buffers for the current blocks of A, B (stored transposed), and C:
      std::vector<NumericT> buffer_A(blocksize * blocksize);
      std::vector<NumericT> buffer_B(blocksize * blocksize);
      std::vector<NumericT> buffer_C(blocksize * blocksize);

      // ... (offset_i derived from block_idx_i2)

      for (vcl_size_t block_idx_j = 0; block_idx_j < num_blocks_C2; ++block_idx_j)
      {
        // ... (offset_j; buffer_C zeroed; clamped block extents i_max, j_max elided)

        for (vcl_size_t block_idx_k = 0; block_idx_k < num_blocks_A2; ++block_idx_k)
        {
          // ... (offset_k, k_max elided); stage the current blocks in the buffers:
          for (vcl_size_t i = offset_i; i < i_max; ++i)
            for (vcl_size_t k = offset_k; k < k_max; ++k)
              buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);

          for (vcl_size_t k = offset_k; k < k_max; ++k)
            for (vcl_size_t j = offset_j; j < j_max; ++j)
              buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);

          // multiply the staged blocks; both ptrA and ptrB run over contiguous memory:
          for (vcl_size_t i = 0; i < blocksize; ++i)
          {
            NumericT const * ptrA = &(buffer_A[i*blocksize]);
            for (vcl_size_t j = 0; j < blocksize; ++j)
            {
              NumericT const * ptrB = &(buffer_B[j*blocksize]);

              NumericT temp = NumericT(0);
              for (vcl_size_t k = 0; k < blocksize; ++k)
                temp += ptrA[k] * ptrB[k];

              buffer_C[i*blocksize + j] += temp;
            }
          }
        }

        // write the accumulated block back to C, applying alpha and beta:
        if (beta > 0 || beta < 0)  // beta != 0
        {
          for (vcl_size_t i = offset_i; i < i_max; ++i)
            for (vcl_size_t j = offset_j; j < j_max; ++j)
              C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
        }
        else
        {
          for (vcl_size_t i = offset_i; i < i_max; ++i)
            for (vcl_size_t j = offset_j; j < j_max; ++j)
              C(i,j) = alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
        }
      }
    }
  }
} // namespace detail
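/** @brief Carries out dense matrix-matrix multiplication C = alpha * op(A) * op(B) + beta * C, dispatching over the transposition flags and storage layouts. */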
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
               const matrix_base<NumericT> & B, bool trans_B,
               matrix_base<NumericT> & C,
               ScalarT1 alpha, ScalarT2 beta)
{
  typedef NumericT value_type;

  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
  value_type       * data_C = detail::extract_raw_pointer<value_type>(C);

  // ... (offsets, strides, and sizes of A, B, and C elided)

  // Each branch below distinguishes the eight row-/column-major combinations of A, B, and C.
  // For every combination, matrix_array_wrapper objects with the matching layout tags (and a
  // transposed access pattern where required) are set up (elided here) and the same blocked
  // kernel is invoked. The third size argument is the inner summation dimension: A_size2 when
  // A is not transposed, A_size1 when it is.
  if (!trans_A && !trans_B)
  {
    // ... (8x, once per layout combination:)
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (!trans_A && trans_B)
  {
    // ... (8x:)
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (trans_A && !trans_B)
  {
    // ... (8x:)
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (trans_A && trans_B)
  {
    // ... (8x:)
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
}
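/** @brief The implementation of the operation mat1 += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. */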
template<typename NumericT, typename ScalarT>
void scaled_rank_1_update(matrix_base<NumericT> & mat1,
                          ScalarT const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
                          const vector_base<NumericT> & vec1,
                          const vector_base<NumericT> & vec2)
{
  typedef NumericT value_type;

  value_type       * data_A  = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);

  // ... (offsets, strides, and sizes elided)

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  if (mat1.row_major())
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        // scale vec1 once per row rather than each product term:
        value_type value_v1 = data_v1[static_cast<vcl_size_t>(row) * inc1 + start1] / data_alpha;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
      }
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type value_v1 = data_v1[static_cast<vcl_size_t>(row) * inc1 + start1] * data_alpha;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
      }
    }
  }
  else
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
      {
        value_type value_v2 = data_v2[static_cast<vcl_size_t>(col) * inc2 + start2] / data_alpha;
        for (vcl_size_t row = 0; row < A_size1; ++row)
          data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, static_cast<vcl_size_t>(col) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
      }
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
      {
        value_type value_v2 = data_v2[static_cast<vcl_size_t>(col) * inc2 + start2] * data_alpha;
        for (vcl_size_t row = 0; row < A_size1; ++row)
          data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, static_cast<vcl_size_t>(col) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
      }
    }
  }
}
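/** @brief Stores the diagonal of matrix A in the vector D and its superdiagonal in the vector S. */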
template<typename NumericT, typename S1>
void bidiag_pack_impl(matrix_base<NumericT> & A,
                      vector_base<S1> & D,
                      vector_base<S1> & S)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
  value_type * data_S = detail::extract_raw_pointer<value_type>(S);

  // ... (offsets, strides, and the diagonal length 'size' elided)

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((size1*size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i2 = 0; i2 < long(size) - 1; i2++)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);
      data_D[start1 + inc1 * i]       = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
      data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    data_D[start1 + inc1 * (size-1)] = data_A[viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((size1*size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i2 = 0; i2 < long(size) - 1; i2++)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);
      data_D[start1 + inc1 * i]       = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
      data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    data_D[start1 + inc1 * (size-1)] = data_A[viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
  }
}
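/** @brief Convenience overload packing the diagonal and superdiagonal of A into the vectors dh and sh. */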
template<typename NumericT, typename VectorType>
void bidiag_pack(matrix_base<NumericT> & A,
                 VectorType & dh,
                 VectorType & sh)
{
  // ... (forwards dh and sh to bidiag_pack_impl; body elided)
}
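/** @brief Applies a Householder reflection P (defined by the vector stored in D) to a matrix from the left: A <- P * A. */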
template<typename NumericT>
void house_update_A_left(matrix_base<NumericT> & A,
                         vector_base<NumericT> & D,
                         vcl_size_t start)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);

  // ... (offsets, strides, sizes; row_start derived from start)

  if (A.row_major())
  {
    for (vcl_size_t i = 0; i < A_size2; i++)  // (outer loop over columns reconstructed)
    {
      // ss = <Householder vector v (stored in D), i-th column of A>:
      value_type ss = 0;
      for (vcl_size_t j = row_start; j < A_size1; j++)
        ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];

      // A <- A - 2 v (v^T A), applied column by column:
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
        data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
          (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * ss);
    }
  }
  else
  {
    for (vcl_size_t i = 0; i < A_size2; i++)
    {
      value_type ss = 0;
      for (vcl_size_t j = row_start; j < A_size1; j++)
        ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
        data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
          (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * ss);
    }
  }
}
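/** @brief Applies a Householder reflection P to a matrix from the right: A <- A * P. */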
template<typename NumericT>
void house_update_A_right(matrix_base<NumericT> & A,
                          vector_base<NumericT> & D)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);

  // ... (offsets, strides, and sizes elided)

  if (A.row_major())
  {
    for (vcl_size_t i = 0; i < A_size1; i++)  // (outer loop over rows reconstructed)
    {
      // ss = <i-th row of A, Householder vector v (stored in D)>:
      value_type ss = 0;
      for (vcl_size_t j = 0; j < A_size2; j++)
        ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
      // ... (sum_Av obtained from ss; intermediate lines elided)

      // A <- A - 2 (A v) v^T, applied entry by entry along the row:
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long j = 0; j < static_cast<long>(A_size2); j++)
        data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
    }
  }
  else
  {
    for (vcl_size_t i = 0; i < A_size1; i++)
    {
      value_type ss = 0;
      for (vcl_size_t j = 0; j < A_size2; j++)
        ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
      // ... (sum_Av obtained from ss)

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long j = 0; j < static_cast<long>(A_size2); j++)
        data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
    }
  }
}
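/** @brief Updates the matrix Q, which is needed for the computation of the eigenvectors. */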
template<typename NumericT>
void house_update_QL(matrix_base<NumericT> & Q,
                     vector_base<NumericT> & D,
                     vcl_size_t A_size1)
{
  // ... (updates Q with the Householder reflection stored in D; body elided)
}
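/** @brief Updates the matrix Q by a sequence of Givens rotations; part of the tql2 algorithm. */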
template<typename NumericT>
void givens_next(matrix_base<NumericT> & Q,
                 vector_base<NumericT> & tmp1,
                 vector_base<NumericT> & tmp2,
                 int l, int m)
{
  typedef NumericT value_type;

  value_type * data_Q    = detail::extract_raw_pointer<value_type>(Q);
  value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
  value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);

  // ... (offsets, strides, and sizes elided)

  if (Q.row_major())
  {
    for (int i = m - 1; i >= l; i--)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((Q_size1*Q_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long k = 0; k < static_cast<long>(Q_size1); k++)
      {
        // ... (rotate the two affected entries of row k using the rotation pair
        //      stored in tmp1[i] / tmp2[i]; body elided)
      }
    }
  }
  else
  {
    for (int i = m - 1; i >= l; i--)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((Q_size1*Q_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long k = 0; k < static_cast<long>(Q_size1); k++)
      {
        // ... (same rotation through column_major indexing; body elided)
      }
    }
  }
}
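/** @brief Copies a row (copy_col == false) or a column (copy_col == true) of matrix A into the vector V. */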
template<typename NumericT, typename S1>
void copy_vec(matrix_base<NumericT> & A,
              vector_base<S1> & V,
              vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_V = detail::extract_raw_pointer<value_type>(V);

  // ... (offsets, strides, and sizes elided)

  if (copy_col)
  {
    if (A.row_major())
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(row_start)] = data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(row_start)] = data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
  }
  else
  {
    if (A.row_major())
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(col_start); i < static_cast<long>(A_size2); i++)
        data_V[i - static_cast<long>(col_start)] = data_A[viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(col_start); i < static_cast<long>(A_size2); i++)
        data_V[i - static_cast<long>(col_start)] = data_A[viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
  }
}

// ... (remaining routines elided)

} // namespace host_based
} // namespace linalg
} // namespace viennacl

#endif // VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_