#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_

// ... (license header and #include directives elided)

#ifdef VIENNACL_WITH_OPENMP
#include <omp.h>
#endif
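// The routines in this header form the single-threaded / OpenMP-enabled CPU backend of
// ViennaCL's sparse linear algebra (namespace viennacl::linalg::host_based). They are not
// called directly by user code; the generic interfaces viennacl::linalg::prod() and
// viennacl::linalg::inplace_solve() dispatch to them when the data resides in main memory.

//
// Compressed matrix (CSR format)
//

// row_info(): extracts per-row information of a CSR matrix into a dense vector. Depending on
// info_selector this is the row-wise infinity norm, 1-norm, 2-norm, or the diagonal entry.
// handle1() holds the CSR row pointers, handle2() the column indices, handle() the values.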
template<typename NumericT, unsigned int AlignmentV>
void row_info(compressed_matrix<NumericT, AlignmentV> const & mat,
              vector_base<NumericT> & vec,
              viennacl::linalg::detail::row_info_types info_selector)
{
  NumericT           * result_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

  for (vcl_size_t row = 0; row < mat.size1(); ++row)
  {
    NumericT value = 0;
    unsigned int row_end = row_buffer[row+1];

    switch (info_selector)
    {
      // ... (case labels and break statements elided) infinity norm of the row:
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value = std::max<NumericT>(value, std::fabs(elements[i]));
      // ... 1-norm of the row:
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value += std::fabs(elements[i]);
      // ... 2-norm of the row:
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value += elements[i] * elements[i];
        value = std::sqrt(value);
      // ... diagonal entry of the row:
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          if (col_buffer[i] == row)
            value = elements[i];
    }
    result_buf[row] = value;
  }
}
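// prod_impl(): sparse matrix-vector product result = alpha * mat * vec + beta * result for a
// CSR matrix. Rows are independent, so the outer loop can be parallelized with OpenMP; the
// vector is addressed via its start() offset and stride() so that vector ranges and slices work.
//
// Illustrative usage sketch (user code, not part of this file); the generic interface dispatches
// to this kernel on the CPU backend:
//
//   viennacl::compressed_matrix<double> A(n, n);   // assume n and the entries are set up
//   viennacl::vector<double> x(n), y(n);
//   y = viennacl::linalg::prod(A, x);              // eventually calls host_based::prod_impl()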
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: mat, vec, alpha, result, beta)
{
  NumericT           * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long row = 0; row < static_cast<long>(mat.size1()); ++row)
  {
    NumericT dot_prod = 0;
    // ... (loop over the nonzeros of this row)
      dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];

    if (beta < 0 || beta > 0)   // beta != 0: blend with the previous content of result
    {
      // ... (index = row * result.stride() + result.start())
      result_buf[index] = alpha * dot_prod + beta * result_buf[index];
    }
    // ... (otherwise the entry is overwritten with alpha * dot_prod)
  }
}
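// prod_impl(): sparse-times-dense product result = sp_mat * d_mat for a CSR matrix and a dense
// matrix. The dense operands are addressed through strided accessor wrappers (one instance for
// row-major and one for column-major storage), and the loop order is chosen according to the
// storage layout of d_mat so that the dense matrix is traversed with stride-friendly accesses.
// The transposed variant prod_impl(sp_mat, trans(d_mat), result) below follows the same pattern
// with the dense indices swapped.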
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: sp_mat, d_mat, result)
{
  NumericT     const * sp_mat_elements   = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.row_major() )
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      // ... (for each column of d_mat, temp accumulates one entry of the result)
        for (vcl_size_t k = row_start; k < row_end; ++k)
          temp += sp_mat_elements[k] * d_mat_wrapper_row(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), col);

        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
        // ... (row_start/row_end of the sparse row; temp accumulator)
        for (vcl_size_t k = row_start; k < row_end; ++k)
          temp += sp_mat_elements[k] * d_mat_wrapper_col(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), static_cast<vcl_size_t>(col));

        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
      }
    }
  }
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: sp_mat, transposed dense matrix proxy d_mat, result)
{
  NumericT     const * sp_mat_elements   = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      // ... (row_start/row_end of the sparse row)
      for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
      {
        // ... (temp accumulates result(row, col); note the swapped dense indices due to the transposition)
        for (vcl_size_t k = row_start; k < row_end; ++k)
          temp += sp_mat_elements[k] * d_mat_wrapper_row(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));

        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
      }
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      // ... (loop over the rows of sp_mat; temp accumulator)
        for (vcl_size_t k = row_start; k < row_end; ++k)
          temp += sp_mat_elements[k] * d_mat_wrapper_col(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));

        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
    }
  }
}
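// prod_impl(): sparse-times-sparse product C = A * B for CSR matrices ("spGEMM"). The kernel
// works row-wise in three phases:
//   1. For every row of C, compute a cheap upper bound on its number of nonzeros (the sum of the
//      lengths of the rows of B selected by the nonzeros of A) and size per-thread work buffers.
//   2. Symbolic phase: row_C_scan_symbolic_vector() determines the exact number of nonzeros per
//      row of C; an exclusive scan turns these counts into the CSR row pointer array and
//      C.reserve() allocates the nonzero storage.
//   3. Numeric phase: row_C_scan_numeric_vector() fills the column indices and values of C.
//
// Illustrative usage sketch (user code, not part of this file):
//
//   viennacl::compressed_matrix<double> A(n, n), B(n, n), C;
//   C = viennacl::linalg::prod(A, B);   // dispatches to this kernel on the CPU backend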
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: A, B, C)
{
  NumericT     const * A_elements   = detail::extract_raw_pointer<NumericT>(A.handle());
  unsigned int const * A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const * A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());

  NumericT     const * B_elements   = detail::extract_raw_pointer<NumericT>(B.handle());
  unsigned int const * B_row_buffer = detail::extract_raw_pointer<unsigned int>(B.handle1());
  unsigned int const * B_col_buffer = detail::extract_raw_pointer<unsigned int>(B.handle2());

  unsigned int * C_row_buffer = detail::extract_raw_pointer<unsigned int>(C.handle1());

#if defined(VIENNACL_WITH_OPENMP)
  unsigned int block_factor = 10;
  unsigned int max_threads  = omp_get_max_threads();
  long chunk_size = long(A.size1()) / long(block_factor * max_threads) + 1;
#else
  unsigned int max_threads = 1;
#endif
  std::vector<unsigned int>   max_length_row_C(max_threads);
  std::vector<unsigned int *> row_C_temp_index_buffers(max_threads);
  std::vector<NumericT *>     row_C_temp_value_buffers(max_threads);
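  // Phase 1: upper bound on the number of nonzeros per row of C. For row i of A, the bound is
  // the sum of the lengths of all rows of B hit by the nonzeros of A's row i, capped at
  // B.size2(). Each thread records the largest bound it has seen so that one scratch buffer per
  // thread suffices for all of its rows.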
#if defined(VIENNACL_WITH_OPENMP)
  #pragma omp parallel for schedule(dynamic, chunk_size)
#endif
  for (long i = 0; i < long(A.size1()); ++i)
  {
    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A   = A_row_buffer[i+1];

    unsigned int row_C_upper_bound_row = 0;
    for (unsigned int j = row_start_A; j < row_end_A; ++j)
    {
      unsigned int row_B = A_col_buffer[j];
      unsigned int entries_in_row = B_row_buffer[row_B+1] - B_row_buffer[row_B];
      row_C_upper_bound_row += entries_in_row;
    }

#ifdef VIENNACL_WITH_OPENMP
    unsigned int thread_id = omp_get_thread_num();
#else
    unsigned int thread_id = 0;
#endif
    max_length_row_C[thread_id] = std::max(max_length_row_C[thread_id],
                                           std::min(row_C_upper_bound_row, static_cast<unsigned int>(B.size2())));
  }

  // reduce the per-thread maxima and allocate the index scratch buffers (three per thread):
  for (std::size_t i = 1; i < max_length_row_C.size(); ++i)
    max_length_row_C[0] = std::max(max_length_row_C[0], max_length_row_C[i]);

  for (unsigned int i = 0; i < max_threads; ++i)
    row_C_temp_index_buffers[i] = (unsigned int *)malloc(sizeof(unsigned int)*3*max_length_row_C[0]);
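  // Phase 2 (symbolic): determine the exact number of nonzeros of each row of C. The per-thread
  // scratch buffer is split into three sub-buffers which row_C_scan_symbolic_vector() uses to
  // merge the column index lists of the rows of B selected by row i of A. The count is written
  // to C_row_buffer[i] and converted to row pointers afterwards.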
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for schedule(dynamic, chunk_size)
#endif
  for (long i = 0; i < long(A.size1()); ++i)
  {
    unsigned int thread_id = 0;
#ifdef VIENNACL_WITH_OPENMP
    thread_id = omp_get_thread_num();
#endif
    unsigned int buffer_len = max_length_row_C[0];

    unsigned int *row_C_vector_1 = row_C_temp_index_buffers[thread_id];
    unsigned int *row_C_vector_2 = row_C_vector_1 + buffer_len;
    unsigned int *row_C_vector_3 = row_C_vector_2 + buffer_len;

    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A   = A_row_buffer[i+1];

    C_row_buffer[i] = row_C_scan_symbolic_vector(row_start_A, row_end_A, A_col_buffer,
                                                 B_row_buffer, B_col_buffer, static_cast<unsigned int>(B.size2()),
                                                 row_C_vector_1, row_C_vector_2, row_C_vector_3);
  }
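  // Convert the per-row counts in C_row_buffer into CSR row pointers via an exclusive scan:
  // e.g. counts [2, 1, 3] become offsets [0, 2, 3] with a total of 6 nonzeros, which is exactly
  // the amount of storage requested from C.reserve() below.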
  unsigned int current_offset = 0;
  for (std::size_t i = 0; i < C.size1(); ++i)
  {
    unsigned int tmp = C_row_buffer[i];
    C_row_buffer[i] = current_offset;
    current_offset += tmp;
  }
  C_row_buffer[C.size1()] = current_offset;
  C.reserve(current_offset, false);
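  // Phase 3 (numeric): with the sparsity pattern of C fixed, allocate matching per-thread value
  // buffers and let row_C_scan_numeric_vector() write the column indices and values of each row
  // of C directly into the target arrays C_col_buffer / C_elements.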
  for (unsigned int i = 0; i < max_threads; ++i)
    row_C_temp_value_buffers[i] = (NumericT *)malloc(sizeof(NumericT)*3*max_length_row_C[0]);

  NumericT     * C_elements   = detail::extract_raw_pointer<NumericT>(C.handle());
  unsigned int * C_col_buffer = detail::extract_raw_pointer<unsigned int>(C.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for schedule(dynamic, chunk_size)
#endif
  for (long i = 0; i < long(A.size1()); ++i)
  {
    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A   = A_row_buffer[i+1];

    unsigned int row_C_buffer_start = C_row_buffer[i];
    unsigned int row_C_buffer_end   = C_row_buffer[i+1];

#ifdef VIENNACL_WITH_OPENMP
    unsigned int thread_id = omp_get_thread_num();
#else
    unsigned int thread_id = 0;
#endif

    unsigned int *row_C_vector_1 = row_C_temp_index_buffers[thread_id];
    unsigned int *row_C_vector_2 = row_C_vector_1 + max_length_row_C[0];
    unsigned int *row_C_vector_3 = row_C_vector_2 + max_length_row_C[0];

    NumericT *row_C_vector_1_values = row_C_temp_value_buffers[thread_id];
    NumericT *row_C_vector_2_values = row_C_vector_1_values + max_length_row_C[0];
    NumericT *row_C_vector_3_values = row_C_vector_2_values + max_length_row_C[0];

    row_C_scan_numeric_vector(row_start_A, row_end_A, A_col_buffer, A_elements,
                              B_row_buffer, B_col_buffer, B_elements, static_cast<unsigned int>(B.size2()),
                              row_C_buffer_start, row_C_buffer_end, C_col_buffer, C_elements,
                              row_C_vector_1, row_C_vector_1_values,
                              row_C_vector_2, row_C_vector_2_values,
                              row_C_vector_3, row_C_vector_3_values);
  }

  // clean up the per-thread scratch buffers:
  for (unsigned int i = 0; i < max_threads; ++i)
  {
    free(row_C_temp_index_buffers[i]);
    free(row_C_temp_value_buffers[i]);
  }
}
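//
// Triangular solves for a CSR matrix: A \ b, with b overwritten by the solution
//

// The detail::csr_inplace_solve() kernels below operate directly on the raw CSR arrays. The four
// overloads are selected by tag dispatch: the unit-lower and lower variants use forward
// substitution (rows from top to bottom), the unit-upper and upper variants use backward
// substitution (rows from bottom to top); the non-unit variants additionally pick up the
// diagonal entry of the current row and divide by it.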
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,
                       vcl_size_t num_cols,
                       viennacl::linalg::unit_lower_tag)
{
  // ... (forward substitution: loop over rows from top to bottom; vec_entry starts as vec_buffer[row])
    for (vcl_size_t i = row_begin; i < row_end; ++i)
    {
      // ... (only entries with col_index < row contribute)
        vec_entry -= vec_buffer[col_index] * element_buffer[i];
    }
    vec_buffer[row] = vec_entry;
}

template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,
                       vcl_size_t num_cols,
                       viennacl::linalg::lower_tag)
{
  // ... (forward substitution with explicit diagonal)
    for (vcl_size_t i = row_begin; i < row_end; ++i)
    {
      // ... (entries left of the diagonal are subtracted, the diagonal itself is remembered)
        vec_entry -= vec_buffer[col_index] * element_buffer[i];
      else if (col_index == row)
        diagonal_entry = element_buffer[i];
    }
    vec_buffer[row] = vec_entry / diagonal_entry;
}

template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,
                       vcl_size_t num_cols,
                       viennacl::linalg::unit_upper_tag)
{
  for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)
  {
    // ... (backward substitution: row runs from the bottom towards the top)
    for (vcl_size_t i = row_begin; i < row_end; ++i)
    {
      // ... (only entries with col_index > row contribute)
        vec_entry -= vec_buffer[col_index] * element_buffer[i];
    }
    vec_buffer[row] = vec_entry;
  }
}

template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,
                       vcl_size_t num_cols,
                       viennacl::linalg::upper_tag)
{
  for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)
  {
    // ... (backward substitution with explicit diagonal)
    for (vcl_size_t i = row_begin; i < row_end; ++i)
    {
      // ... (entries right of the diagonal are subtracted, the diagonal itself is remembered)
        vec_entry -= vec_buffer[col_index] * element_buffer[i];
      else if (col_index == row)
        diagonal_entry = element_buffer[i];
    }
    vec_buffer[row] = vec_entry / diagonal_entry;
  }
}
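// The public inplace_solve() overloads for compressed_matrix simply extract the raw CSR arrays
// and forward to detail::csr_inplace_solve() together with the solver tag (lower, unit lower,
// upper, or unit upper).
//
// Illustrative usage sketch (user code, not part of this file):
//
//   viennacl::compressed_matrix<double> L(n, n);   // assume a lower triangular matrix
//   viennacl::vector<double> b(n);
//   viennacl::linalg::inplace_solve(L, b, viennacl::linalg::lower_tag());   // b <- L^{-1} b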
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: lower-triangular matrix L, vector vec, and a lower-type solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
}

template<typename NumericT, unsigned int AlignmentV>
// ... (second lower-triangular overload; identical except for the solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
}

template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: upper-triangular matrix U, vector vec, and an upper-type solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
}

template<typename NumericT, unsigned int AlignmentV>
// ... (second upper-triangular overload; identical except for the solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
}
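//
// Triangular solves with a transposed CSR matrix: A^T \ b
//

// For the transposed system the CSR row arrays of A describe the columns of A^T, so the
// detail::csr_trans_inplace_solve() kernels below use column-oriented substitution: once an
// unknown is finalized, its contribution is immediately scattered into the remaining entries of
// the right-hand side (entries below the diagonal for the lower variants, above it for the
// upper variants).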
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,
                             vcl_size_t num_cols,
                             viennacl::linalg::unit_lower_tag)
{
  for (vcl_size_t col = 0; col < num_cols; ++col)
  {
    NumericT vec_entry = vec_buffer[col];
    // ... (col_begin/col_end from row_buffer)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      unsigned int row_index = col_buffer[i];
      // ... (only entries with row_index > col contribute)
        vec_buffer[row_index] -= vec_entry * element_buffer[i];
    }
  }
}

template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,
                             vcl_size_t num_cols,
                             viennacl::linalg::lower_tag)
{
  for (vcl_size_t col = 0; col < num_cols; ++col)
  {
    // ... (first pass over the column: locate the diagonal entry)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index = col_buffer[i])
      if (row_index == col)
        diagonal_entry = element_buffer[i];
    }

    NumericT vec_entry = vec_buffer[col] / diagonal_entry;
    vec_buffer[col] = vec_entry;
    // ... (second pass: scatter the finalized unknown into the entries below the diagonal)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index > col)
        vec_buffer[row_index] -= vec_entry * element_buffer[i];
    }
  }
}

template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,
                             vcl_size_t num_cols,
                             viennacl::linalg::unit_upper_tag)
{
  for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
  {
    // ... (col runs from the last column towards the first)
    NumericT vec_entry = vec_buffer[col];
    // ... (col_begin/col_end from row_buffer)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (only entries with row_index < col contribute)
        vec_buffer[row_index] -= vec_entry * element_buffer[i];
    }
  }
}

template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,
                             vcl_size_t num_cols,
                             viennacl::linalg::upper_tag)
{
  for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
  {
    // ... (col runs from the last column towards the first; locate the diagonal entry first)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index = col_buffer[i])
      if (row_index == col)
        diagonal_entry = element_buffer[i];
    }

    NumericT vec_entry = vec_buffer[col] / diagonal_entry;
    vec_buffer[col] = vec_entry;
    // ... (scatter into the entries above the diagonal)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index < col)
        vec_buffer[row_index] -= vec_entry * element_buffer[i];
    }
  }
}
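// block_inplace_solve(): the same column-oriented substitution specialized for a transposed
// compressed_matrix proxy (matrix_expression with op_trans), as used by block preconditioners.
// The variants for non-unit diagonals read the diagonal entries from a separately supplied
// vector (L_diagonal / U_diagonal) instead of searching for them inside the columns.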
template<typename NumericT, unsigned int AlignmentV>
void block_inplace_solve(const matrix_expression<const compressed_matrix<NumericT, AlignmentV>,
                                                 const compressed_matrix<NumericT, AlignmentV>, op_trans> & L,
                         viennacl::backend::mem_handle const &, vcl_size_t,
                         vector_base<NumericT> const &,
                         vector_base<NumericT> & vec,
                         viennacl::linalg::unit_lower_tag)
{
  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
  NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
  NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
  {
    NumericT vec_entry = vec_buffer[col];
    // ... (col_begin/col_end from row_buffer)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      unsigned int row_index = col_buffer[i];
      // ... (row_index > col)
        vec_buffer[row_index] -= vec_entry * elements[i];
    }
  }
}

template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: as above, but with a diagonal vector L_diagonal and lower_tag)
{
  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
  NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
  NumericT     const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(L_diagonal.handle());
  NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
  {
    // ... (col_begin/col_end from row_buffer)
    NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
    vec_buffer[col] = vec_entry;
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index = col_buffer[i]; row_index > col)
        vec_buffer[row_index] -= vec_entry * elements[i];
    }
  }
}

template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: transposed upper-triangular proxy U, vector vec, unit_upper_tag)
{
  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
  NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
  NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
  {
    vcl_size_t col = (U.lhs().size1() - col2) - 1;
    NumericT vec_entry = vec_buffer[col];
    // ... (col_begin/col_end from row_buffer)
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index = col_buffer[i]; row_index < col)
        vec_buffer[row_index] -= vec_entry * elements[i];
    }
  }
}

template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: U, diagonal vector U_diagonal, vector vec, upper_tag)
{
  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
  NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
  NumericT     const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(U_diagonal.handle());
  NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
  {
    vcl_size_t col = (U.lhs().size1() - col2) - 1;
    // ... (col_begin/col_end from row_buffer)
    NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
    vec_buffer[col] = vec_entry;
    for (vcl_size_t i = col_begin; i < col_end; ++i)
    {
      // ... (row_index = col_buffer[i]; row_index < col)
        vec_buffer[row_index] -= vec_entry * elements[i];
    }
  }
}
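// inplace_solve() overloads for a transposed compressed_matrix (matrix_expression with op_trans):
// they extract the CSR arrays of proxy.lhs() and forward to detail::csr_trans_inplace_solve()
// together with the respective solver tag.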
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: transposed proxy, vector vec, solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}

template<typename NumericT, unsigned int AlignmentV>
// ... (second overload; identical except for the solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}

template<typename NumericT, unsigned int AlignmentV>
// ... (third overload; identical except for the solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}

template<typename NumericT, unsigned int AlignmentV>
// ... (fourth overload; identical except for the solver tag)
{
  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}
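//
// Compressed compressed matrix (CSR variant storing only the rows that carry nonzeros)
//

// prod_impl(): matrix-vector product for compressed_compressed_matrix. handle3() provides the
// indices of the stored rows (row_indices); the result vector is first scaled by beta (or
// cleared), and the contributions of the stored rows are then added on top, so that rows without
// nonzeros are never touched.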
template<typename NumericT>
// ... (signature elided: mat, vec, alpha, result, beta)
{
  NumericT           * result_buf  = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf     = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements    = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * col_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle2());

  if (beta < 0 || beta > 0)   // beta != 0: scale the previous content of result
  {
    // ... (loop over all entries of result)
      result_buf[i * result.stride() + result.start()] *= beta;
  }
  else
  {
    // ... (loop over all entries of result)
      result_buf[i * result.stride() + result.start()] = 0;
  }

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)   // loop over the stored rows only
  {
    // ... (dot product of the stored row with vec)
    for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
      dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];

    if (beta > 0 || beta < 0)
    // ... (the result entry at row_indices[i] is updated with alpha * dot_prod)
  }
}
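//
// Coordinate matrix (COO format: nonzeros stored as (row, column, value) triplets)
//

// row_info(): the COO analogue of the CSR version above. handle12() stores the interleaved
// (row, column) index pairs, so entry i has row coord_buffer[2*i] and column coord_buffer[2*i+1].
// Since the entries are grouped by row, the accumulated value is flushed to the result vector
// whenever the row index changes.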
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: coordinate_matrix mat, vector vec, info_selector)
{
  NumericT           * result_buf   = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements     = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

  NumericT value = 0;
  unsigned int last_row = 0;

  // ... (loop over all nonzeros i)
    unsigned int current_row = coord_buffer[2*i];

    if (current_row != last_row)
    {
      // ... (for the 2-norm, take the square root of the accumulated sum)
        value = std::sqrt(value);

      result_buf[last_row] = value;
      // ... (reset value to zero)
      last_row = current_row;
    }

    switch (info_selector)
    {
      // ... (case labels and break statements elided) infinity norm:
        value = std::max<NumericT>(value, std::fabs(elements[i]));
      // ... 1-norm:
        value += std::fabs(elements[i]);
      // ... 2-norm:
        value += elements[i] * elements[i];
      // ... diagonal entry:
        if (coord_buffer[2*i+1] == current_row)
          value = elements[i];
    }

  // flush the last row:
  // ... (for the 2-norm)
    value = std::sqrt(value);
  result_buf[last_row] = value;
}
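// prod_impl(): matrix-vector product result = alpha * mat * vec + beta * result for a COO
// matrix. The result vector is first scaled by beta (or cleared), after which each triplet
// (r, c, v) simply scatters alpha * v * vec[c] into result[r].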
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: mat, vec, alpha, result, beta)
{
  NumericT           * result_buf   = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf      = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements     = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

  if (beta < 0 || beta > 0)   // beta != 0
  {
    // ... (loop over all entries of result)
      result_buf[i * result.stride() + result.start()] *= beta;
  }
  else
  {
    // ... (loop over all entries of result)
      result_buf[i * result.stride() + result.start()] = 0;
  }

  // ... (loop over all nonzeros i)
    result_buf[coord_buffer[2*i] * result.stride() + result.start()]
      += alpha * elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: sp_mat, d_mat, result)
{
  NumericT     const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.row_major() )
  {
    // clear the result, then scatter the contribution of each triplet:
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      // ... (set row 'row' of the result to zero)
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i)
    {
      // ... (r = row index, c = column index, x = value of triplet i; loop over the columns 'col' of d_mat)
        NumericT y = d_mat_wrapper_row( c, col);
        if (result.row_major())
          result_wrapper_row(r, col) += x * y;
        else
          result_wrapper_col(r, col) += x * y;
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      // ... (set column 'col' of the result to zero)
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      // ... (for each triplet (r, c, x) of sp_mat)
        NumericT y = d_mat_wrapper_col( c, col);
        if (result.row_major())
          result_wrapper_row( r, col) += x*y;
        else
          result_wrapper_col( r, col) += x*y;
    }
  }
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: sp_mat, transposed dense proxy d_mat, result)
{
  NumericT     const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      // clear row 'row' of the result:
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_row(row, col) = 0;
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_col(row, col) = 0;
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
      // ... (r = row index, c = column index, x = value of triplet i)
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);   // transposed access
          result_wrapper_row(r, col) += x * y;
        }
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_col(r, col) += x * y;
        }
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      // ... (clear column 'col' of the result)
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
      // ... (r, c, x as above)
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_row(r, col) += x * y;
        }
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_col(r, col) += x * y;
        }
    }
  }
}
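//
// ELL matrix (ELLPACK format: a fixed number of entry slots per row, padded with zeros)
//

// prod_impl(): matrix-vector product for an ell_matrix. Every row stores the same number of
// entries (maxnnz(), padded internally to internal_maxnnz()), stored slot-wise so that the
// offset of a slot is derived from the slot index and the internal row count; padding entries
// are skipped via the "val != 0" test below.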
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: mat, vec, alpha, result, beta)
{
  NumericT           * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords     = detail::extract_raw_pointer<unsigned int>(mat.handle2());

  // ... (loop over rows; sum accumulates the dot product of the current row)
    for (unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)
    {
      // ... (offset into the ELL arrays; val = elements[offset])
      if (val > 0 || val < 0)   // skip padding zeros
      {
        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);
      }
    }

    if (beta < 0 || beta > 0)
    {
      // ... (index = row * result.stride() + result.start())
      result_buf[index] = alpha * sum + beta * result_buf[index];
    }
    // ... (otherwise the entry is overwritten with alpha * sum)
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: sp_mat, d_mat, result)
{
  NumericT     const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.row_major() )
  {
    // clear the result, then accumulate row by row:
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      // ... (set row 'row' of the result to zero)
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)
      {
        // ... (sp_mat_val and sp_mat_col of this ELL slot)
        if (sp_mat_val < 0 || sp_mat_val > 0)   // skip padding entries
        {
          // ... (loop over the columns 'col' of d_mat)
            if (result.row_major())
              result_wrapper_row(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);
            else
              result_wrapper_col(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);
        }
      }
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      // clear column 'col' of the result:
      if (result.row_major())
        for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          result_wrapper_row(row, col) = 0;
      else
        for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          result_wrapper_col(row, col) = 0;
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {
        // ... (loop over rows; sp_mat_val and sp_mat_col of this slot)
          if (sp_mat_val < 0 || sp_mat_val > 0)
          {
            if (result.row_major())
              result_wrapper_row(row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);
            else
              result_wrapper_col(row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);
          }
      }
    }
  }
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: sp_mat, transposed dense proxy d_mat, result)
{
  NumericT     const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      // clear row 'row' of the result:
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_row(row, col) = 0;
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_col(row, col) = 0;

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
        for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {
          // ... (sp_mat_val and sp_mat_col of this ELL slot)
          if (sp_mat_val < 0 || sp_mat_val > 0)   // skip padding entries
          {
            if (result.row_major())
              result_wrapper_row(row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);
            else
              result_wrapper_col(row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);
          }
        }
      }
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      // ... (clear column 'col' of the result)
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    // ... (loop over the rows of sp_mat)
      for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id) {
        // ... (sp_mat_val and sp_mat_col of this ELL slot)
        if (sp_mat_val < 0 || sp_mat_val > 0)
        {
          if (result.row_major())
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_row(row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);
          else
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_col(row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);
        }
      }
  }
}
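//
// Sliced ELL matrix (the rows are grouped into blocks; each block stores its own number of
// entry slots per row)
//

// prod_impl(): matrix-vector product for a sliced_ell_matrix. handle1() gives the number of
// stored entry slots per block (columns_per_block), handle2() the column indices, and handle3()
// the start offset of each block (block_start); each block of rows_per_block() rows is processed
// independently, so the blocks can be distributed across OpenMP threads.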
template<typename NumericT, typename IndexT>
// ... (signature elided: mat, vec, alpha, result, beta)
{
  NumericT           * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(mat.handle());
  IndexT       const * columns_per_block = detail::extract_raw_pointer<IndexT>(mat.handle1());
  IndexT       const * column_indices    = detail::extract_raw_pointer<IndexT>(mat.handle2());
  IndexT       const * block_start       = detail::extract_raw_pointer<IndexT>(mat.handle3());

  // ... (num_blocks derived from the matrix size and rows_per_block())

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long block_idx2 = 0; block_idx2 < static_cast<long>(num_blocks); ++block_idx2)
  {
    // ... (block_idx, first_row_in_matrix, and the per-block accumulator result_values[])
    vcl_size_t current_columns_per_block = columns_per_block[block_idx];

    for (IndexT column_entry_index = 0;
                column_entry_index < current_columns_per_block;
              ++column_entry_index)
    {
      // ... (stride_start = offset of this entry slot within the block)
      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)
      {
        NumericT val = elements[stride_start + row_in_block];
        result_values[row_in_block] += (val > 0 || val < 0) ? vec_buf[column_indices[stride_start + row_in_block] * vec.stride() + vec.start()] * val : 0;
      }
    }

    if (beta < 0 || beta > 0)
    {
      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)
      {
        if (first_row_in_matrix + row_in_block < result.size())
        {
          // ... (index = (first_row_in_matrix + row_in_block) * result.stride() + result.start())
          result_buf[index] = alpha * result_values[row_in_block] + beta * result_buf[index];
        }
      }
    }
    else
    {
      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)
      {
        if (first_row_in_matrix + row_in_block < result.size())
          result_buf[(first_row_in_matrix + row_in_block) * result.stride() + result.start()] = alpha * result_values[row_in_block];
      }
    }
  }
}
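//
// Hybrid matrix (HYB: an ELL part for the regular entries plus a CSR part for the overflow)
//

// prod_impl(): matrix-vector product for a hyb_matrix. Each row first accumulates its ELL part
// (values in handle(), column indices in handle2(), padding skipped) and then adds the CSR
// overflow entries stored in handle3()/handle4()/handle5() before the alpha/beta update of the
// corresponding result entry.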
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: mat, vec, alpha, result, beta)
{
  NumericT           * result_buf     = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf        = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  // ... (loop over rows; sum accumulates the dot product of the current row)

    // ELL part:
    for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
    {
      // ... (offset into the ELL arrays; val = elements[offset])
      if (val > 0 || val < 0)   // skip padding zeros
      {
        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);
      }
    }

    // CSR part (entries that did not fit into the ELL slots):
    // ... (col_begin/col_end from csr_row_buffer)
    for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
      sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);

    if (beta < 0 || beta > 0)
    {
      // ... (index = row * result.stride() + result.start())
      result_buf[index] = alpha * sum + beta * result_buf[index];
    }
    // ... (otherwise the entry is overwritten with alpha * sum)
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: mat, d_mat, result)
{
  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
  {
    // ... (loop over the rows of mat; sum accumulates one entry of the result)

      // ELL part:
      for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
      {
        // ... (offset, val and col of this ELL slot)
        if (val < 0 || val > 0)
        {
          if (d_mat.row_major())
            sum += d_mat_wrapper_row(col, result_col) * val;
          else
            sum += d_mat_wrapper_col(col, result_col) * val;
        }
      }

      // CSR part:
      // ... (col_begin/col_end from csr_row_buffer)
      if (d_mat.row_major())
        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_row(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];
      else
        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_col(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];

      if (result.row_major())
        result_wrapper_row(row, result_col) = sum;
      else
        result_wrapper_col(row, result_col) = sum;
  }
}
template<typename NumericT, unsigned int AlignmentV>
// ... (signature elided: mat, transposed dense proxy d_mat, result)
{
  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

  // ... (start/stride/internal-size queries and the accessor wrapper declarations elided)
  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
  {
    // ... (loop over the rows of mat; sum accumulates one entry of the result)

      // ELL part (transposed access into d_mat):
      for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
      {
        // ... (offset, val and col of this ELL slot)
        if (val < 0 || val > 0)
        {
          if (d_mat.lhs().row_major())
            sum += d_mat_wrapper_row(result_col, col) * val;
          else
            sum += d_mat_wrapper_col(result_col, col) * val;
        }
      }

      // CSR part:
      // ... (col_begin/col_end from csr_row_buffer)
      if (d_mat.lhs().row_major())
        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_row(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];
      else
        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_col(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];

      if (result.row_major())
        result_wrapper_row(row, result_col) = sum;
      else
        result_wrapper_col(row, result_col) = sum;
  }
}
// ... (remainder of the file, including the closing namespaces and the #endif of the include guard, elided)