ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ilu_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <cmath>
26 #include <algorithm> //for std::max and std::min
27 
28 #include "viennacl/forwards.h"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
33 #include "viennacl/traits/size.hpp"
38 
39 
40 // Minimum vector size for using OpenMP on vector operations:
41 #ifndef VIENNACL_OPENMP_ILU_MIN_SIZE
42  #define VIENNACL_OPENMP_ILU_MIN_SIZE 5000
43 #endif
44 
45 namespace viennacl
46 {
47 namespace linalg
48 {
49 namespace host_based
50 {
51 
52 template<typename NumericT>
55 {
56  // L is known to have correct dimensions
57 
58  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
59  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
60  NumericT const *A_elements = detail::extract_raw_pointer<NumericT>(A.handle());
61 
62  unsigned int *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
63 
64  //
65  // Step 1: Count elements in L
66  //
67 #ifdef VIENNACL_WITH_OPENMP
68  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
69 #endif
70  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
71  {
72  unsigned int col_begin = A_row_buffer[row];
73  unsigned int col_end = A_row_buffer[row+1];
74 
75  for (unsigned int j = col_begin; j < col_end; ++j)
76  {
77  unsigned int col = A_col_buffer[j];
78  if (long(col) <= row)
79  ++L_row_buffer[row];
80  }
81  }
82 
83  //
84  // Step 2: Exclusive scan on row_buffer arrays to get correct starting indices
85  //
86  viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), L.size1() + 1, 0, 1);
87  viennacl::linalg::exclusive_scan(wrapped_L_row_buffer);
88  L.reserve(wrapped_L_row_buffer[L.size1()], false);
89 
90  unsigned int *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
91  NumericT *L_elements = detail::extract_raw_pointer<NumericT>(L.handle());
92 
93  //
94  // Step 3: Write entries:
95  //
96 #ifdef VIENNACL_WITH_OPENMP
97  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
98 #endif
99  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
100  {
101  unsigned int col_begin = A_row_buffer[row];
102  unsigned int col_end = A_row_buffer[row+1];
103 
104  unsigned int index_L = L_row_buffer[row];
105  for (unsigned int j = col_begin; j < col_end; ++j)
106  {
107  unsigned int col = A_col_buffer[j];
108  NumericT value = A_elements[j];
109 
110  if (long(col) <= row)
111  {
112  L_col_buffer[index_L] = col;
113  L_elements[index_L] = value;
114  ++index_L;
115  }
116  }
117  }
118 
119 } // extract_L
120 
121 
123 template<typename NumericT>
126 {
128 
129  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
130  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
131  NumericT const *A_elements = detail::extract_raw_pointer<NumericT>(A.handle());
132 
133  NumericT *D_elements = detail::extract_raw_pointer<NumericT>(D.handle());
134 
135  //
136  // Step 1: Determine D
137  //
138 #ifdef VIENNACL_WITH_OPENMP
139  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
140 #endif
141  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
142  {
143  unsigned int col_begin = A_row_buffer[row];
144  unsigned int col_end = A_row_buffer[row+1];
145 
146  for (unsigned int j = col_begin; j < col_end; ++j)
147  {
148  unsigned int col = A_col_buffer[j];
149  if (row == col)
150  {
151  D_elements[row] = NumericT(1) / std::sqrt(std::fabs(A_elements[j]));
152  break;
153  }
154  }
155  }
156 
157  //
158  // Step 2: Scale values in L:
159  //
160  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
161  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
162  NumericT *L_elements = detail::extract_raw_pointer<NumericT>(L.handle());
163 
164 #ifdef VIENNACL_WITH_OPENMP
165  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
166 #endif
167  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
168  {
169  unsigned int col_begin = L_row_buffer[row];
170  unsigned int col_end = L_row_buffer[row+1];
171 
172  NumericT D_row = D_elements[row];
173 
174  for (unsigned int j = col_begin; j < col_end; ++j)
175  L_elements[j] *= D_row * D_elements[L_col_buffer[j]];
176  }
177 
179 }
180 
181 
182 
184 template<typename NumericT>
186  vector<NumericT> & aij_L)
187 {
188  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
189  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
190  NumericT *L_elements = detail::extract_raw_pointer<NumericT>(L.handle());
191 
192  NumericT *aij_ptr = detail::extract_raw_pointer<NumericT>(aij_L.handle());
193 
194  // temporary workspace
195  NumericT *L_backup = (NumericT *)malloc(sizeof(NumericT) * L.nnz());
196 
197  // backup:
198 #ifdef VIENNACL_WITH_OPENMP
199  #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
200 #endif
201  for (long i = 0; i < static_cast<long>(L.nnz()); ++i)
202  L_backup[i] = L_elements[i];
203 
204 
205  // sweep
206 #ifdef VIENNACL_WITH_OPENMP
207  #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
208 #endif
209  for (long row = 0; row < static_cast<long>(L.size1()); ++row)
210  {
211  //
212  // update L:
213  //
214  unsigned int row_Li_start = L_row_buffer[row];
215  unsigned int row_Li_end = L_row_buffer[row + 1];
216 
217  for (unsigned int i = row_Li_start; i < row_Li_end; ++i)
218  {
219  unsigned int col = L_col_buffer[i];
220 
221  unsigned int row_Lj_start = L_row_buffer[col];
222  unsigned int row_Lj_end = L_row_buffer[col+1];
223 
224  // compute \sum_{k=1}^{j-1} l_ik l_jk
225  unsigned int index_Lj = row_Lj_start;
226  unsigned int col_Lj = L_col_buffer[index_Lj];
227  NumericT s = aij_ptr[i];
228  for (unsigned int index_Li = row_Li_start; index_Li < i; ++index_Li)
229  {
230  unsigned int col_Li = L_col_buffer[index_Li];
231 
232  // find element in row j
233  while (col_Lj < col_Li)
234  {
235  ++index_Lj;
236  col_Lj = L_col_buffer[index_Lj];
237  }
238 
239  if (col_Lj == col_Li)
240  s -= L_backup[index_Li] * L_backup[index_Lj];
241  }
242 
243  if (row != col)
244  L_elements[i] = s / L_backup[row_Lj_end - 1]; // diagonal element is last in row!
245  else
246  L_elements[i] = std::sqrt(s);
247  }
248  }
249 
250  free(L_backup);
251 }
252 
253 
254 
256 
257 template<typename NumericT>
261 {
262  // L and U are known to have correct dimensions
263 
264  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
265  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
266  NumericT const *A_elements = detail::extract_raw_pointer<NumericT>(A.handle());
267 
268  unsigned int *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
269  unsigned int *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
270 
271  //
272  // Step 1: Count elements in L and U
273  //
274 #ifdef VIENNACL_WITH_OPENMP
275  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
276 #endif
277  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
278  {
279  unsigned int col_begin = A_row_buffer[row];
280  unsigned int col_end = A_row_buffer[row+1];
281 
282  for (unsigned int j = col_begin; j < col_end; ++j)
283  {
284  unsigned int col = A_col_buffer[j];
285  if (long(col) <= row)
286  ++L_row_buffer[row];
287  if (long(col) >= row)
288  ++U_row_buffer[row];
289  }
290  }
291 
292  //
293  // Step 2: Exclusive scan on row_buffer arrays to get correct starting indices
294  //
295  viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), L.size1() + 1, 0, 1);
296  viennacl::linalg::exclusive_scan(wrapped_L_row_buffer);
297  L.reserve(wrapped_L_row_buffer[L.size1()], false);
298 
299  viennacl::vector_base<unsigned int> wrapped_U_row_buffer(U.handle1(), U.size1() + 1, 0, 1);
300  viennacl::linalg::exclusive_scan(wrapped_U_row_buffer);
301  U.reserve(wrapped_U_row_buffer[U.size1()], false);
302 
303  unsigned int *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
304  NumericT *L_elements = detail::extract_raw_pointer<NumericT>(L.handle());
305 
306  unsigned int *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());
307  NumericT *U_elements = detail::extract_raw_pointer<NumericT>(U.handle());
308 
309  //
310  // Step 3: Write entries:
311  //
312 #ifdef VIENNACL_WITH_OPENMP
313  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
314 #endif
315  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
316  {
317  unsigned int col_begin = A_row_buffer[row];
318  unsigned int col_end = A_row_buffer[row+1];
319 
320  unsigned int index_L = L_row_buffer[row];
321  unsigned int index_U = U_row_buffer[row];
322  for (unsigned int j = col_begin; j < col_end; ++j)
323  {
324  unsigned int col = A_col_buffer[j];
325  NumericT value = A_elements[j];
326 
327  if (long(col) <= row)
328  {
329  L_col_buffer[index_L] = col;
330  L_elements[index_L] = value;
331  ++index_L;
332  }
333 
334  if (long(col) >= row)
335  {
336  U_col_buffer[index_U] = col;
337  U_elements[index_U] = value;
338  ++index_U;
339  }
340  }
341  }
342 
343 } // extract_LU
344 
345 
346 
348 template<typename NumericT>
352 {
354 
355  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
356  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
357  NumericT const *A_elements = detail::extract_raw_pointer<NumericT>(A.handle());
358 
359  NumericT *D_elements = detail::extract_raw_pointer<NumericT>(D.handle());
360 
361  //
362  // Step 1: Determine D
363  //
364 #ifdef VIENNACL_WITH_OPENMP
365  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
366 #endif
367  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
368  {
369  unsigned int col_begin = A_row_buffer[row];
370  unsigned int col_end = A_row_buffer[row+1];
371 
372  for (unsigned int j = col_begin; j < col_end; ++j)
373  {
374  unsigned int col = A_col_buffer[j];
375  if (row == col)
376  {
377  D_elements[row] = NumericT(1) / std::sqrt(std::fabs(A_elements[j]));
378  break;
379  }
380  }
381  }
382 
383  //
384  // Step 2: Scale values in L:
385  //
386  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
387  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
388  NumericT *L_elements = detail::extract_raw_pointer<NumericT>(L.handle());
389 
390 #ifdef VIENNACL_WITH_OPENMP
391  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
392 #endif
393  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
394  {
395  unsigned int col_begin = L_row_buffer[row];
396  unsigned int col_end = L_row_buffer[row+1];
397 
398  NumericT D_row = D_elements[row];
399 
400  for (unsigned int j = col_begin; j < col_end; ++j)
401  L_elements[j] *= D_row * D_elements[L_col_buffer[j]];
402  }
403 
404  //
405  // Step 3: Scale values in U:
406  //
407  unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
408  unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());
409  NumericT *U_elements = detail::extract_raw_pointer<NumericT>(U.handle());
410 
411 #ifdef VIENNACL_WITH_OPENMP
412  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
413 #endif
414  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
415  {
416  unsigned int col_begin = U_row_buffer[row];
417  unsigned int col_end = U_row_buffer[row+1];
418 
419  NumericT D_row = D_elements[row];
420 
421  for (unsigned int j = col_begin; j < col_end; ++j)
422  U_elements[j] *= D_row * D_elements[U_col_buffer[j]];
423  }
424 
426  // Note: block information for U will be generated after transposition
427 
428 }
429 
430 template<typename NumericT>
433 {
434  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
435  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
436  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
437 
438  // initialize datastructures for B:
440 
441  NumericT * B_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(B.handle());
442  unsigned int * B_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle1());
443  unsigned int * B_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle2());
444 
445  // prepare uninitialized B_row_buffer:
446  for (std::size_t i = 0; i < B.size1(); ++i)
447  B_row_buffer[i] = 0;
448 
449  //
450  // Stage 1: Compute pattern for B
451  //
452  for (std::size_t row = 0; row < A.size1(); ++row)
453  {
454  unsigned int row_start = A_row_buffer[row];
455  unsigned int row_stop = A_row_buffer[row+1];
456 
457  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
458  B_row_buffer[A_col_buffer[nnz_index]] += 1;
459  }
460 
461  // Bring row-start array in place using exclusive-scan:
462  unsigned int offset = B_row_buffer[0];
463  B_row_buffer[0] = 0;
464  for (std::size_t row = 1; row < B.size1(); ++row)
465  {
466  unsigned int tmp = B_row_buffer[row];
467  B_row_buffer[row] = offset;
468  offset += tmp;
469  }
470  B_row_buffer[B.size1()] = offset;
471 
472  //
473  // Stage 2: Fill with data
474  //
475 
476  std::vector<unsigned int> B_row_offsets(B.size1()); //number of elements already written per row
477 
478  for (unsigned int row = 0; row < static_cast<unsigned int>(A.size1()); ++row)
479  {
480  //std::cout << "Row " << row << ": ";
481  unsigned int row_start = A_row_buffer[row];
482  unsigned int row_stop = A_row_buffer[row+1];
483 
484  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
485  {
486  unsigned int col_in_A = A_col_buffer[nnz_index];
487  unsigned int B_nnz_index = B_row_buffer[col_in_A] + B_row_offsets[col_in_A];
488  B_col_buffer[B_nnz_index] = row;
489  B_elements[B_nnz_index] = A_elements[nnz_index];
490  ++B_row_offsets[col_in_A];
491  //B_temp.at(A_col_buffer[nnz_index])[row] = A_elements[nnz_index];
492  }
493  }
494 
495  // Step 3: Make datastructure consistent (row blocks!)
497 }
498 
499 
500 
502 template<typename NumericT>
504  vector<NumericT> const & aij_L,
505  compressed_matrix<NumericT> & U_trans,
506  vector<NumericT> const & aij_U_trans)
507 {
508  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
509  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
510  NumericT *L_elements = detail::extract_raw_pointer<NumericT>(L.handle());
511 
512  NumericT const *aij_L_ptr = detail::extract_raw_pointer<NumericT>(aij_L.handle());
513 
514  unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.handle1());
515  unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.handle2());
516  NumericT *U_elements = detail::extract_raw_pointer<NumericT>(U_trans.handle());
517 
518  NumericT const *aij_U_trans_ptr = detail::extract_raw_pointer<NumericT>(aij_U_trans.handle());
519 
520  // temporary workspace
521  NumericT *L_backup = new NumericT[L.nnz()];
522  NumericT *U_backup = new NumericT[U_trans.nnz()];
523 
524  // backup:
525 #ifdef VIENNACL_WITH_OPENMP
526  #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
527 #endif
528  for (long i = 0; i < static_cast<long>(L.nnz()); ++i)
529  L_backup[i] = L_elements[i];
530 
531 #ifdef VIENNACL_WITH_OPENMP
532  #pragma omp parallel for if (U_trans.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
533 #endif
534  for (long i = 0; i < static_cast<long>(U_trans.nnz()); ++i)
535  U_backup[i] = U_elements[i];
536 
537  // sweep
538 #ifdef VIENNACL_WITH_OPENMP
539  #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
540 #endif
541  for (long row = 0; row < static_cast<long>(L.size1()); ++row)
542  {
543  //
544  // update L:
545  //
546  unsigned int row_L_start = L_row_buffer[row];
547  unsigned int row_L_end = L_row_buffer[row + 1];
548 
549  for (unsigned int j = row_L_start; j < row_L_end; ++j)
550  {
551  unsigned int col = L_col_buffer[j];
552 
553  if (col == row)
554  continue;
555 
556  unsigned int row_U_start = U_row_buffer[col];
557  unsigned int row_U_end = U_row_buffer[col + 1];
558 
559  // compute \sum_{k=1}^{j-1} l_ik u_kj
560  unsigned int index_U = row_U_start;
561  unsigned int col_U = (index_U < row_U_end) ? U_col_buffer[index_U] : static_cast<unsigned int>(U_trans.size2());
562  NumericT sum = 0;
563  for (unsigned int k = row_L_start; k < j; ++k)
564  {
565  unsigned int col_L = L_col_buffer[k];
566 
567  // find element in U
568  while (col_U < col_L)
569  {
570  ++index_U;
571  col_U = U_col_buffer[index_U];
572  }
573 
574  if (col_U == col_L)
575  sum += L_backup[k] * U_backup[index_U];
576  }
577 
578  // update l_ij:
579  assert(U_col_buffer[row_U_end - 1] == col && bool("Accessing U element which is not a diagonal element!"));
580  L_elements[j] = (aij_L_ptr[j] - sum) / U_backup[row_U_end - 1]; // diagonal element is last entry in U
581  }
582 
583 
584  //
585  // update U:
586  //
587  unsigned int row_U_start = U_row_buffer[row];
588  unsigned int row_U_end = U_row_buffer[row + 1];
589  for (unsigned int j = row_U_start; j < row_U_end; ++j)
590  {
591  unsigned int col = U_col_buffer[j];
592 
593  row_L_start = L_row_buffer[col];
594  row_L_end = L_row_buffer[col + 1];
595 
596  // compute \sum_{k=1}^{j-1} l_ik u_kj
597  unsigned int index_L = row_L_start;
598  unsigned int col_L = (index_L < row_L_end) ? L_col_buffer[index_L] : static_cast<unsigned int>(L.size1());
599  NumericT sum = 0;
600  for (unsigned int k = row_U_start; k < j; ++k)
601  {
602  unsigned int col_U = U_col_buffer[k];
603 
604  // find element in L
605  while (col_L < col_U)
606  {
607  ++index_L;
608  col_L = L_col_buffer[index_L];
609  }
610 
611  if (col_U == col_L)
612  sum += L_backup[index_L] * U_backup[k];
613  }
614 
615  // update u_ij:
616  U_elements[j] = aij_U_trans_ptr[j] - sum;
617  }
618  }
619 
620  delete[] L_backup;
621  delete[] U_backup;
622 }
623 
624 
625 template<typename NumericT>
627  vector<NumericT> & diag_R)
628 {
629  unsigned int *R_row_buffer = detail::extract_raw_pointer<unsigned int>(R.handle1());
630  unsigned int *R_col_buffer = detail::extract_raw_pointer<unsigned int>(R.handle2());
631  NumericT *R_elements = detail::extract_raw_pointer<NumericT>(R.handle());
632 
633  NumericT *diag_R_ptr = detail::extract_raw_pointer<NumericT>(diag_R.handle());
634 
635 #ifdef VIENNACL_WITH_OPENMP
636  #pragma omp parallel for if (R.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
637 #endif
638  for (long row = 0; row < static_cast<long>(R.size1()); ++row)
639  {
640  unsigned int col_begin = R_row_buffer[row];
641  unsigned int col_end = R_row_buffer[row+1];
642 
643  // part 1: extract diagonal entry
644  NumericT diag = 0;
645  for (unsigned int j = col_begin; j < col_end; ++j)
646  {
647  unsigned int col = R_col_buffer[j];
648  if (col == row)
649  {
650  diag = R_elements[j];
651  R_elements[j] = 0; // (I - D^{-1}R)
652  break;
653  }
654  }
655  diag_R_ptr[row] = diag;
656 
657  assert((diag > 0 || diag < 0) && bool("Zero diagonal detected!"));
658 
659  // part2: scale
660  for (unsigned int j = col_begin; j < col_end; ++j)
661  R_elements[j] /= -diag;
662  }
663 
664  //std::cout << "diag_R: " << diag_R << std::endl;
665 }
666 
667 } //namespace host_based
668 } //namespace linalg
669 } //namespace viennacl
670 
671 
672 #endif
const vcl_size_t & size2() const
Returns the number of columns.
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
Definition: sum.hpp:45
Generic size and resize functionality for different vector and matrix types.
Implementations of vector operations.
const vcl_size_t & size1() const
Returns the number of rows.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
void icc_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > &aij_L)
Performs one nonlinear relaxation step in the Chow-Patel-ICC using OpenMP (cf. Algorithm 3 in paper...
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void extract_LU(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
float NumericT
Definition: bisect.cpp:40
void ilu_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
void generate_row_block_information()
Builds the row block information needed for fast sparse matrix-vector multiplications.
void ilu_form_neumann_matrix(compressed_matrix< NumericT > &R, vector< NumericT > &diag_R)
void extract_L(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Common routines for single-threaded or OpenMP-enabled execution on CPU.
void icc_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:895
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void ilu_transpose(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &B)
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
Implementation of the ViennaCL scalar class.
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128
void ilu_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans)
Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenMP (cf. Algorithm 2 in paper) ...
Simple enable-if variant that uses the SFINAE pattern.