ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
qr-method.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_QR_METHOD_HPP_
2 #define VIENNACL_LINALG_QR_METHOD_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/vector.hpp"
22 #include "viennacl/matrix.hpp"
23 
25 #include "viennacl/linalg/tql2.hpp"
26 #include "viennacl/linalg/prod.hpp"
27 
28 #include <boost/numeric/ublas/vector.hpp>
29 #include <boost/numeric/ublas/matrix.hpp>
30 
35 namespace viennacl
36 {
37 namespace linalg
38 {
39 namespace detail
40 {
41 template <typename SCALARTYPE>
43  int n,
44  int last_n,
45  SCALARTYPE q,
46  SCALARTYPE p
47  )
48 {
49  (void)A; (void)n; (void)last_n; (void)q; (void)p;
50 #ifdef VIENNACL_WITH_OPENCL
51  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
52 
53  if(A.row_major())
54  {
56 
58  A,
59  static_cast<cl_uint>(A.internal_size1()),
60  static_cast<cl_uint>(n),
61  static_cast<cl_uint>(last_n),
62  q,
63  p
64  ));
65  }
66  else
67  {
69 
71  A,
72  static_cast<cl_uint>(A.internal_size1()),
73  static_cast<cl_uint>(n),
74  static_cast<cl_uint>(last_n),
75  q,
76  p
77  ));
78  }
79 #endif
80 }
81 template <typename SCALARTYPE, typename VectorType>
83  const VectorType& buf,
85  int m,
86  int n,
87  int last_n,
88  bool //is_triangular
89  )
90 {
91  (void)A; (void)buf; (void)buf_vcl; (void)m; (void)n; (void)last_n;
92 #ifdef VIENNACL_WITH_OPENCL
93  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
94 
95  viennacl::fast_copy(buf, buf_vcl);
96 
97  if(A.row_major())
98  {
100 
101  viennacl::ocl::enqueue(kernel(
102  A,
103  static_cast<cl_uint>(A.internal_size1()),
104  buf_vcl,
105  static_cast<cl_uint>(m),
106  static_cast<cl_uint>(n),
107  static_cast<cl_uint>(last_n)
108  ));
109  }
110  else
111  {
113 
114  viennacl::ocl::enqueue(kernel(
115  A,
116  static_cast<cl_uint>(A.internal_size1()),
117  buf_vcl,
118  static_cast<cl_uint>(m),
119  static_cast<cl_uint>(n),
120  static_cast<cl_uint>(last_n)
121  ));
122  }
123 
124 #endif
125 }
126 
127  template<typename SCALARTYPE, typename MatrixT>
128  void final_iter_update(MatrixT& A,
129  int n,
130  int last_n,
131  SCALARTYPE q,
132  SCALARTYPE p
133  )
134  {
135  for (int i = 0; i < last_n; i++)
136  {
137  SCALARTYPE v_in = A(i, n);
138  SCALARTYPE z = A(i, n - 1);
139  A(i, n - 1) = q * z + p * v_in;
140  A(i, n) = q * v_in - p * z;
141  }
142  }
143 
144  template<typename SCALARTYPE, typename MatrixT>
145  void update_float_QR_column(MatrixT& A,
146  const std::vector<SCALARTYPE>& buf,
147  int m,
148  int n,
149  int last_i,
150  bool is_triangular
151  )
152  {
153  for (int i = 0; i < last_i; i++)
154  {
155  int start_k = is_triangular?std::max(i + 1, m):m;
156 
157  SCALARTYPE* a_row = A.row(i);
158 
159  SCALARTYPE a_ik = a_row[start_k];
160  SCALARTYPE a_ik_1 = 0;
161  SCALARTYPE a_ik_2 = 0;
162 
163  if (start_k < n)
164  a_ik_1 = a_row[start_k + 1];
165 
166  for (int k = start_k; k < n; k++)
167  {
168  bool notlast = (k != n - 1);
169 
170  SCALARTYPE p = buf[5 * static_cast<vcl_size_t>(k)] * a_ik + buf[5 * static_cast<vcl_size_t>(k) + 1] * a_ik_1;
171 
172  if (notlast)
173  {
174  a_ik_2 = a_row[k + 2];
175  p = p + buf[5 * static_cast<vcl_size_t>(k) + 2] * a_ik_2;
176  a_ik_2 = a_ik_2 - p * buf[5 * static_cast<vcl_size_t>(k) + 4];
177  }
178 
179  a_row[k] = a_ik - p;
180  a_ik_1 = a_ik_1 - p * buf[5 * static_cast<vcl_size_t>(k) + 3];
181 
182  a_ik = a_ik_1;
183  a_ik_1 = a_ik_2;
184  }
185 
186  if (start_k < n)
187  a_row[n] = a_ik;
188  }
189  }
190 
192  template<typename SCALARTYPE>
194  {
195  public:
197  {
198  size_ = 0;
199  }
200 
201  FastMatrix(vcl_size_t sz, vcl_size_t internal_size) : size_(sz), internal_size_(internal_size)
202  {
203  data.resize(internal_size * internal_size);
204  }
205 
206  SCALARTYPE& operator()(int i, int j)
207  {
208  return data[static_cast<vcl_size_t>(i) * internal_size_ + static_cast<vcl_size_t>(j)];
209  }
210 
211  SCALARTYPE* row(int i)
212  {
213  return &data[static_cast<vcl_size_t>(i) * internal_size_];
214  }
215 
216  SCALARTYPE* begin()
217  {
218  return &data[0];
219  }
220 
221  SCALARTYPE* end()
222  {
223  return &data[0] + data.size();
224  }
225 
226  std::vector<SCALARTYPE> data;
227  private:
228  vcl_size_t size_;
229  vcl_size_t internal_size_;
230  };
231 
232  // Nonsymmetric reduction from Hessenberg to real Schur form.
233  // This is derived from the Algol procedure hqr2, by Martin and Wilkinson, Handbook for Auto. Comp.,
234  // Vol.ii-Linear Algebra, and the corresponding Fortran subroutine in EISPACK.
235  template <typename SCALARTYPE, typename VectorType>
238  VectorType & d,
239  VectorType & e)
240  {
241  transpose(V);
242 
243  int nn = static_cast<int>(vcl_H.size1());
244 
245  FastMatrix<SCALARTYPE> H(vcl_size_t(nn), vcl_H.internal_size2());//, V(nn);
246 
247  std::vector<SCALARTYPE> buf(5 * vcl_size_t(nn));
248  //boost::numeric::ublas::vector<float> buf(5 * nn);
249  viennacl::vector<SCALARTYPE> buf_vcl(5 * vcl_size_t(nn));
250 
251  viennacl::fast_copy(vcl_H, H.begin());
252 
253 
254  int n = nn - 1;
255 
256  SCALARTYPE eps = 2 * static_cast<SCALARTYPE>(EPS);
257  SCALARTYPE exshift = 0;
258  SCALARTYPE p = 0;
259  SCALARTYPE q = 0;
260  SCALARTYPE r = 0;
261  SCALARTYPE s = 0;
262  SCALARTYPE z = 0;
263  SCALARTYPE t;
264  SCALARTYPE w;
265  SCALARTYPE x;
266  SCALARTYPE y;
267 
268  SCALARTYPE out1, out2;
269 
270  // compute matrix norm
271  SCALARTYPE norm = 0;
272  for (int i = 0; i < nn; i++)
273  {
274  for (int j = std::max(i - 1, 0); j < nn; j++)
275  norm = norm + std::fabs(H(i, j));
276  }
277 
278  // Outer loop over eigenvalue index
279  int iter = 0;
280  while (n >= 0)
281  {
282  // Look for single small sub-diagonal element
283  int l = n;
284  while (l > 0)
285  {
286  s = std::fabs(H(l - 1, l - 1)) + std::fabs(H(l, l));
287  if (s <= 0)
288  s = norm;
289  if (std::fabs(H(l, l - 1)) < eps * s)
290  break;
291 
292  l--;
293  }
294 
295  // Check for convergence
296  if (l == n)
297  {
298  // One root found
299  H(n, n) = H(n, n) + exshift;
300  d[vcl_size_t(n)] = H(n, n);
301  e[vcl_size_t(n)] = 0;
302  n--;
303  iter = 0;
304  }
305  else if (l == n - 1)
306  {
307  // Two roots found
308  w = H(n, n - 1) * H(n - 1, n);
309  p = (H(n - 1, n - 1) - H(n, n)) / 2;
310  q = p * p + w;
311  z = static_cast<SCALARTYPE>(std::sqrt(std::fabs(q)));
312  H(n, n) = H(n, n) + exshift;
313  H(n - 1, n - 1) = H(n - 1, n - 1) + exshift;
314  x = H(n, n);
315 
316  if (q >= 0)
317  {
318  // Real pair
319  z = (p >= 0) ? (p + z) : (p - z);
320  d[vcl_size_t(n) - 1] = x + z;
321  d[vcl_size_t(n)] = d[vcl_size_t(n) - 1];
322  if (z <= 0 && z >= 0) // z == 0 without compiler complaints
323  d[vcl_size_t(n)] = x - w / z;
324  e[vcl_size_t(n) - 1] = 0;
325  e[vcl_size_t(n)] = 0;
326  x = H(n, n - 1);
327  s = std::fabs(x) + std::fabs(z);
328  p = x / s;
329  q = z / s;
330  r = static_cast<SCALARTYPE>(std::sqrt(p * p + q * q));
331  p = p / r;
332  q = q / r;
333 
334  // Row modification
335  for (int j = n - 1; j < nn; j++)
336  {
337  SCALARTYPE h_nj = H(n, j);
338  z = H(n - 1, j);
339  H(n - 1, j) = q * z + p * h_nj;
340  H(n, j) = q * h_nj - p * z;
341  }
342 
343  final_iter_update(H, n, n + 1, q, p);
344  final_iter_update_gpu(V, n, nn, q, p);
345  }
346  else
347  {
348  // Complex pair
349  d[vcl_size_t(n) - 1] = x + p;
350  d[vcl_size_t(n)] = x + p;
351  e[vcl_size_t(n) - 1] = z;
352  e[vcl_size_t(n)] = -z;
353  }
354 
355  n = n - 2;
356  iter = 0;
357  }
358  else
359  {
360  // No convergence yet
361 
362  // Form shift
363  x = H(n, n);
364  y = 0;
365  w = 0;
366  if (l < n)
367  {
368  y = H(n - 1, n - 1);
369  w = H(n, n - 1) * H(n - 1, n);
370  }
371 
372  // Wilkinson's original ad hoc shift
373  if (iter == 10)
374  {
375  exshift += x;
376  for (int i = 0; i <= n; i++)
377  H(i, i) -= x;
378 
379  s = std::fabs(H(n, n - 1)) + std::fabs(H(n - 1, n - 2));
380  x = y = SCALARTYPE(0.75) * s;
381  w = SCALARTYPE(-0.4375) * s * s;
382  }
383 
384  // MATLAB's new ad hoc shift
385  if (iter == 30)
386  {
387  s = (y - x) / 2;
388  s = s * s + w;
389  if (s > 0)
390  {
391  s = static_cast<SCALARTYPE>(std::sqrt(s));
392  if (y < x) s = -s;
393  s = x - w / ((y - x) / 2 + s);
394  for (int i = 0; i <= n; i++)
395  H(i, i) -= s;
396  exshift += s;
397  x = y = w = SCALARTYPE(0.964);
398  }
399  }
400 
401  iter = iter + 1;
402 
403  // Look for two consecutive small sub-diagonal elements
404  int m = n - 2;
405  while (m >= l)
406  {
407  SCALARTYPE h_m1_m1 = H(m + 1, m + 1);
408  z = H(m, m);
409  r = x - z;
410  s = y - z;
411  p = (r * s - w) / H(m + 1, m) + H(m, m + 1);
412  q = h_m1_m1 - z - r - s;
413  r = H(m + 2, m + 1);
414  s = std::fabs(p) + std::fabs(q) + std::fabs(r);
415  p = p / s;
416  q = q / s;
417  r = r / s;
418  if (m == l)
419  break;
420  if (std::fabs(H(m, m - 1)) * (std::fabs(q) + std::fabs(r)) < eps * (std::fabs(p) * (std::fabs(H(m - 1, m - 1)) + std::fabs(z) + std::fabs(h_m1_m1))))
421  break;
422  m--;
423  }
424 
425  for (int i = m + 2; i <= n; i++)
426  {
427  H(i, i - 2) = 0;
428  if (i > m + 2)
429  H(i, i - 3) = 0;
430  }
431 
432  // float QR step involving rows l:n and columns m:n
433  for (int k = m; k < n; k++)
434  {
435  bool notlast = (k != n - 1);
436  if (k != m)
437  {
438  p = H(k, k - 1);
439  q = H(k + 1, k - 1);
440  r = (notlast ? H(k + 2, k - 1) : 0);
441  x = std::fabs(p) + std::fabs(q) + std::fabs(r);
442  if (x > 0)
443  {
444  p = p / x;
445  q = q / x;
446  r = r / x;
447  }
448  }
449 
450  if (x <= 0 && x >= 0) break; // x == 0 without compiler complaints
451 
452  s = static_cast<SCALARTYPE>(std::sqrt(p * p + q * q + r * r));
453  if (p < 0) s = -s;
454 
455  if (s < 0 || s > 0)
456  {
457  if (k != m)
458  H(k, k - 1) = -s * x;
459  else
460  if (l != m)
461  H(k, k - 1) = -H(k, k - 1);
462 
463  p = p + s;
464  y = q / s;
465  z = r / s;
466  x = p / s;
467  q = q / p;
468  r = r / p;
469 
470  buf[5 * vcl_size_t(k)] = x;
471  buf[5 * vcl_size_t(k) + 1] = y;
472  buf[5 * vcl_size_t(k) + 2] = z;
473  buf[5 * vcl_size_t(k) + 3] = q;
474  buf[5 * vcl_size_t(k) + 4] = r;
475 
476 
477  SCALARTYPE* a_row_k = H.row(k);
478  SCALARTYPE* a_row_k_1 = H.row(k + 1);
479  SCALARTYPE* a_row_k_2 = H.row(k + 2);
480  // Row modification
481  for (int j = k; j < nn; j++)
482  {
483  SCALARTYPE h_kj = a_row_k[j];
484  SCALARTYPE h_k1_j = a_row_k_1[j];
485 
486  p = h_kj + q * h_k1_j;
487  if (notlast)
488  {
489  SCALARTYPE h_k2_j = a_row_k_2[j];
490  p = p + r * h_k2_j;
491  a_row_k_2[j] = h_k2_j - p * z;
492  }
493 
494  a_row_k[j] = h_kj - p * x;
495  a_row_k_1[j] = h_k1_j - p * y;
496  }
497 
498  //H(k + 1, nn - 1) = h_kj;
499 
500 
501  // Column modification
502  for (int i = k; i < std::min(nn, k + 4); i++)
503  {
504  p = x * H(i, k) + y * H(i, k + 1);
505  if (notlast)
506  {
507  p = p + z * H(i, k + 2);
508  H(i, k + 2) = H(i, k + 2) - p * r;
509  }
510 
511  H(i, k) = H(i, k) - p;
512  H(i, k + 1) = H(i, k + 1) - p * q;
513  }
514  }
515  else
516  {
517  buf[5 * vcl_size_t(k)] = 0;
518  buf[5 * vcl_size_t(k) + 1] = 0;
519  buf[5 * vcl_size_t(k) + 2] = 0;
520  buf[5 * vcl_size_t(k) + 3] = 0;
521  buf[5 * vcl_size_t(k) + 4] = 0;
522  }
523  }
524 
525  // Timer timer;
526  // timer.start();
527 
528  update_float_QR_column<SCALARTYPE>(H, buf, m, n, n, true);
529  update_float_QR_column_gpu(V, buf, buf_vcl, m, n, nn, false);
530 
531  // std::cout << timer.get() << "\n";
532  }
533  }
534 
535  // Backsubstitute to find vectors of upper triangular form
536  if (norm <= 0)
537  {
538  return;
539  }
540 
541  for (n = nn - 1; n >= 0; n--)
542  {
543  p = d[vcl_size_t(n)];
544  q = e[vcl_size_t(n)];
545 
546  // Real vector
547  if (q <= 0 && q >= 0)
548  {
549  int l = n;
550  H(n, n) = 1;
551  for (int i = n - 1; i >= 0; i--)
552  {
553  w = H(i, i) - p;
554  r = 0;
555  for (int j = l; j <= n; j++)
556  r = r + H(i, j) * H(j, n);
557 
558  if (e[vcl_size_t(i)] < 0)
559  {
560  z = w;
561  s = r;
562  }
563  else
564  {
565  l = i;
566  if (e[vcl_size_t(i)] <= 0) // e[i] == 0 with previous if
567  {
568  H(i, n) = (w > 0 || w < 0) ? (-r / w) : (-r / (eps * norm));
569  }
570  else
571  {
572  // Solve real equations
573  x = H(i, i + 1);
574  y = H(i + 1, i);
575  q = (d[vcl_size_t(i)] - p) * (d[vcl_size_t(i)] - p) + e[vcl_size_t(i)] * e[vcl_size_t(i)];
576  t = (x * s - z * r) / q;
577  H(i, n) = t;
578  H(i + 1, n) = (std::fabs(x) > std::fabs(z)) ? ((-r - w * t) / x) : ((-s - y * t) / z);
579  }
580 
581  // Overflow control
582  t = std::fabs(H(i, n));
583  if ((eps * t) * t > 1)
584  for (int j = i; j <= n; j++)
585  H(j, n) /= t;
586  }
587  }
588  }
589  else if (q < 0)
590  {
591  // Complex vector
592  int l = n - 1;
593 
594  // Last vector component imaginary so matrix is triangular
595  if (std::fabs(H(n, n - 1)) > std::fabs(H(n - 1, n)))
596  {
597  H(n - 1, n - 1) = q / H(n, n - 1);
598  H(n - 1, n) = -(H(n, n) - p) / H(n, n - 1);
599  }
600  else
601  {
602  cdiv<SCALARTYPE>(0, -H(n - 1, n), H(n - 1, n - 1) - p, q, out1, out2);
603 
604  H(n - 1, n - 1) = out1;
605  H(n - 1, n) = out2;
606  }
607 
608  H(n, n - 1) = 0;
609  H(n, n) = 1;
610  for (int i = n - 2; i >= 0; i--)
611  {
612  SCALARTYPE ra, sa, vr, vi;
613  ra = 0;
614  sa = 0;
615  for (int j = l; j <= n; j++)
616  {
617  SCALARTYPE h_ij = H(i, j);
618  ra = ra + h_ij * H(j, n - 1);
619  sa = sa + h_ij * H(j, n);
620  }
621 
622  w = H(i, i) - p;
623 
624  if (e[vcl_size_t(i)] < 0)
625  {
626  z = w;
627  r = ra;
628  s = sa;
629  }
630  else
631  {
632  l = i;
633  if (e[vcl_size_t(i)] <= 0) // e[i] == 0 with previous if
634  {
635  cdiv<SCALARTYPE>(-ra, -sa, w, q, out1, out2);
636  H(i, n - 1) = out1;
637  H(i, n) = out2;
638  }
639  else
640  {
641  // Solve complex equations
642  x = H(i, i + 1);
643  y = H(i + 1, i);
644  vr = (d[vcl_size_t(i)] - p) * (d[vcl_size_t(i)] - p) + e[vcl_size_t(i)] * e[vcl_size_t(i)] - q * q;
645  vi = (d[vcl_size_t(i)] - p) * 2 * q;
646  if ( (vr <= 0 && vr >= 0) && (vi <= 0 && vi >= 0) )
647  vr = eps * norm * (std::fabs(w) + std::fabs(q) + std::fabs(x) + std::fabs(y) + std::fabs(z));
648 
649  cdiv<SCALARTYPE>(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi, out1, out2);
650 
651  H(i, n - 1) = out1;
652  H(i, n) = out2;
653 
654 
655  if (std::fabs(x) > (std::fabs(z) + std::fabs(q)))
656  {
657  H(i + 1, n - 1) = (-ra - w * H(i, n - 1) + q * H(i, n)) / x;
658  H(i + 1, n) = (-sa - w * H(i, n) - q * H(i, n - 1)) / x;
659  }
660  else
661  {
662  cdiv<SCALARTYPE>(-r - y * H(i, n - 1), -s - y * H(i, n), z, q, out1, out2);
663 
664  H(i + 1, n - 1) = out1;
665  H(i + 1, n) = out2;
666  }
667  }
668 
669  // Overflow control
670  t = std::max(std::fabs(H(i, n - 1)), std::fabs(H(i, n)));
671  if ((eps * t) * t > 1)
672  {
673  for (int j = i; j <= n; j++)
674  {
675  H(j, n - 1) /= t;
676  H(j, n) /= t;
677  }
678  }
679  }
680  }
681  }
682  }
683 
684  viennacl::fast_copy(H.begin(), H.end(), vcl_H);
685  // viennacl::fast_copy(V.begin(), V.end(), vcl_V);
686 
688 
689  V = viennacl::linalg::prod(trans(tmp), vcl_H);
690  }
691 
692 
693  template <typename SCALARTYPE>
699  {
700  vcl_size_t A_size1 = static_cast<vcl_size_t>(viennacl::traits::size1(A));
701  if(start + 2 >= A_size1)
702  return false;
703 
704  prepare_householder_vector(A, D, A_size1, start + 1, start, start + 1, true);
707  viennacl::linalg::house_update_QL(Q, D, A_size1);
708 
709  return true;
710  }
711 
712  template <typename SCALARTYPE>
715  {
716  vcl_size_t sz = A.size1();
717 
718  viennacl::vector<SCALARTYPE> hh_vector(sz);
719 
720  for(vcl_size_t i = 0; i < sz; i++)
721  {
722  householder_twoside(A, Q, hh_vector, i);
723  }
724 
725  }
726 
727  template <typename SCALARTYPE>
730  std::vector<SCALARTYPE> & D,
731  std::vector<SCALARTYPE> & E,
732  bool is_symmetric = true)
733  {
734 
735  assert(A.size1() == A.size2() && bool("Input matrix must be square for QR method!"));
736  /* if (!viennacl::is_row_major<F>::value && !is_symmetric)
737  {
738  std::cout << "qr_method for non-symmetric column-major matrices not implemented yet!" << std::endl;
739  exit(EXIT_FAILURE);
740  }
741 
742  */
743  vcl_size_t mat_size = A.size1();
744  D.resize(A.size1());
745  E.resize(A.size1());
746 
747  viennacl::vector<SCALARTYPE> vcl_D(mat_size), vcl_E(mat_size);
748  //std::vector<SCALARTYPE> std_D(mat_size), std_E(mat_size);
749 
751 
752  // reduce to tridiagonal form
754 
755  // pack diagonal and super-diagonal
756  viennacl::linalg::bidiag_pack(A, vcl_D, vcl_E);
757  copy(vcl_D, D);
758  copy(vcl_E, E);
759 
760  // find eigenvalues of symmetric tridiagonal matrix
761  if(is_symmetric)
762  {
763  viennacl::linalg::tql2(Q, D, E);
764 
765  }
766  else
767  {
768  detail::hqr2(A, Q, D, E);
769  }
770 
771 
772  boost::numeric::ublas::matrix<SCALARTYPE> eigen_values(A.size1(), A.size1());
773  eigen_values.clear();
774 
775  for (vcl_size_t i = 0; i < A.size1(); i++)
776  {
777  if(std::fabs(E[i]) < EPS)
778  {
779  eigen_values(i, i) = D[i];
780  }
781  else
782  {
783  eigen_values(i, i) = D[i];
784  eigen_values(i, i + 1) = E[i];
785  eigen_values(i + 1, i) = -E[i];
786  eigen_values(i + 1, i + 1) = D[i];
787  i++;
788  }
789  }
790 
791  copy(eigen_values, A);
792  }
793 }
794 
795 template <typename SCALARTYPE>
798  std::vector<SCALARTYPE>& D,
799  std::vector<SCALARTYPE>& E
800  )
801 {
802  detail::qr_method(A, Q, D, E, false);
803 }
804 
805 template <typename SCALARTYPE>
808  std::vector<SCALARTYPE>& D
809  )
810 {
811  std::vector<SCALARTYPE> E(A.size1());
812 
813  detail::qr_method(A, Q, D, E, true);
814 }
815 
816 template <typename SCALARTYPE>
820  )
821 {
822  std::vector<SCALARTYPE> std_D(D.size());
823  std::vector<SCALARTYPE> E(A.size1());
824 
825  viennacl::copy(D, std_D);
826  detail::qr_method(A, Q, std_D, E, true);
827  viennacl::copy(std_D, D);
828 }
829 
830 }
831 }
832 
833 #endif
#define EPS
Definition: bisect.cpp:38
void final_iter_update(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Definition: qr-method.hpp:128
Internal helper class representing a row-major dense matrix used for the QR method for the purpose of...
Definition: qr-method.hpp:193
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Implementation of the dense matrix class.
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
void tql2(matrix_base< SCALARTYPE, F > &Q, VectorType &d, VectorType &e)
Definition: tql2.hpp:131
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
void final_iter_update_gpu(matrix_base< SCALARTYPE > &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Definition: qr-method.hpp:42
const std::string SVD_UPDATE_QR_COLUMN_KERNEL
Implementation of the tql2-algorithm for eigenvalue computations.
A dense matrix class.
Definition: forwards.h:375
void transpose(matrix_base< SCALARTYPE > &A)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:375
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
void prepare_householder_vector(matrix_base< SCALARTYPE > &A, vector_base< SCALARTYPE > &D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column)
void update_float_QR_column_gpu(matrix_base< SCALARTYPE > &A, const VectorType &buf, viennacl::vector< SCALARTYPE > &buf_vcl, int m, int n, int last_n, bool)
Definition: qr-method.hpp:82
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
Common routines used for the QR method and SVD. Experimental.
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:644
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
void qr_method_nsm(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D, std::vector< SCALARTYPE > &E)
Definition: qr-method.hpp:796
Common base class for dense vectors, vector ranges, and vector slices.
Definition: vector_def.hpp:104
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void tridiagonal_reduction(matrix_base< SCALARTYPE > &A, matrix_base< SCALARTYPE > &Q)
Definition: qr-method.hpp:713
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
std::vector< SCALARTYPE > data
Definition: qr-method.hpp:226
void qr_method(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D, std::vector< SCALARTYPE > &E, bool is_symmetric=true)
Definition: qr-method.hpp:728
SCALARTYPE & operator()(int i, int j)
Definition: qr-method.hpp:206
void update_float_QR_column(MatrixT &A, const std::vector< SCALARTYPE > &buf, int m, int n, int last_i, bool is_triangular)
Definition: qr-method.hpp:145
FastMatrix(vcl_size_t sz, vcl_size_t internal_size)
Definition: qr-method.hpp:201
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:248
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
const std::string SVD_FINAL_ITER_UPDATE_KERNEL
void hqr2(viennacl::matrix< SCALARTYPE > &vcl_H, viennacl::matrix< SCALARTYPE > &V, VectorType &d, VectorType &e)
Definition: qr-method.hpp:236
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
bool householder_twoside(matrix_base< SCALARTYPE > &A, matrix_base< SCALARTYPE > &Q, vector_base< SCALARTYPE > &D, vcl_size_t start)
Definition: qr-method.hpp:694
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
void qr_method_sym(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D)
Definition: qr-method.hpp:806