ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
lu.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_LU_HPP
2 #define VIENNACL_LINALG_LU_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <algorithm> //for std::min
26 
27 #include "viennacl/matrix.hpp"
29 
30 #include "viennacl/linalg/prod.hpp"
32 
33 namespace viennacl
34 {
35 namespace linalg
36 {
41 template<typename NumericT>
43 {
44  typedef matrix<NumericT, viennacl::row_major> MatrixType;
45 
46  vcl_size_t max_block_size = 32;
47  vcl_size_t num_blocks = (A.size2() - 1) / max_block_size + 1;
48  std::vector<NumericT> temp_buffer(A.internal_size2() * max_block_size);
49 
50  // Iterate over panels
51  for (vcl_size_t panel_id = 0; panel_id < num_blocks; ++panel_id)
52  {
53  vcl_size_t row_start = panel_id * max_block_size;
54  vcl_size_t current_block_size = std::min<vcl_size_t>(A.size1() - row_start, max_block_size);
55 
56  viennacl::range block_range(row_start, row_start + current_block_size);
57  viennacl::range remainder_range(row_start + current_block_size, A.size1());
58 
59  //
60  // Perform LU factorization on panel:
61  //
62 
63 
64  // Read from matrix to buffer:
66  sizeof(NumericT) * row_start * A.internal_size2(),
67  sizeof(NumericT) * current_block_size * A.internal_size2(),
68  &(temp_buffer[0]));
69 
70  // Factorize (kij-version):
71  for (vcl_size_t k=0; k < current_block_size - 1; ++k)
72  {
73  for (vcl_size_t i=k+1; i < current_block_size; ++i)
74  {
75  temp_buffer[row_start + i * A.internal_size2() + k] /= temp_buffer[row_start + k * A.internal_size2() + k]; // write l_ik
76 
77  NumericT l_ik = temp_buffer[row_start + i * A.internal_size2() + k];
78 
79  for (vcl_size_t j = row_start + k + 1; j < A.size1(); ++j)
80  temp_buffer[i * A.internal_size2() + j] -= l_ik * temp_buffer[k * A.internal_size2() + j]; // l_ik * a_kj
81  }
82  }
83 
84  // Write back:
86  sizeof(NumericT) * row_start * A.internal_size2(),
87  sizeof(NumericT) * current_block_size * A.internal_size2(),
88  &(temp_buffer[0]));
89 
90  if (remainder_range.size() > 0)
91  {
92  //
93  // Compute L_12 = [ (U_11)^{T}^{-1} A_{21}^T ]^T
94  //
95  viennacl::matrix_range<MatrixType> U_11(A, block_range, block_range);
96  viennacl::matrix_range<MatrixType> A_21(A, remainder_range, block_range);
98 
99  //
100  // Update remainder of A
101  //
102  viennacl::matrix_range<MatrixType> L_21(A, remainder_range, block_range);
103  viennacl::matrix_range<MatrixType> U_12(A, block_range, remainder_range);
104  viennacl::matrix_range<MatrixType> A_22(A, remainder_range, remainder_range);
105 
106  A_22 -= viennacl::linalg::prod(L_21, U_12);
107  }
108  }
109 
110 }
111 
112 
117 template<typename NumericT>
119 {
120  typedef matrix<NumericT, viennacl::column_major> MatrixType;
121 
122  vcl_size_t max_block_size = 32;
123  vcl_size_t num_blocks = (A.size1() - 1) / max_block_size + 1;
124  std::vector<NumericT> temp_buffer(A.internal_size1() * max_block_size);
125 
126  // Iterate over panels
127  for (vcl_size_t panel_id = 0; panel_id < num_blocks; ++panel_id)
128  {
129  vcl_size_t col_start = panel_id * max_block_size;
130  vcl_size_t current_block_size = std::min<vcl_size_t>(A.size1() - col_start, max_block_size);
131 
132  viennacl::range block_range(col_start, col_start + current_block_size);
133  viennacl::range remainder_range(col_start + current_block_size, A.size1());
134 
135  //
136  // Perform LU factorization on panel:
137  //
138 
139 
140  // Read from matrix to buffer:
142  sizeof(NumericT) * col_start * A.internal_size1(),
143  sizeof(NumericT) * current_block_size * A.internal_size1(),
144  &(temp_buffer[0]));
145 
146  // Factorize (kji-version):
147  for (vcl_size_t k=0; k < current_block_size; ++k)
148  {
149  NumericT a_kk = temp_buffer[col_start + k + k * A.internal_size1()];
150  for (vcl_size_t i=col_start+k+1; i < A.size1(); ++i)
151  temp_buffer[i + k * A.internal_size1()] /= a_kk; // write l_ik
152 
153  for (vcl_size_t j=k+1; j < current_block_size; ++j)
154  {
155  NumericT a_kj = temp_buffer[col_start + k + j * A.internal_size1()];
156  for (vcl_size_t i=col_start+k+1; i < A.size1(); ++i)
157  temp_buffer[i + j * A.internal_size1()] -= temp_buffer[i + k * A.internal_size1()] * a_kj; // l_ik * a_kj
158  }
159  }
160 
161  // Write back:
163  sizeof(NumericT) * col_start * A.internal_size1(),
164  sizeof(NumericT) * current_block_size * A.internal_size1(),
165  &(temp_buffer[0]));
166 
167  if (remainder_range.size() > 0)
168  {
169  //
170  // Compute U_12:
171  //
172  viennacl::matrix_range<MatrixType> L_11(A, block_range, block_range);
173  viennacl::matrix_range<MatrixType> A_12(A, block_range, remainder_range);
175 
176  //
177  // Update remainder of A
178  //
179  viennacl::matrix_range<MatrixType> L_21(A, remainder_range, block_range);
180  viennacl::matrix_range<MatrixType> U_12(A, block_range, remainder_range);
181  viennacl::matrix_range<MatrixType> A_22(A, remainder_range, remainder_range);
182 
183  A_22 -= viennacl::linalg::prod(L_21, U_12);
184  }
185 
186  }
187 
188 }
189 
190 
191 //
192 // Convenience layer:
193 //
194 
200 template<typename NumericT, typename F1, typename F2, unsigned int AlignmentV1, unsigned int AlignmentV2>
203 {
204  assert(A.size1() == A.size2() && bool("Matrix must be square"));
205  assert(A.size1() == B.size1() && bool("Matrix must be square"));
206  inplace_solve(A, B, unit_lower_tag());
207  inplace_solve(A, B, upper_tag());
208 }
209 
215 template<typename NumericT, typename F, unsigned int MatAlignmentV, unsigned int VecAlignmentV>
218 {
219  assert(A.size1() == A.size2() && bool("Matrix must be square"));
220  inplace_solve(A, vec, unit_lower_tag());
221  inplace_solve(A, vec, upper_tag());
222 }
223 
224 }
225 }
226 
227 #endif
void inplace_solve(const matrix_base< NumericT > &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
void lu_substitute(matrix< NumericT, F1, AlignmentV1 > const &A, matrix< NumericT, F2, AlignmentV2 > &B)
LU substitution for the system LU = rhs.
Definition: lu.hpp:201
A tag class representing a lower triangular matrix.
Definition: forwards.h:849
A dense matrix class.
Definition: forwards.h:375
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
float NumericT
Definition: bisect.cpp:40
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
A tag class representing an upper triangular matrix.
Definition: forwards.h:854
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:244
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
Implementations of dense direct solvers are found here.
Proxy classes for matrices.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:859
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:440
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
void lu_factorize(matrix< NumericT, viennacl::row_major > &A)
LU factorization of a row-major dense matrix.
Definition: lu.hpp:42