ViennaCL - The Vienna Computing Library 1.7.1
Free open-source GPU-accelerated linear algebra and solver library.

sliced_ell_matrix.hpp
#ifndef VIENNACL_SLICED_ELL_MATRIX_HPP_
#define VIENNACL_SLICED_ELL_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2016, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/sliced_ell_matrix.hpp
    @brief Implementation of the sliced_ell_matrix class
*/

#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/tools/tools.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{
/** @brief Sparse matrix class using the sliced ELLPACK format with parameters C, sigma (SELL-C-sigma), cf. the paper by Kreutzer et al.
  *
  * The rows of the matrix are partitioned into blocks ('slices') of C consecutive rows, where C equals rows_per_block() below.
  * Each block is stored ELLPACK-style: padded to the length of its longest row, with entries laid out column-major within the block.
  */
template<typename ScalarT, typename IndexT /* see forwards.h = unsigned int */>
class sliced_ell_matrix
{
public:
  typedef viennacl::backend::mem_handle                                                          handle_type;
  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<ScalarT>::ResultType>  value_type;
  typedef vcl_size_t                                                                             size_type;

  explicit sliced_ell_matrix() : rows_(0), cols_(0), rows_per_block_(0) {}

  /** @brief Standard constructor for setting the row and column sizes as well as the block size. */
  explicit sliced_ell_matrix(size_type num_rows,
                             size_type num_cols,
                             size_type num_rows_per_block_ = 0)
    : rows_(num_rows),
      cols_(num_cols),
      rows_per_block_(num_rows_per_block_) {}

  explicit sliced_ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), rows_per_block_(0)
  {
    columns_per_block_.switch_active_handle_id(ctx.memory_type());
    column_indices_.switch_active_handle_id(ctx.memory_type());
    block_start_.switch_active_handle_id(ctx.memory_type());
    elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
    if (ctx.memory_type() == OPENCL_MEMORY)
    {
      columns_per_block_.opencl_handle().context(ctx.opencl_context());
      column_indices_.opencl_handle().context(ctx.opencl_context());
      block_start_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
    }
#endif
  }

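  // Illustrative sketch (editorial addition; assumes an OpenCL-enabled build): the context
  // constructor above pins all four buffers of the matrix to a particular device up front, e.g.
  //
  //   viennacl::context ctx(viennacl::ocl::get_context(0));
  //   viennacl::sliced_ell_matrix<float> A(ctx);
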
  /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */
  void clear()
  {
    viennacl::backend::typesafe_host_array<IndexT> host_columns_per_block_buffer(columns_per_block_, rows_ / rows_per_block_ + 1);
    viennacl::backend::typesafe_host_array<IndexT> host_column_buffer(column_indices_, internal_size1());
    viennacl::backend::typesafe_host_array<IndexT> host_block_start_buffer(block_start_, (rows_ - 1) / rows_per_block_ + 1);
    std::vector<ScalarT> host_elements(1);

    viennacl::backend::memory_create(columns_per_block_, host_columns_per_block_buffer.element_size() * (rows_ / rows_per_block_ + 1), viennacl::traits::context(columns_per_block_), host_columns_per_block_buffer.get());
    viennacl::backend::memory_create(column_indices_, host_column_buffer.element_size() * internal_size1(), viennacl::traits::context(column_indices_), host_column_buffer.get());
    viennacl::backend::memory_create(block_start_, host_block_start_buffer.element_size() * ((rows_ - 1) / rows_per_block_ + 1), viennacl::traits::context(block_start_), host_block_start_buffer.get());
    viennacl::backend::memory_create(elements_, sizeof(ScalarT) * 1, viennacl::traits::context(elements_), &(host_elements[0]));
  }

  vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, rows_per_block_); }
  vcl_size_t internal_size2() const { return cols_; }

  vcl_size_t size1() const { return rows_; }
  vcl_size_t size2() const { return cols_; }

  vcl_size_t rows_per_block() const { return rows_per_block_; }

  //vcl_size_t nnz() const { return rows_ * maxnnz_; }
  //vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); }

  /** @brief Returns the memory handle holding the width (number of stored columns) of each block */
  handle_type & handle1() { return columns_per_block_; }
  const handle_type & handle1() const { return columns_per_block_; }

  /** @brief Returns the memory handle holding the column index of each entry */
  handle_type & handle2() { return column_indices_; }
  const handle_type & handle2() const { return column_indices_; }

  /** @brief Returns the memory handle holding the offset of each block within the element buffer */
  handle_type & handle3() { return block_start_; }
  const handle_type & handle3() const { return block_start_; }

  /** @brief Returns the memory handle holding the nonzero values (including padding zeros) */
  handle_type & handle() { return elements_; }
  const handle_type & handle() const { return elements_; }

#if defined(_MSC_VER) && _MSC_VER < 1500          // Visual Studio 2005 needs special treatment
  template<typename CPUMatrixT>
  friend void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix & gpu_matrix);
#else
  template<typename CPUMatrixT, typename ScalarT2, typename IndexT2>
  friend void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix<ScalarT2, IndexT2> & gpu_matrix);
#endif

private:
  vcl_size_t rows_;
  vcl_size_t cols_;
  vcl_size_t rows_per_block_; // parameter C in the paper by Kreutzer et al.

  handle_type columns_per_block_;
  handle_type column_indices_;
  handle_type block_start_;
  handle_type elements_;
};

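// ---------------------------------------------------------------------------------------
// Minimal usage sketch (editorial addition, not part of the original header): assemble a
// matrix on the host as std::vector< std::map<> >, copy it over with the overload further
// below, and run a sparse matrix-vector product. Requires "viennacl/linalg/prod.hpp".
//
//   std::vector< std::map<unsigned int, double> > host_A(4);
//   host_A[0][0] =  4.0; host_A[0][1] = -1.0;
//   host_A[1][0] = -1.0; host_A[1][1] =  4.0; host_A[1][2] = -1.0;
//   host_A[2][1] = -1.0; host_A[2][2] =  4.0; host_A[2][3] = -1.0;
//   host_A[3][2] = -1.0; host_A[3][3] =  4.0;
//
//   viennacl::sliced_ell_matrix<double> A;
//   viennacl::copy(host_A, A);                                    // host -> device
//
//   viennacl::vector<double> x = viennacl::scalar_vector<double>(4, 1.0);
//   viennacl::vector<double> y = viennacl::linalg::prod(A, x);    // y = A * x
// ---------------------------------------------------------------------------------------
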
template<typename CPUMatrixT, typename ScalarT, typename IndexT>
void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix<ScalarT, IndexT> & gpu_matrix)
{
  assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

  if (gpu_matrix.rows_per_block() == 0) // not yet set by the user. Default: 32 is a good fit for NVIDIA GPUs and older AMD GPUs, and still okay for newer AMD GPUs.
    gpu_matrix.rows_per_block_ = 32;

  if (viennacl::traits::size1(cpu_matrix) > 0 && viennacl::traits::size2(cpu_matrix) > 0)
  {
    // first pass: determine the width (maximum number of entries per row) of each block
    IndexT columns_in_current_block = 0;
    vcl_size_t total_element_buffer_size = 0;
    viennacl::backend::typesafe_host_array<IndexT> columns_in_block_buffer(gpu_matrix.handle1(), (viennacl::traits::size1(cpu_matrix) - 1) / gpu_matrix.rows_per_block() + 1);
    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
    {
      vcl_size_t entries_in_row = 0;
      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        ++entries_in_row;

      columns_in_current_block = std::max(columns_in_current_block, static_cast<IndexT>(entries_in_row));

      // check for end of block
      if ( (row_it.index1() % gpu_matrix.rows_per_block() == gpu_matrix.rows_per_block() - 1)
          || row_it.index1() == viennacl::traits::size1(cpu_matrix) - 1)
      {
        total_element_buffer_size += columns_in_current_block * gpu_matrix.rows_per_block();
        columns_in_block_buffer.set(row_it.index1() / gpu_matrix.rows_per_block(), columns_in_current_block);
        columns_in_current_block = 0;
      }
    }

    // second pass: fill the host-side buffers, then push them to the device
    gpu_matrix.rows_ = cpu_matrix.size1();
    gpu_matrix.cols_ = cpu_matrix.size2();

    viennacl::backend::typesafe_host_array<IndexT> coords(gpu_matrix.handle2(), total_element_buffer_size);
    viennacl::backend::typesafe_host_array<IndexT> block_start(gpu_matrix.handle3(), (viennacl::traits::size1(cpu_matrix) - 1) / gpu_matrix.rows_per_block() + 1);
    std::vector<ScalarT> elements(total_element_buffer_size, 0);

    vcl_size_t block_offset = 0;
    vcl_size_t block_index  = 0;
    vcl_size_t row_in_block = 0;
    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
    {
      vcl_size_t entry_in_row = 0;

      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      {
        vcl_size_t buffer_index = block_offset + entry_in_row * gpu_matrix.rows_per_block() + row_in_block;
        coords.set(buffer_index, col_it.index2());
        elements[buffer_index] = *col_it;
        entry_in_row++;
      }

      ++row_in_block;

      // check for end of block:
      if ( (row_it.index1() % gpu_matrix.rows_per_block() == gpu_matrix.rows_per_block() - 1)
          || row_it.index1() == viennacl::traits::size1(cpu_matrix) - 1)
      {
        block_start.set(block_index, static_cast<IndexT>(block_offset));
        block_offset += columns_in_block_buffer[block_index] * gpu_matrix.rows_per_block();
        ++block_index;
        row_in_block = 0;
      }
    }

    viennacl::backend::memory_create(gpu_matrix.handle1(), columns_in_block_buffer.raw_size(), traits::context(gpu_matrix.handle1()), columns_in_block_buffer.get());
    viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(), traits::context(gpu_matrix.handle2()), coords.get());
    viennacl::backend::memory_create(gpu_matrix.handle3(), block_start.raw_size(), traits::context(gpu_matrix.handle3()), block_start.get());
    viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(ScalarT) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0]));
  }
}

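// Illustration of the SELL-C layout produced by copy() above (editorial addition): for a
// 4-row matrix with rows_per_block() == 2, where rows 0/1 hold 2/1 nonzeros and rows 2/3
// hold 3/2 nonzeros, block 0 is padded to width 2 and block 1 to width 3:
//
//   columns_per_block_ (handle1) = { 2, 3 }
//   block_start_       (handle3) = { 0, 4 }   // block 0 occupies 2 rows * 2 columns = 4 slots
//
// Within a block, entry j of block-local row r lands at block_start + j * rows_per_block + r,
// i.e. column-major inside the block; padding slots keep the value 0.
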
/** @brief Copies a sparse matrix in the std::vector< std::map<> > format to the compute device. */
template<typename IndexT, typename NumericT, typename IndexT2>
void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix,
          sliced_ell_matrix<NumericT, IndexT2> & gpu_matrix)
{
  vcl_size_t max_col = 0;
  for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
  {
    if (cpu_matrix[i].size() > 0)
      max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
  }

  viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix);
}

/*
template<typename CPUMatrixT, typename ScalarT, typename IndexT>
void copy(sliced_ell_matrix<ScalarT, IndexT> const & gpu_matrix, CPUMatrixT & cpu_matrix)
{
  assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    std::vector<NumericT> elements(gpu_matrix.internal_nnz());
    viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz());

    viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * elements.size(), &(elements[0]));
    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get());

    for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
    {
      for (vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++)
      {
        vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;

        if (elements[offset] == static_cast<NumericT>(0.0))
          continue;

        if (coords[offset] >= gpu_matrix.size2())
        {
          std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl;
          return;
        }

        cpu_matrix(row, coords[offset]) = elements[offset];
      }
    }
  }
} */
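
// Note (editorial): the device-to-host copy sketched above is left commented out in the
// original source, so this header provides no copy() from sliced_ell_matrix back to a
// host matrix type.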

//
// Specify available operations:
//

/** \cond */

namespace linalg
{
namespace detail
{
  // x = A * y
  template<typename ScalarT, typename IndexT>
  struct op_executor<vector_base<ScalarT>, op_assign, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >
  {
    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
    {
      // check for the special case x = A * x
      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
      {
        viennacl::vector<ScalarT> temp(lhs);
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), ScalarT(1), temp, ScalarT(0));
        lhs = temp;
      }
      else
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), ScalarT(1), lhs, ScalarT(0));
    }
  };

  // x += A * y
  template<typename ScalarT, typename IndexT>
  struct op_executor<vector_base<ScalarT>, op_inplace_add, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >
  {
    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
    {
      // check for the special case x += A * x
      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
      {
        viennacl::vector<ScalarT> temp(lhs);
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), ScalarT(1), temp, ScalarT(0));
        lhs += temp;
      }
      else
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), ScalarT(1), lhs, ScalarT(1));
    }
  };

  // x -= A * y
  template<typename ScalarT, typename IndexT>
  struct op_executor<vector_base<ScalarT>, op_inplace_sub, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >
  {
    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
    {
      // check for the special case x -= A * x
      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
      {
        viennacl::vector<ScalarT> temp(lhs);
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), ScalarT(1), temp, ScalarT(0));
        lhs -= temp;
      }
      else
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), ScalarT(-1), lhs, ScalarT(1));
    }
  };


  // x = A * vec_op
  template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<ScalarT>, op_assign, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
  {
    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
    {
      viennacl::vector<ScalarT> temp(rhs.rhs());    // evaluate the vector expression into a temporary
      viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
    }
  };

  // x += A * vec_op
  template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<ScalarT>, op_inplace_add, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
  {
    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
    {
      viennacl::vector<ScalarT> temp(rhs.rhs());    // evaluate the vector expression into a temporary
      viennacl::vector<ScalarT> temp_result(lhs);
      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
      lhs += temp_result;
    }
  };

  // x -= A * vec_op
  template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<ScalarT>, op_inplace_sub, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
  {
    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
    {
      viennacl::vector<ScalarT> temp(rhs.rhs());    // evaluate the vector expression into a temporary
      viennacl::vector<ScalarT> temp_result(lhs);
      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
      lhs -= temp_result;
    }
  };
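
  // Note (editorial): the specializations above are what let sliced_ell_matrix participate
  // in vector expressions, e.g.
  //
  //   y  = viennacl::linalg::prod(A, x);   // op_assign
  //   y += viennacl::linalg::prod(A, x);   // op_inplace_add
  //   y -= viennacl::linalg::prod(A, x);   // op_inplace_sub
  //
  // Each case dispatches to prod_impl() with the appropriate scaling factors; a temporary is
  // created when the result aliases the argument (y = prod(A, y)) or when the right-hand
  // side is itself a vector expression.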

} // namespace detail
} // namespace linalg

/** \endcond */
} // namespace viennacl

#endif