ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_MATRIX_HPP_
2 #define VIENNACL_MATRIX_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
27 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
36 
37 namespace viennacl
38 {
39 
40 //#ifdef VIENNACL_WITH_OPENCL
41 // template<class NumericT, class DISTRIBUTION>
42 // rand::random_matrix_t<NumericT, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){
43 // return rand::random_matrix_t<NumericT,DISTRIBUTION>(size1,size2,distribution);
44 // }
45 //#endif
46 
53 template<typename LHS, typename RHS, typename OP>
54 class matrix_expression
55 {
56  typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type lhs_reference_type;
57  typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type rhs_reference_type;
58 
59 public:
61 
62  matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}
63 
66  LHS & lhs() const { return lhs_; }
69  RHS & rhs() const { return rhs_; }
70 
74 
75 private:
77  lhs_reference_type lhs_;
79  rhs_reference_type rhs_;
80 };
81 
82 
84 struct row_iteration {};
85 
87 struct col_iteration {};
88 
89 //STL-like iterator. TODO: STL-compliance...
91 template<typename ROWCOL, typename MatrixT>
92 class matrix_iterator
93 {
94  typedef matrix_iterator<ROWCOL, MatrixT> self_type;
95 public:
96  typedef typename MatrixT::value_type value_type;
97 
98  matrix_iterator(MatrixT & mat,
99  vcl_size_t start_row,
100  vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
101 
102  value_type operator*(void) { return mat_(row_, col_); }
104  self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
105 
106  bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
107  bool operator!=(self_type const & other) { return !(*this == other); }
108 
109  vcl_size_t index1() { return row_; }
110  vcl_size_t index2() { return col_; }
111 
112  MatrixT & operator()(void) const { return mat_; }
113 
114 private:
115  MatrixT & mat_;
116  vcl_size_t row_;
117  vcl_size_t col_;
118 };
119 
126 template<class NumericT, typename SizeT, typename DistanceT>
128  : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1),
129  internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, dense_padding_size)),
130  internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, dense_padding_size)),
131  row_major_fixed_(true), row_major_(is_row_major)
132 {
133  if (rows > 0 && columns > 0)
134  {
135  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx);
136  clear();
137  }
138 }
139 
142 template<class NumericT, typename SizeT, typename DistanceT>
143 template<typename LHS, typename RHS, typename OP>
145  size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1),
146  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
147  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
148  row_major_fixed_(true), row_major_(viennacl::traits::row_major(proxy))
149 {
151  if (internal_size() > 0)
152  {
154  clear();
155  self_type::operator=(proxy);
156  }
157 }
158 
159 // CUDA or host memory:
160 template<class NumericT, typename SizeT, typename DistanceT>
162  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
163  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
164  bool is_row_major)
165  : size1_(mat_size1), size2_(mat_size2),
166  start1_(mat_start1), start2_(mat_start2),
167  stride1_(mat_stride1), stride2_(mat_stride2),
168  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
169  row_major_fixed_(true), row_major_(is_row_major)
170 {
171  if (mem_type == viennacl::CUDA_MEMORY)
172  {
173 #ifdef VIENNACL_WITH_CUDA
175  elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
176  elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
177 #else
179 #endif
180  }
181  else if (mem_type == viennacl::MAIN_MEMORY)
182  {
184  elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
185  elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
186  }
187 
188  elements_.raw_size(sizeof(NumericT) * internal_size());
189 }
190 
191 #ifdef VIENNACL_WITH_OPENCL
192 template<class NumericT, typename SizeT, typename DistanceT>
193 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx)
194  : size1_(rows), size2_(columns),
195  start1_(0), start2_(0),
196  stride1_(1), stride2_(1),
197  internal_size1_(rows), internal_size2_(columns),
198  row_major_fixed_(true), row_major_(is_row_major)
199 {
201  elements_.opencl_handle() = mem;
202  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
203  elements_.opencl_handle().context(ctx.opencl_context());
204  elements_.raw_size(sizeof(NumericT)*internal_size());
205 }
206 
207 template<class NumericT, typename SizeT, typename DistanceT>
209  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
210  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
211  bool is_row_major)
212  : size1_(mat_size1), size2_(mat_size2),
213  start1_(mat_start1), start2_(mat_start2),
214  stride1_(mat_stride1), stride2_(mat_stride2),
215  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
216  row_major_fixed_(true), row_major_(is_row_major)
217 {
219  elements_.opencl_handle() = mem;
220  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
221  elements_.opencl_handle().context(ctx.opencl_context());
222  elements_.raw_size(sizeof(NumericT)*internal_size());
223 }
224 #endif
225 
226 // Copy CTOR
227 template<class NumericT, typename SizeT, typename DistanceT>
229  size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1),
230  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
231  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
232  row_major_fixed_(true), row_major_(other.row_major())
233 {
235  if (internal_size() > 0)
236  {
238  clear();
239  self_type::operator=(other);
240  }
241 }
242 
243 // Conversion CTOR
244 template<typename NumericT, typename SizeT, typename DistanceT>
245 template<typename OtherNumericT>
247  size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1),
248  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
249  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
250  row_major_fixed_(true), row_major_(other.row_major())
251 {
253  if (internal_size() > 0)
254  {
256  clear();
257  self_type::operator=(other);
258  }
259 }
260 
261 template<class NumericT, typename SizeT, typename DistanceT>
263 {
264  if (&other==this)
265  return *this;
266 
267  if (internal_size() == 0)
268  {
269  if (other.internal_size() == 0)
270  return *this;
271  if (!row_major_fixed_)
272  row_major_ = other.row_major();
273  resize(other.size1(), other.size2(), false);
274  }
275 
276  viennacl::linalg::am(*this,
277  other, cpu_value_type(1.0), 1, false, false);
278  return *this;
279 }
280 
281 // Conversion assignment
282 template<class NumericT, typename SizeT, typename DistanceT>
283 template<typename OtherNumericT>
285 {
286  if (internal_size() == 0)
287  {
288  if (other.internal_size() == 0)
289  return *this;
290  if (!row_major_fixed_)
291  row_major_ = other.row_major();
292  resize(other.size1(), other.size2(), false);
293  }
294 
295  viennacl::linalg::convert(*this, other);
296  return *this;
297 }
298 
303 template<class NumericT, typename SizeT, typename DistanceT>
304 template<typename LHS, typename RHS, typename OP>
306 {
307  assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0)
308  && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
309  && bool("Incompatible matrix sizes!"));
310  if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
311  {
312  size1_ = viennacl::traits::size1(proxy);
313  size2_ = viennacl::traits::size2(proxy);
314  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
315  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
316  if (!row_major_fixed_)
317  row_major_ = viennacl::traits::row_major(proxy);
319  if (size1_ != internal_size1_ || size2_ != internal_size2_)
320  clear();
321  }
322 
323  if (internal_size() > 0)
325 
326  return *this;
327 }
328 
329 
330 // A = trans(B)
331 template<class NumericT, typename SizeT, typename DistanceT>
333 {
334  if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 )
335  {
336  size1_ = viennacl::traits::size1(proxy);
337  size2_ = viennacl::traits::size2(proxy);
338  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
339  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
340  if (!row_major_fixed_)
341  row_major_ = viennacl::traits::row_major(proxy);
342  }
343 
344  if ( handle() == proxy.lhs().handle() )
345  {
346  viennacl::matrix_base<NumericT> temp(proxy.lhs().size2(), proxy.lhs().size1(),proxy.lhs().row_major());
347  viennacl::linalg::trans(proxy, temp);
348  if ( proxy.lhs().size1() != proxy.lhs().size2() )
349  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
350  elements_ = temp.handle();
351  }
352  else
353  {
354  if ( proxy.lhs().size1() != proxy.lhs().size2() )
355  this->resize(proxy.lhs().size2(), proxy.lhs().size1(), false);
356  viennacl::linalg::trans(proxy, *this);
357  }
358  return *this;
359 }
360 
361 template<class NumericT, typename SizeT, typename DistanceT>
362 template<typename LHS, typename RHS, typename OP>
364 {
365  assert( (viennacl::traits::size1(proxy) == size1())
366  && (viennacl::traits::size2(proxy) == size2())
367  && bool("Incompatible matrix sizes!"));
368  assert( (size1() > 0) && bool("Vector not yet initialized!") );
369  assert( (size2() > 0) && bool("Vector not yet initialized!") );
370 
372 
373  return *this;
374 }
375 
376 template<class NumericT, typename SizeT, typename DistanceT>
377 template<typename LHS, typename RHS, typename OP>
379 {
380  assert( (viennacl::traits::size1(proxy) == size1())
381  && (viennacl::traits::size2(proxy) == size2())
382  && bool("Incompatible matrix sizes!"));
383  assert( (size1() > 0) && bool("Vector not yet initialized!") );
384  assert( (size2() > 0) && bool("Vector not yet initialized!") );
385 
387 
388  return *this;
389 }
390 
392 template<class NumericT, typename SizeT, typename DistanceT>
394 {
395  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
396  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
397 
398  if (internal_size() == 0)
399  {
400  size1_ = m.size1();
401  size2_ = m.size2();
402  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
403  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
404  if (internal_size() > 0)
405  {
407  clear();
408  }
409  }
410  else
412 
413  if (internal_size() > 0)
415 
416  return *this;
417 }
418 
420 template<class NumericT, typename SizeT, typename DistanceT>
422 {
423  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
424  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
425 
426  if (internal_size() == 0)
427  {
428  size1_ = m.size1();
429  size2_ = m.size2();
430  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
431  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
432  if (internal_size() > 0)
433  {
435  clear();
436  }
437  }
438  else
440 
441  return *this;
442 }
443 
445 template<class NumericT, typename SizeT, typename DistanceT>
447 {
448  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
449  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
450 
451  if (internal_size() == 0)
452  {
453  size1_ = m.size1();
454  size2_ = m.size2();
455  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
456  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
457  if (internal_size() > 0)
458  {
460  clear();
461  }
462  }
463 
464  if (internal_size() > 0)
465  {
466  viennacl::linalg::matrix_assign(*this, m(0,0));
467  }
468 
469  return *this;
470 }
471 
472 
473 //read-write access to an element of the matrix/matrix_range/matrix_slice
476 template<class NumericT, typename SizeT, typename DistanceT>
478 {
479  if (row_major_)
480  return entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
481  return entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
482 }
483 
486 template<class NumericT, typename SizeT, typename DistanceT>
488 {
489  if (row_major_)
490  return const_entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
491  return const_entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
492 }
493 
494 //
495 // Operator overloads for enabling implicit conversions:
496 //
497 template<class NumericT, typename SizeT, typename DistanceT>
499 {
501  *this, NumericT(1.0), 1, false, false,
502  other, NumericT(1.0), 1, false, false);
503  return *this;
504 }
505 
506 template<class NumericT, typename SizeT, typename DistanceT>
508 {
510  *this, NumericT(1.0), 1, false, false,
511  other, NumericT(1.0), 1, false, true);
512  return *this;
513 }
514 
516 template<class NumericT, typename SizeT, typename DistanceT>
518 {
519  viennacl::linalg::am(*this,
520  *this, NumericT(val), 1, false, false);
521  return *this;
522 }
523 
525 template<class NumericT, typename SizeT, typename DistanceT>
527 {
528  viennacl::linalg::am(*this,
529  *this, NumericT(val), 1, false, false);
530  return *this;
531 }
532 
534 template<class NumericT, typename SizeT, typename DistanceT>
536 {
537  viennacl::linalg::am(*this,
538  *this, NumericT(val), 1, false, false);
539  return *this;
540 }
541 
543 template<class NumericT, typename SizeT, typename DistanceT>
545 {
546  viennacl::linalg::am(*this,
547  *this, NumericT(val), 1, false, false);
548  return *this;
549 }
550 
552 template<class NumericT, typename SizeT, typename DistanceT>
554 {
555  viennacl::linalg::am(*this,
556  *this, NumericT(val), 1, false, false);
557  return *this;
558 }
559 
561 template<class NumericT, typename SizeT, typename DistanceT>
563 {
564  viennacl::linalg::am(*this,
565  *this, NumericT(val), 1, false, false);
566  return *this;
567 }
568 
569 
570 
572 template<class NumericT, typename SizeT, typename DistanceT>
574 {
575  viennacl::linalg::am(*this,
576  *this, NumericT(val), 1, true, false);
577  return *this;
578 }
579 
581 template<class NumericT, typename SizeT, typename DistanceT>
583 {
584  viennacl::linalg::am(*this,
585  *this, NumericT(val), 1, true, false);
586  return *this;
587 }
588 
590 template<class NumericT, typename SizeT, typename DistanceT>
592 {
593  viennacl::linalg::am(*this,
594  *this, NumericT(val), 1, true, false);
595  return *this;
596 }
597 
599 template<class NumericT, typename SizeT, typename DistanceT>
601 {
602  viennacl::linalg::am(*this,
603  *this, NumericT(val), 1, true, false);
604  return *this;
605 }
606 
608 template<class NumericT, typename SizeT, typename DistanceT>
610 {
611  viennacl::linalg::am(*this,
612  *this, NumericT(val), 1, true, false);
613  return *this;
614 }
615 
617 template<class NumericT, typename SizeT, typename DistanceT>
619 {
620  viennacl::linalg::am(*this,
621  *this, NumericT(val), 1, true, false);
622  return *this;
623 }
624 
625 
627 template<class NumericT, typename SizeT, typename DistanceT>
629 {
631 }
632 
633 template<class NumericT, typename SizeT, typename DistanceT>
635 
636 
637 template<class NumericT, typename SizeT, typename DistanceT>
639 {
640  assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));
641 
642  if (preserve && internal_size() > 0)
643  {
644  //get old entries:
645  std::vector< NumericT > old_entries(internal_size());
646  viennacl::backend::memory_read(elements_, 0, sizeof(NumericT)*internal_size(), &(old_entries[0]));
647 
648  //set up entries of new matrix:
649  std::vector< NumericT > new_entries( viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size)
650  * viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size));
651  for (size_type i=0; i<rows; ++i)
652  {
653  if (i >= size1_)
654  continue;
655 
656  for (size_type j=0; j<columns; ++j)
657  {
658  if (j >= size2_)
659  continue;
660  if (row_major_)
661  new_entries[row_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
662  = old_entries[row_major::mem_index(i, j, internal_size1(), internal_size2())];
663  else
664  new_entries[column_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
665  = old_entries[column_major::mem_index(i, j, internal_size1(), internal_size2())];
666  }
667  }
668 
669  //copy new entries to GPU:
670  size1_ = rows;
671  size2_ = columns;
672  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
673  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
674  viennacl::backend::memory_create(elements_, sizeof(NumericT)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0]));
675  }
676  else //discard old entries:
677  {
678  size1_ = rows;
679  size2_ = columns;
680  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
681  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
682 
684  clear();
685  }
686 }
687 
688 
695 template<class NumericT, typename F, unsigned int AlignmentV>
696 class matrix : public matrix_base<NumericT>
697 {
698  typedef matrix<NumericT, F, AlignmentV> self_type;
699  typedef matrix_base<NumericT> base_type;
700 public:
701  typedef typename base_type::size_type size_type;
702 
704  explicit matrix() : base_type(static_cast<bool>(viennacl::is_row_major<F>::value)) {}
705 
712  explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, viennacl::is_row_major<F>::value, ctx) {}
713 
721  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
722  : base_type(ptr_to_mem, mem_type,
723  rows, 0, 1, rows,
724  cols, 0, 1, cols,
725  viennacl::is_row_major<F>::value) {}
726 
727 
737  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type,
738  size_type rows, size_type internal_row_count,
739  size_type cols, size_type internal_col_count)
740  : base_type(ptr_to_mem, mem_type,
741  rows, 0, 1, internal_row_count,
742  cols, 0, 1, internal_col_count,
743  true, viennacl::is_row_major<F>::value) {}
744 
745 #ifdef VIENNACL_WITH_OPENCL
746  explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns, viennacl::is_row_major<F>::value) {}
747 #endif
748 
749  template<typename LHS, typename RHS, typename OP>
751 
753  matrix(identity_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
754  {
755  if (base_type::internal_size() > 0)
757  }
758 
760  matrix(zero_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
761  {
762  if (base_type::internal_size() > 0)
764  }
765 
767  matrix(scalar_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
768  {
769  if (base_type::internal_size() > 0)
771  }
772 
773  matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
774  {
775  base_type::operator=(other);
776  }
777 
778 
779  //copy constructor:
780  matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
781  {
782  base_type::operator=(other);
783  }
784 
785 
786  /*template<typename M1>
787  self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
788  {
789  self_type temp(proxy.lhs());
790  *this = trans(temp);
791  return *this;
792  }*/
793 
794  using base_type::operator=;
795 
796  // the following are needed for Visual Studio:
797  template<typename OtherNumericT, typename F2>
799 
800  template<typename OtherNumericT, typename F2>
802 
803  template<typename OtherNumericT, typename F2>
805 
813  void resize(size_type rows, size_type columns, bool preserve = true)
814  {
815  base_type::resize(rows, columns, preserve);
816  }
817 
818 }; //matrix
819 
820 
821 
827 template<class NumericT>
828 std::ostream & operator<<(std::ostream & s, const matrix_base<NumericT> & gpu_matrix)
829 {
830  typedef typename matrix_base<NumericT>::size_type size_type;
831 
832  std::vector<NumericT> tmp(gpu_matrix.internal_size());
833  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0]));
834 
835  s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
836 
837  s << "(";
838  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
839  {
840  s << "(";
841  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
842  {
843  if (gpu_matrix.row_major())
844  s << tmp[row_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
845  else
846  s << tmp[column_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
847 
848  if (j < gpu_matrix.size2() - 1)
849  s << ",";
850  }
851  s << ")";
852  if (i < gpu_matrix.size1() - 1)
853  s << ",";
854  }
855  s << ")";
856  return s;
857 }
858 
864 template<typename LHS, typename RHS, typename OP>
865 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
866 {
868 
869  matrix<ScalarType> temp = expr;
870  s << temp;
871  return s;
872 }
873 
875 template<typename NumericT>
876 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>
878 {
880 }
881 
883 template<typename LhsT, typename RhsT, typename OpT>
884 matrix_expression< const matrix_expression<const LhsT, const RhsT, OpT>, const matrix_expression<const LhsT, const RhsT, OpT>, op_trans>
886 {
889  op_trans>(proxy, proxy);
890 }
891 
892 //diag():
893 template<typename NumericT>
894 vector_expression< const matrix_base<NumericT>, const int, op_matrix_diag>
895 diag(const matrix_base<NumericT> & A, int k = 0)
896 {
898 }
899 
900 template<typename NumericT>
901 matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
902 diag(const vector_base<NumericT> & v, int k = 0)
903 {
905 }
906 
907 // row():
908 template<typename NumericT, typename F>
909 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
910 row(const matrix_base<NumericT, F> & A, unsigned int i)
911 {
912  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i);
913 }
914 
915 // column():
916 template<typename NumericT, typename F>
917 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
918 column(const matrix_base<NumericT, F> & A, unsigned int j)
919 {
920  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j);
921 }
922 
924 
925 //
926 //cpu to gpu, generic type:
927 //
933 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
934 void copy(const CPUMatrixT & cpu_matrix,
935  matrix<NumericT, F, AlignmentV> & gpu_matrix )
936 {
937  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
938 
939  //std::cout << "Copying CPUMatrixT!" << std::endl;
940  //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
941  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
942  {
943  gpu_matrix.resize(cpu_matrix.size1(),
944  cpu_matrix.size2(), false);
945  }
946 
947  assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );
948 
949  std::vector<NumericT> data(gpu_matrix.internal_size());
950  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
951  {
952  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
953  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
954  }
955 
956  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
957  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
958  //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
959 }
960 
961 //
962 //cpu to gpu, STL type:
963 //
969 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
970 void copy(const std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix,
971  matrix<NumericT, F, AlignmentV> & gpu_matrix )
972 {
973  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
974 
975  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
976  {
977  gpu_matrix.resize(cpu_matrix.size(),
978  cpu_matrix[0].size(),
979  false);
980  }
981 
982  assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );
983 
984  std::vector<NumericT> data(gpu_matrix.internal_size());
985  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
986  {
987  assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );
988 
989  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
990  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
991  }
992 
993  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
994  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
995 }
996 
997 
998 //
999 //cpu to gpu, another STL type:
1000 //
1009 template<typename NumericT, typename F, unsigned int AlignmentV>
1010 void fast_copy(NumericT * cpu_matrix_begin,
1011  NumericT * cpu_matrix_end,
1012  matrix<NumericT, F, AlignmentV> & gpu_matrix)
1013 {
1014  if (gpu_matrix.internal_size() == 0)
1015  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);
1016  else
1017  {
1018  assert( (gpu_matrix.internal_size() >= static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin)) && bool("fast_copy(): Matrix not large enough to fit data!"));
1019  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), cpu_matrix_begin);
1020  }
1021 }
1022 
1023 #ifdef VIENNACL_WITH_ARMADILLO
1024 
1029 template<typename NumericT, typename F, unsigned int AlignmentV>
1030 void copy(arma::Mat<NumericT> const & arma_matrix,
1032 {
1033  typedef typename viennacl::matrix<NumericT, F, AlignmentV>::size_type size_type;
1034 
1035  if (vcl_matrix.size1() == 0 || vcl_matrix.size2() == 0)
1036  {
1037  vcl_matrix.resize(arma_matrix.n_rows,
1038  arma_matrix.n_cols,
1039  false);
1040  }
1041  else
1042  {
1043  assert( (vcl_matrix.size1() == static_cast<vcl_size_t>(arma_matrix.n_rows))
1044  && (vcl_matrix.size2() == static_cast<vcl_size_t>(arma_matrix.n_cols))
1045  && bool("matrix size mismatch")
1046  );
1047  }
1048 
1049  // prepare buffer:
1051  for (size_type j = 0; j < vcl_matrix.size2(); ++j) // iterate along columns is certainly fast for arma_matrix
1052  for (size_type i = 0; i < vcl_matrix.size1(); ++i)
1053  data.set(F::mem_index(i, j, vcl_matrix.internal_size1(), vcl_matrix.internal_size2()), arma_matrix(i,j));
1054 
1055  // copy over:
1056  viennacl::backend::memory_write(vcl_matrix.handle(), 0, data.raw_size(), data.get());
1057 }
1058 #endif
1059 
1060 #ifdef VIENNACL_WITH_EIGEN
1061 namespace detail
1062 {
1063  template<typename EigenMatrixTypeT, typename NumericT, typename F, unsigned int AlignmentV>
1064  void copy_from_eigen_matrix(EigenMatrixTypeT const & cpu_matrix,
1066  {
1067  typedef typename viennacl::matrix<NumericT, F, AlignmentV>::size_type size_type;
1068 
1069  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1070  {
1071  gpu_matrix.resize(cpu_matrix.rows(),
1072  cpu_matrix.cols(),
1073  false);
1074  }
1075  else
1076  {
1077  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
1078  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
1079  && bool("matrix size mismatch")
1080  );
1081  }
1082 
1083  std::vector<NumericT> data(gpu_matrix.internal_size());
1084  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1085  {
1086  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1087  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
1088  }
1089 
1090  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
1091  }
1092 
1093 }
1094 
1100 template<typename NumericT, int EigenOptions, typename F, unsigned int AlignmentV>
1101 void copy(Eigen::Matrix<NumericT, Eigen::Dynamic, Eigen::Dynamic, EigenOptions> const & cpu_matrix,
1103 {
1104  detail::copy_from_eigen_matrix(cpu_matrix, vcl_matrix);
1105 }
1106 
1112 template<typename NumericT, int EigenOptions, int EigenMatTypeV, typename EigenStrideT, typename F, unsigned int AlignmentV>
1113 void copy(Eigen::Map<Eigen::Matrix<NumericT, Eigen::Dynamic, Eigen::Dynamic, EigenOptions>, EigenMatTypeV, EigenStrideT> const & cpu_matrix,
1115 {
1116  detail::copy_from_eigen_matrix(cpu_matrix, vcl_matrix);
1117 }
1118 #endif
1119 
1120 #ifdef VIENNACL_WITH_MTL4
1121 
1126 template<typename NumericT, typename T, typename F, unsigned int AlignmentV>
1127 void copy(const mtl::dense2D<NumericT, T>& cpu_matrix,
1128  matrix<NumericT, F, AlignmentV> & gpu_matrix)
1129 {
1130  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
1131 
1132  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1133  {
1134  gpu_matrix.resize(cpu_matrix.num_rows(),
1135  cpu_matrix.num_cols(),
1136  false);
1137  }
1138  else
1139  {
1140  assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
1141  && (gpu_matrix.size2() == cpu_matrix.num_cols())
1142  && bool("matrix size mismatch")
1143  );
1144  }
1145 
1146  std::vector<NumericT> data(gpu_matrix.internal_size());
1147  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1148  {
1149  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1150  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
1151  }
1152 
1153  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
1154  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1155 }
1156 #endif
1157 
1158 
1159 
1160 
1161 //
1162 //gpu to cpu, generic type
1163 //
1169 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
1170 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1171  CPUMatrixT & cpu_matrix )
1172 {
1173  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1174 
1175  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1176  {
1177  assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
1178 
1179  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1180  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1181 
1182  //now copy entries to cpu_matrix:
1183  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1184  {
1185  assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));
1186  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1187  cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1188  }
1189  }
1190 }
1191 
1192 //gpu to cpu, STL type
1198 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
1199 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1200  std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix)
1201 {
1202  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1203 
1204  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1205  {
1206  assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));
1207 
1208  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1209  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1210 
1211  //now copy entries to cpu_matrix:
1212  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1213  {
1214  assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));
1215 
1216  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1217  cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1218  }
1219  }
1220 }
1221 
1222 //gpu to cpu, STL type
1230 template<typename NumericT, typename F, unsigned int AlignmentV>
1232  NumericT * cpu_matrix_begin)
1233 {
1234  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin);
1235 }
1236 
1237 
1238 
1240 
1241 
1242 // operator +
1244 template<typename LHS1, typename RHS1, typename OP1,
1245  typename LHS2, typename RHS2, typename OP2>
1246 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1247 const matrix_expression<const LHS2, const RHS2, OP2>,
1248 op_add>
1251 {
1252  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1253  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1254  && bool("Incompatible matrix sizes!"));
1257  op_add>(proxy1, proxy2);
1258 }
1259 
1260 template<typename LHS1, typename RHS1, typename OP1,
1261  typename NumericT>
1262 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1263 const matrix_base<NumericT>,
1264 op_add>
1266  matrix_base<NumericT> const & proxy2)
1267 {
1268  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1269  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1270  && bool("Incompatible matrix sizes!"));
1272  const matrix_base<NumericT>,
1273  op_add>(proxy1, proxy2);
1274 }
1275 
1276 template<typename NumericT,
1277  typename LHS2, typename RHS2, typename OP2>
1278 matrix_expression< const matrix_base<NumericT>,
1279 const matrix_expression<const LHS2, const RHS2, OP2>,
1280 op_add>
1283 {
1284  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1285  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1286  && bool("Incompatible matrix sizes!"));
1289  op_add>(proxy1, proxy2);
1290 }
1291 
1293 template<typename NumericT>
1294 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_add >
1296 {
1298  const matrix_base<NumericT>,
1299  op_add > (m1, m2);
1300 }
1301 
1302 
1303 // operator -
1304 template<typename LHS1, typename RHS1, typename OP1,
1305  typename LHS2, typename RHS2, typename OP2>
1306 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1307 const matrix_expression<const LHS2, const RHS2, OP2>,
1308 op_sub>
1311 {
1312  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1313  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1314  && bool("Incompatible matrix sizes!"));
1317  op_sub>(proxy1, proxy2);
1318 }
1319 
1320 template<typename LHS1, typename RHS1, typename OP1,
1321  typename NumericT>
1322 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1323 const matrix_base<NumericT>,
1324 op_sub>
1326  matrix_base<NumericT> const & proxy2)
1327 {
1328  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1329  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1330  && bool("Incompatible matrix sizes!"));
1332  const matrix_base<NumericT>,
1333  op_sub>(proxy1, proxy2);
1334 }
1335 
1336 template<typename NumericT,
1337  typename LHS2, typename RHS2, typename OP2>
1338 matrix_expression< const matrix_base<NumericT>,
1339 const matrix_expression<const LHS2, const RHS2, OP2>,
1340 op_sub>
1343 {
1344  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1345  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1346  && bool("Incompatible matrix sizes!"));
1349  op_sub>(proxy1, proxy2);
1350 }
1351 
1353 template<typename NumericT>
1354 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_sub >
1356 {
1358  const matrix_base<NumericT>,
1359  op_sub > (m1, m2);
1360 }
1361 
1362 
1363 
1364 // operator *
1370 template<typename S1, typename NumericT>
1372 matrix_expression< const matrix_base<NumericT>, const S1, op_mult>
1373 >::type
1374 operator * (S1 const & value, matrix_base<NumericT> const & m1)
1375 {
1376  return matrix_expression< const matrix_base<NumericT>, const S1, op_mult>(m1, value);
1377 }
1378 
1380 template<typename NumericT>
1381 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1383 {
1385 }
1386 
1388 template<typename NumericT>
1389 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1390 operator * (short value, matrix_base<NumericT> const & m1)
1391 {
1393 }
1394 
1396 template<typename NumericT>
1397 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1399 {
1401 }
1402 
1404 template<typename NumericT>
1405 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1407 {
1409 }
1410 
1412 template<typename NumericT>
1413 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1414 operator * (float value, matrix_base<NumericT> const & m1)
1415 {
1417 }
1418 
1420 template<typename NumericT>
1421 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1422 operator * (double value, matrix_base<NumericT> const & m1)
1423 {
1425 }
1426 
1427 
1428 
1434 template<typename LHS, typename RHS, typename OP, typename S1>
1436 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1438  S1 const & val)
1439 {
1441 }
1442 
1443 
1449 template<typename S1, typename LHS, typename RHS, typename OP>
1451 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1452 operator * (S1 const & val,
1453  matrix_expression< LHS, RHS, OP> const & proxy)
1454 {
1456 }
1457 
1460 template<typename NumericT, typename S1>
1462 matrix_expression< const matrix_base<NumericT>, const S1, op_mult> >::type
1464 {
1466 }
1467 
1469 template<typename NumericT>
1470 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1472 {
1474 }
1475 
1477 template<typename NumericT>
1478 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1480 {
1482 }
1483 
1485 template<typename NumericT>
1486 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1488 {
1490 }
1491 
1493 template<typename NumericT>
1494 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1496 {
1498 }
1499 
1501 template<typename NumericT>
1502 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1504 {
1506 }
1507 
1509 template<typename NumericT>
1510 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1512 {
1514 }
1515 
1516 
1517 // operator *=
1518 
1520 template<typename NumericT, typename S1>
1521 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1522 operator *= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1523 {
1524  bool is_sign_flip = viennacl::is_flip_sign_scalar<S1>::value;
1526  m1, gpu_val, 1, false, is_sign_flip ? true : false);
1527  return m1;
1528 }
1529 
1531 template<typename NumericT>
1532 matrix_base<NumericT> &
1534 {
1536  m1, NumericT(gpu_val), 1, false, false);
1537  return m1;
1538 }
1539 
1541 template<typename NumericT>
1542 matrix_base<NumericT> &
1544 {
1546  m1, NumericT(gpu_val), 1, false, false);
1547  return m1;
1548 }
1549 
1551 template<typename NumericT>
1552 matrix_base<NumericT> &
1554 {
1556  m1, NumericT(gpu_val), 1, false, false);
1557  return m1;
1558 }
1559 
1561 template<typename NumericT>
1562 matrix_base<NumericT> &
1564 {
1566  m1, NumericT(gpu_val), 1, false, false);
1567  return m1;
1568 }
1569 
1571 template<typename NumericT>
1572 matrix_base<NumericT> &
1574 {
1576  m1, NumericT(gpu_val), 1, false, false);
1577  return m1;
1578 }
1579 
1581 template<typename NumericT>
1582 matrix_base<NumericT> &
1584 {
1586  m1, NumericT(gpu_val), 1, false, false);
1587  return m1;
1588 }
1589 
1590 
1591 
1592 // operator /
1593 
1594 
1600 template<typename LHS, typename RHS, typename OP, typename S1>
1602 matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type
1604  S1 const & val)
1605 {
1607 }
1608 
1609 
1611 template<typename NumericT, typename S1>
1613 matrix_expression< const matrix_base<NumericT>, const S1, op_div> >::type
1615 {
1617 }
1618 
1620 template<typename NumericT>
1621 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1623 {
1625 }
1626 
1628 template<typename NumericT>
1629 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1631 {
1633 }
1634 
1636 template<typename NumericT>
1637 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1639 {
1641 }
1642 
1644 template<typename NumericT>
1645 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1647 {
1649 }
1650 
1652 template<typename NumericT>
1653 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1655 {
1657 }
1658 
1660 template<typename NumericT>
1661 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1663 {
1665 }
1666 
1667 
1668 
1669 // operator /=
1670 
1672 template<typename NumericT, typename S1>
1673 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1674 operator /= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1675 {
1677  m1, gpu_val, 1, true, false);
1678  return m1;
1679 }
1680 
1682 template<typename NumericT>
1683 matrix_base<NumericT> &
1685 {
1687  m1, NumericT(gpu_val), 1, true, false);
1688  return m1;
1689 }
1690 
1692 template<typename NumericT>
1693 matrix_base<NumericT> &
1695 {
1697  m1, gpu_val, 1, true, false);
1698  return m1;
1699 }
1700 
1702 template<typename NumericT>
1703 matrix_base<NumericT> &
1705 {
1707  m1, gpu_val, 1, true, false);
1708  return m1;
1709 }
1710 
1712 template<typename NumericT>
1713 matrix_base<NumericT> &
1715 {
1717  m1, gpu_val, 1, true, false);
1718  return m1;
1719 }
1720 
1722 template<typename NumericT>
1723 matrix_base<NumericT> &
1725 {
1727  m1, gpu_val, 1, true, false);
1728  return m1;
1729 }
1730 
1732 template<typename NumericT>
1733 matrix_base<NumericT> &
1735 {
1737  m1, gpu_val, 1, true, false);
1738  return m1;
1739 }
1740 
1741 
1742 
1743 
1744 
1745 // outer_prod(v1, v2) * val;
1746 template<typename NumericT, typename S1>
1749 const S1,
1750 op_mult>
1751 >::type
1753  const S1 & val)
1754 {
1756  const S1,
1757  op_mult>(proxy, val);
1758 }
1759 
1760 template<typename NumericT, typename S1>
1763 const NumericT,
1764 op_mult>
1765 >::type
1767  const S1 & val)
1768 {
1770  const NumericT,
1771  op_mult>(proxy, NumericT(val));
1772 }
1773 
1774 // val * outer_prod(v1, v2);
1775 template<typename NumericT, typename S1>
1778 const S1,
1779 op_mult>
1780 >::type
1781 operator*(const S1 & val,
1783 {
1785  const S1,
1786  op_mult>(proxy, val);
1787 }
1788 
1789 template<typename NumericT, typename S1>
1792 const NumericT,
1793 op_mult>
1794 >::type
1795 operator*(const S1 & val,
1797 {
1799  const NumericT,
1800  op_mult>(proxy, NumericT(val));
1801 }
1802 
1803 
1804 
1805 //
1806 // Specify available operations:
1807 //
1808 
1811 namespace linalg
1812 {
1813 namespace detail
1814 {
1815 
1816  // x = y
1817  template<typename T>
1818  struct op_executor<matrix_base<T>, op_assign, matrix_base<T> >
1819  {
1820  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1821  {
1822  viennacl::linalg::am(lhs, rhs, T(1), 1, false, false);
1823  }
1824  };
1825 
1826  // x = trans(y)
1827  template<typename T>
1828  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1829  {
1830  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1831  {
1832  matrix_base<T> temp(rhs);
1833  viennacl::linalg::am(lhs, temp, T(1), 1, false, false);
1834  }
1835  };
1836 
1837  // x = trans(expr)
1838  template<typename T, typename LhsT, typename RhsT, typename OpT>
1839  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>, const matrix_expression<const LhsT, const RhsT, OpT>, op_trans> >
1840  {
1841  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
1842  const matrix_expression<const LhsT, const RhsT, OpT>,
1843  op_trans> const & rhs)
1844  {
1845  matrix_base<T> temp1(rhs.rhs());
1846  matrix_base<T> temp2(viennacl::trans(temp1));
1847  viennacl::linalg::am(lhs, temp2, T(1), 1, false, false);
1848  }
1849  };
1850 
1851 
1852  // x += y
1853  template<typename T>
1854  struct op_executor<matrix_base<T>, op_inplace_add, matrix_base<T> >
1855  {
1856  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1857  {
1858  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false);
1859  }
1860  };
1861 
1862  // x += trans(y)
1863  template<typename T>
1864  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1865  {
1866  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1867  {
1868  matrix_base<T> temp(rhs);
1869  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, false);
1870  }
1871  };
1872 
1873  // x += trans(expr)
1874  template<typename T, typename LhsT, typename RhsT, typename OpT>
1875  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>, const matrix_expression<const LhsT, const RhsT, OpT>, op_trans> >
1876  {
1877  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
1878  const matrix_expression<const LhsT, const RhsT, OpT>,
1879  op_trans> const & rhs)
1880  {
1881  matrix_base<T> temp1(rhs.rhs());
1882  matrix_base<T> temp2(viennacl::trans(temp1));
1883  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp2, T(1), 1, false, false);
1884  }
1885  };
1886 
1887  // x -= y
1888  template<typename T>
1889  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_base<T> >
1890  {
1891  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1892  {
1893  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true);
1894  }
1895  };
1896 
1897  // x -= trans(y)
1898  template<typename T>
1899  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1900  {
1901  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1902  {
1903  matrix_base<T> temp(rhs);
1904  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, true);
1905  }
1906  };
1907 
1908  // x -= trans(expr)
1909  template<typename T, typename LhsT, typename RhsT, typename OpT>
1910  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>, const matrix_expression<const LhsT, const RhsT, OpT>, op_trans> >
1911  {
1912  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
1913  const matrix_expression<const LhsT, const RhsT, OpT>,
1914  op_trans> const & rhs)
1915  {
1916  matrix_base<T> temp1(rhs.rhs());
1917  matrix_base<T> temp2(viennacl::trans(temp1));
1918  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp2, T(1), 1, false, true);
1919  }
1920  };
1921 
1923 
1924 
1925  // x = alpha * y
1926  template<typename T, typename ScalarType>
1927  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1928  {
1929  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1930  {
1931  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false);
1932  }
1933  };
1934 
1935  // x += alpha * y
1936  template<typename T, typename ScalarType>
1937  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1938  {
1939  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1940  {
1941  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false);
1942  }
1943  };
1944 
1945  // x -= alpha * y
1946  template<typename T, typename ScalarType>
1947  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1948  {
1949  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1950  {
1951  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true);
1952  }
1953  };
1954 
1955 
1957 
1958  // x = alpha * vec_expr
1959  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1960  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1961  {
1962  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1963  {
1964  if (lhs.row_major())
1965  {
1966  matrix<T> temp(proxy.lhs());
1967  lhs = temp * proxy.rhs();
1968  }
1969  else
1970  {
1971  matrix<T, column_major> temp(proxy.lhs());
1972  lhs = temp * proxy.rhs();
1973  }
1974  }
1975  };
1976 
1977  // x += alpha * vec_expr
1978  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1979  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1980  {
1981  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1982  {
1983  if (lhs.row_major())
1984  {
1985  matrix<T> temp(proxy.lhs());
1986  lhs += temp * proxy.rhs();
1987  }
1988  else
1989  {
1990  matrix<T, column_major> temp(proxy.lhs());
1991  lhs += temp * proxy.rhs();
1992  }
1993  }
1994  };
1995 
1996  // x -= alpha * vec_expr
1997  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1998  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1999  {
2000  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
2001  {
2002  if (lhs.row_major())
2003  {
2004  matrix<T> temp(proxy.lhs());
2005  lhs -= temp * proxy.rhs();
2006  }
2007  else
2008  {
2009  matrix<T, column_major> temp(proxy.lhs());
2010  lhs -= temp * proxy.rhs();
2011  }
2012  }
2013  };
2014 
2015 
2017 
2018  // x = y / alpha
2019  template<typename T, typename ScalarType>
2020  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
2021  {
2022  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
2023  {
2024  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false);
2025  }
2026  };
2027 
2028  // x += y / alpha
2029  template<typename T, typename ScalarType>
2030  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
2031  {
2032  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
2033  {
2034  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false);
2035  }
2036  };
2037 
2038  // x -= y / alpha
2039  template<typename T, typename ScalarType>
2040  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
2041  {
2042  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
2043  {
2044  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true);
2045  }
2046  };
2047 
2048 
2050 
2051  // x = vec_expr / alpha
2052  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
2053  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
2054  {
2055  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
2056  {
2057  if (lhs.row_major())
2058  {
2059  matrix<T> temp(proxy.lhs());
2060  lhs = temp / proxy.rhs();
2061  }
2062  else
2063  {
2064  matrix<T, column_major> temp(proxy.lhs());
2065  lhs = temp / proxy.rhs();
2066  }
2067  }
2068  };
2069 
2070  // x += vec_expr / alpha
2071  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
2072  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
2073  {
2074  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
2075  {
2076  if (lhs.row_major())
2077  {
2078  matrix<T> temp(proxy.lhs());
2079  lhs += temp / proxy.rhs();
2080  }
2081  else
2082  {
2083  matrix<T, column_major> temp(proxy.lhs());
2084  lhs += temp / proxy.rhs();
2085  }
2086  }
2087  };
2088 
2089  // x -= vec_expr / alpha
2090  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
2091  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
2092  {
2093  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
2094  {
2095  if (lhs.row_major())
2096  {
2097  matrix<T, row_major> temp(proxy.lhs());
2098  lhs -= temp / proxy.rhs();
2099  }
2100  else
2101  {
2102  matrix<T, column_major> temp(proxy.lhs());
2103  lhs -= temp / proxy.rhs();
2104  }
2105  }
2106  };
2107 
2108 
2109 
2110  // generic x = vec_expr1 + vec_expr2:
2111  template<typename T, typename LHS, typename RHS>
2112  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
2113  {
2114  // generic x = vec_expr1 + vec_expr2:
2115  template<typename LHS1, typename RHS1>
2116  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2117  {
2118  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2119  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2120 
2121  if (op_aliasing_lhs || op_aliasing_rhs)
2122  {
2123  matrix_base<T> temp(proxy.lhs());
2124  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2125  lhs = temp;
2126  }
2127  else
2128  {
2129  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2130  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2131  }
2132  }
2133 
2134  // x = y + z
2135  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2136  {
2138  proxy.lhs(), T(1), 1, false, false,
2139  proxy.rhs(), T(1), 1, false, false);
2140  }
2141 
2142  // x = alpha * y + z
2143  template<typename ScalarType>
2144  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2145  const matrix_base<T>,
2146  op_add> const & proxy)
2147  {
2149  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2150  proxy.rhs(), T(1), 1, false, false);
2151  }
2152 
2153  // x = y / alpha + z
2154  template<typename ScalarType>
2155  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2156  const matrix_base<T>,
2157  op_add> const & proxy)
2158  {
2160  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2161  proxy.rhs(), T(1), 1, false, false);
2162  }
2163 
2164  // x = y + beta * z
2165  template<typename ScalarType>
2166  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2167  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2168  op_add> const & proxy)
2169  {
2171  proxy.lhs(), T(1), 1, false, false,
2172  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2173  }
2174 
2175  // x = y + z / beta
2176  template<typename ScalarType>
2177  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2178  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2179  op_add> const & proxy)
2180  {
2182  proxy.lhs(), T(1), 1, false, false,
2183  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2184  }
2185 
2186  // x = alpha * y + beta * z
2187  template<typename ScalarType1, typename ScalarType2>
2188  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2189  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2190  op_add> const & proxy)
2191  {
2193  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2194  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2195  }
2196 
2197  // x = alpha * y + z / beta
2198  template<typename ScalarType1, typename ScalarType2>
2199  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2200  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2201  op_add> const & proxy)
2202  {
2204  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2205  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2206  }
2207 
2208  // x = y / alpha + beta * z
2209  template<typename ScalarType1, typename ScalarType2>
2210  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2211  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2212  op_add> const & proxy)
2213  {
2215  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2216  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2217  }
2218 
2219  // x = y / alpha + z / beta
2220  template<typename ScalarType1, typename ScalarType2>
2221  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2222  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2223  op_add> const & proxy)
2224  {
2226  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2227  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2228  }
2229  };
2230 
2231  // dense = sparse * dense
2232  template<typename T, typename LHS, typename RHS>
2233  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
2234  {
2235  template< typename SparseMatrixType>
2236  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2238  viennacl::op_prod> const & proxy)
2239  {
2240  // check for x = A * x
2241  if (op_aliasing(lhs, proxy.rhs()))
2242  {
2243  matrix_base<T> temp(proxy);
2244  lhs = temp;
2245  }
2246  else
2247  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2248  }
2249 
2250  // dense = sparse * trans(dense)
2251  template< typename SparseMatrixType >
2252  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2256  viennacl::op_prod> const & proxy)
2257  {
2258  // check for x = A * x
2259  if (op_aliasing(lhs, proxy.rhs()))
2260  {
2261  matrix_base<T> temp(proxy);
2262  lhs = temp;
2263  }
2264  else
2265  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2266  }
2267 
2268  };
2269 
2270  // generic x += vec_expr1 + vec_expr2:
2271  template<typename T, typename LHS, typename RHS>
2272  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
2273  {
2274  // generic x += vec_expr1 + vec_expr2:
2275  template<typename LHS1, typename RHS1>
2276  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2277  {
2278  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2279  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2280 
2281  if (op_aliasing_lhs || op_aliasing_rhs)
2282  {
2283  matrix_base<T> temp(proxy.lhs());
2284  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2285  lhs += temp;
2286  }
2287  else
2288  {
2289  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2290  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2291  }
2292  }
2293 
2294  // x += y + z
2295  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2296  {
2298  proxy.lhs(), T(1), 1, false, false,
2299  proxy.rhs(), T(1), 1, false, false);
2300  }
2301 
2302  // x += alpha * y + z
2303  template<typename ScalarType>
2304  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2305  const matrix_base<T>,
2306  op_add> const & proxy)
2307  {
2309  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2310  proxy.rhs(), T(1), 1, false, false);
2311  }
2312 
2313  // x += y / alpha + z
2314  template<typename ScalarType>
2315  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2316  const matrix_base<T>,
2317  op_add> const & proxy)
2318  {
2320  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2321  proxy.rhs(), T(1), 1, false, false);
2322  }
2323 
2324  // x += y + beta * z
2325  template<typename ScalarType>
2326  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2327  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2328  op_add> const & proxy)
2329  {
2331  proxy.lhs(), T(1), 1, false, false,
2332  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2333  }
2334 
2335  // x += y + z / beta
2336  template<typename ScalarType>
2337  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2338  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2339  op_add> const & proxy)
2340  {
2342  proxy.lhs(), T(1), 1, false, false,
2343  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2344  }
2345 
2346  // x += alpha * y + beta * z
2347  template<typename ScalarType1, typename ScalarType2>
2348  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2349  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2350  op_add> const & proxy)
2351  {
2353  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2354  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2355  }
2356 
2357  // x += alpha * y + z / beta
2358  template<typename ScalarType1, typename ScalarType2>
2359  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2360  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2361  op_add> const & proxy)
2362  {
2364  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2365  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2366  }
2367 
2368  // x += y / alpha + beta * z
2369  template<typename ScalarType1, typename ScalarType2>
2370  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2371  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2372  op_add> const & proxy)
2373  {
2375  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2376  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2377  }
2378 
2379  // x += y / alpha + z / beta
2380  template<typename ScalarType1, typename ScalarType2>
2381  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2382  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2383  op_add> const & proxy)
2384  {
2386  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2387  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2388  }
2389  };
2390 
2391 
2392 
2393  // generic x -= vec_expr1 + vec_expr2:
2394  template<typename T, typename LHS, typename RHS>
2395  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
2396  {
2397  // generic x -= vec_expr1 + vec_expr2:
2398  template<typename LHS1, typename RHS1>
2399  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2400  {
2401  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2402  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2403 
2404  if (op_aliasing_lhs || op_aliasing_rhs)
2405  {
2406  matrix_base<T> temp(proxy.lhs());
2407  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2408  lhs -= temp;
2409  }
2410  else
2411  {
2412  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2413  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2414  }
2415  }
2416 
2417  // x -= y + z
2418  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2419  {
2421  proxy.lhs(), T(1), 1, false, true,
2422  proxy.rhs(), T(1), 1, false, true);
2423  }
2424 
2425  // x -= alpha * y + z
2426  template<typename ScalarType>
2427  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2428  const matrix_base<T>,
2429  op_add> const & proxy)
2430  {
2432  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2433  proxy.rhs(), T(1), 1, false, true);
2434  }
2435 
2436  // x -= y / alpha + z
2437  template<typename ScalarType>
2438  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2439  const matrix_base<T>,
2440  op_add> const & proxy)
2441  {
2443  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2444  proxy.rhs(), T(1), 1, false, true);
2445  }
2446 
2447  // x -= y + beta * z
2448  template<typename ScalarType>
2449  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2450  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2451  op_add> const & proxy)
2452  {
2454  proxy.lhs(), T(1), 1, false, true,
2455  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2456  }
2457 
2458  // x -= y + z / beta
2459  template<typename ScalarType>
2460  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2461  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2462  op_add> const & proxy)
2463  {
2465  proxy.lhs(), T(1), 1, false, true,
2466  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2467  }
2468 
2469  // x -= alpha * y + beta * z
2470  template<typename ScalarType1, typename ScalarType2>
2471  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2472  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2473  op_add> const & proxy)
2474  {
2476  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2477  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2478  }
2479 
2480  // x -= alpha * y + z / beta
2481  template<typename ScalarType1, typename ScalarType2>
2482  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2483  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2484  op_add> const & proxy)
2485  {
2487  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2488  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2489  }
2490 
2491  // x -= y / alpha + beta * z
2492  template<typename ScalarType1, typename ScalarType2>
2493  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2494  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2495  op_add> const & proxy)
2496  {
2498  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2499  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2500  }
2501 
2502  // x -= y / alpha + z / beta
2503  template<typename ScalarType1, typename ScalarType2>
2504  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2505  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2506  op_add> const & proxy)
2507  {
2509  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2510  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2511  }
2512  };
2513 
2514 
2515 
2517 
2518 
2519 
2520  // generic x = vec_expr1 - vec_expr2:
2521  template<typename T, typename LHS, typename RHS>
2522  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >
2523  {
2524  // generic x = vec_expr1 - vec_expr2:
2525  template<typename LHS1, typename RHS1>
2526  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2527  {
2528  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2529  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2530 
2531  if (op_aliasing_lhs || op_aliasing_rhs)
2532  {
2533  matrix_base<T> temp(proxy.lhs());
2534  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2535  lhs = temp;
2536  }
2537  else
2538  {
2539  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2540  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2541  }
2542  }
2543 
2544  // x = y - z
2545  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2546  {
2548  proxy.lhs(), T(1), 1, false, false,
2549  proxy.rhs(), T(1), 1, false, true);
2550  }
2551 
2552  // x = alpha * y - z
2553  template<typename ScalarType>
2554  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2555  const matrix_base<T>,
2556  op_sub> const & proxy)
2557  {
2559  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2560  proxy.rhs(), T(1), 1, false, true);
2561  }
2562 
2563  // x = y / alpha - z
2564  template<typename ScalarType>
2565  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2566  const matrix_base<T>,
2567  op_sub> const & proxy)
2568  {
2570  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2571  proxy.rhs(), T(1), 1, false, true);
2572  }
2573 
2574  // x = y - beta * z
2575  template<typename ScalarType>
2576  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2577  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2578  op_sub> const & proxy)
2579  {
2581  proxy.lhs(), T(1), 1, false, false,
2582  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2583  }
2584 
2585  // x = y - z / beta
2586  template<typename ScalarType>
2587  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2588  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2589  op_sub> const & proxy)
2590  {
2592  proxy.lhs(), T(1), 1, false, false,
2593  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2594  }
2595 
2596  // x = alpha * y - beta * z
2597  template<typename ScalarType1, typename ScalarType2>
2598  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2599  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2600  op_sub> const & proxy)
2601  {
2603  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2604  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2605  }
2606 
2607  // x = alpha * y - z / beta
2608  template<typename ScalarType1, typename ScalarType2>
2609  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2610  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2611  op_sub> const & proxy)
2612  {
2614  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2615  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2616  }
2617 
2618  // x = y / alpha - beta * z
2619  template<typename ScalarType1, typename ScalarType2>
2620  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2621  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2622  op_sub> const & proxy)
2623  {
2625  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2626  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2627  }
2628 
2629  // x = y / alpha - z / beta
2630  template<typename ScalarType1, typename ScalarType2>
2631  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2632  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2633  op_sub> const & proxy)
2634  {
2636  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2637  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2638  }
2639  };
2640 
2641 
2642  // generic x += vec_expr1 - vec_expr2:
2643  template<typename T, typename LHS, typename RHS>
2644  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
2645  {
2646  // generic x += vec_expr1 - vec_expr2:
2647  template<typename LHS1, typename RHS1>
2648  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2649  {
2650  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2651  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2652 
2653  if (op_aliasing_lhs || op_aliasing_rhs)
2654  {
2655  matrix_base<T> temp(proxy.lhs());
2656  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2657  lhs += temp;
2658  }
2659  else
2660  {
2661  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2662  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2663  }
2664  }
2665 
2666  // x += y - z
2667  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2668  {
2670  proxy.lhs(), T(1), 1, false, false,
2671  proxy.rhs(), T(1), 1, false, true);
2672  }
2673 
2674  // x += alpha * y - z
2675  template<typename ScalarType>
2676  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2677  const matrix_base<T>,
2678  op_sub> const & proxy)
2679  {
2681  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2682  proxy.rhs(), T(1), 1, false, true);
2683  }
2684 
2685  // x += y / alpha - z
2686  template<typename ScalarType>
2687  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2688  const matrix_base<T>,
2689  op_sub> const & proxy)
2690  {
2692  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2693  proxy.rhs(), T(1), 1, false, true);
2694  }
2695 
2696  // x += y - beta * z
2697  template<typename ScalarType>
2698  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2699  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2700  op_sub> const & proxy)
2701  {
2703  proxy.lhs(), T(1), 1, false, false,
2704  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2705  }
2706 
2707  // x += y - z / beta
2708  template<typename ScalarType>
2709  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2710  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2711  op_sub> const & proxy)
2712  {
2714  proxy.lhs(), T(1), 1, false, false,
2715  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2716  }
2717 
2718  // x += alpha * y - beta * z
2719  template<typename ScalarType1, typename ScalarType2>
2720  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2721  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2722  op_sub> const & proxy)
2723  {
2725  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2726  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2727  }
2728 
2729  // x += alpha * y - z / beta
2730  template<typename ScalarType1, typename ScalarType2>
2731  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2732  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2733  op_sub> const & proxy)
2734  {
2736  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2737  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2738  }
2739 
2740  // x += y / alpha - beta * z
2741  template<typename ScalarType1, typename ScalarType2>
2742  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2743  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2744  op_sub> const & proxy)
2745  {
2747  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2748  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2749  }
2750 
2751  // x += y / alpha - z / beta
2752  template<typename ScalarType1, typename ScalarType2>
2753  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2754  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2755  op_sub> const & proxy)
2756  {
2758  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2759  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2760  }
2761  };
2762 
2763 
2764 
2765  // generic x -= vec_expr1 - vec_expr2:
2766  template<typename T, typename LHS, typename RHS>
2767  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
2768  {
2769  // generic x -= vec_expr1 - vec_expr2:
2770  template<typename LHS1, typename RHS1>
2771  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2772  {
2773  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2774  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2775 
2776  if (op_aliasing_lhs || op_aliasing_rhs)
2777  {
2778  matrix_base<T> temp(proxy.lhs());
2779  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2780  lhs -= temp;
2781  }
2782  else
2783  {
2784  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2785  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2786  }
2787  }
2788 
2789  // x -= y - z
2790  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2791  {
2793  proxy.lhs(), T(1), 1, false, true,
2794  proxy.rhs(), T(1), 1, false, false);
2795  }
2796 
2797  // x -= alpha * y - z
2798  template<typename ScalarType>
2799  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2800  const matrix_base<T>,
2801  op_sub> const & proxy)
2802  {
2804  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2805  proxy.rhs(), T(1), 1, false, false);
2806  }
2807 
2808  // x -= y / alpha - z
2809  template<typename ScalarType>
2810  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2811  const matrix_base<T>,
2812  op_sub> const & proxy)
2813  {
2815  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2816  proxy.rhs(), T(1), 1, false, false);
2817  }
2818 
2819  // x -= y - beta * z
2820  template<typename ScalarType>
2821  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2822  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2823  op_sub> const & proxy)
2824  {
2826  proxy.lhs(), T(1), 1, false, true,
2827  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2828  }
2829 
2830  // x -= y - z / beta
2831  template<typename ScalarType>
2832  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2833  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2834  op_sub> const & proxy)
2835  {
2837  proxy.lhs(), T(1), 1, false, true,
2838  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2839  }
2840 
2841  // x -= alpha * y - beta * z
2842  template<typename ScalarType1, typename ScalarType2>
2843  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2844  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2845  op_sub> const & proxy)
2846  {
2848  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2849  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2850  }
2851 
2852  // x -= alpha * y - z / beta
2853  template<typename ScalarType1, typename ScalarType2>
2854  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2855  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2856  op_sub> const & proxy)
2857  {
2859  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2860  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2861  }
2862 
2863  // x -= y / alpha - beta * z
2864  template<typename ScalarType1, typename ScalarType2>
2865  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2866  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2867  op_sub> const & proxy)
2868  {
2870  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2871  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2872  }
2873 
2874  // x -= y / alpha - z / beta
2875  template<typename ScalarType1, typename ScalarType2>
2876  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2877  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2878  op_sub> const & proxy)
2879  {
2881  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2882  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2883  }
2884  };
2885 
2886 
2888 
2889  template<typename T, typename LHS>
2890  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
2891  {
2892  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
2893  {
2894  viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs);
2895  }
2896  };
2897 
2898 
2899  template<typename T, typename LHS>
2900  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
2901  {
2902  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const int, op_matrix_diag> const & proxy)
2903  {
2904  viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs);
2905  }
2906  };
2907 
2908  template<typename T, typename LHS>
2909  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
2910  {
2911  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_row> const & proxy)
2912  {
2913  viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs);
2914  }
2915  };
2916 
2917 
2918  template<typename T, typename LHS>
2919  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
2920  {
2921  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_column> const & proxy)
2922  {
2923  viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs);
2924  }
2925  };
2926 
2928 
2929  template<typename T>
2930  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const matrix_base<T>, op_row_sum> >
2931  {
2932  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const matrix_base<T>, op_row_sum> const & proxy)
2933  {
2934  viennacl::linalg::row_sum_impl(proxy.lhs(), lhs);
2935  }
2936  };
2937 
2938  template<typename T, typename LHS, typename RHS, typename OP>
2939  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_row_sum> >
2940  {
2941  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_row_sum> const & proxy)
2942  {
2943  matrix_base<T> tmp(proxy.lhs());
2945  }
2946  };
2947 
2948  template<typename T>
2949  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const matrix_base<T>, op_col_sum> >
2950  {
2951  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const matrix_base<T>, op_col_sum> const & proxy)
2952  {
2953  viennacl::linalg::column_sum_impl(proxy.lhs(), lhs);
2954  }
2955  };
2956 
2957 
2958  template<typename T, typename LHS, typename RHS, typename OP>
2959  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_col_sum> >
2960  {
2961  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_col_sum> const & proxy)
2962  {
2963  matrix_base<T> tmp(proxy.lhs());
2965  }
2966  };
2967 
2969 
2970  // generic x = mat_expr1 .* mat_expr2:
2971  template<typename T, typename LHS, typename RHS, typename OP>
2972  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2973  {
2974  // x = y .* z
2975  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2976  {
2977  viennacl::linalg::element_op(lhs, proxy);
2978  }
2979 
2980  // x = y .* mat_expr
2981  template<typename LHS2, typename RHS2, typename OP2>
2982  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2983  {
2984  matrix_base<T> temp(proxy.rhs());
2985  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2986  }
2987 
2988  // x = mat_expr .* z
2989  template<typename LHS1, typename RHS1, typename OP1>
2990  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2991  {
2992  matrix_base<T> temp(proxy.lhs());
2993  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2994  }
2995 
2996  // x = mat_expr .* mat_expr
2997  template<typename LHS1, typename RHS1, typename OP1,
2998  typename LHS2, typename RHS2, typename OP2>
2999  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
3000  const matrix_expression<const LHS2, const RHS2, OP2>,
3001  op_element_binary<OP> > const & proxy)
3002  {
3003  matrix_base<T> temp1(proxy.lhs());
3004  matrix_base<T> temp2(proxy.rhs());
3005  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
3006  }
3007  };
3008 
3009  // generic x += mat_expr .* mat_expr:
3010  template<typename T, typename LHS, typename RHS, typename OP>
3011  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
3012  {
3013  // x += y .* z
3014  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
3015  {
3016  matrix_base<T> temp(proxy);
3017  lhs += temp;
3018  }
3019 
3020  // x += y .* mat_expr
3021  template<typename LHS2, typename RHS2, typename OP2>
3022  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
3023  {
3024  matrix_base<T> temp(proxy.rhs());
3025  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3026  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
3027  lhs += temp2;
3028  }
3029 
3030  // x += mat_expr .* z
3031  template<typename LHS1, typename RHS1, typename OP1>
3032  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
3033  {
3034  matrix_base<T> temp(proxy.lhs());
3035  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3036  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
3037  lhs += temp2;
3038  }
3039 
3040  // x += mat_expr .* mat_expr
3041  template<typename LHS1, typename RHS1, typename OP1,
3042  typename LHS2, typename RHS2, typename OP2>
3043  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
3044  const matrix_expression<const LHS2, const RHS2, OP2>,
3045  op_element_binary<OP> > const & proxy)
3046  {
3047  matrix_base<T> temp1(proxy.lhs());
3048  matrix_base<T> temp2(proxy.rhs());
3049  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3050  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
3051  lhs += temp3;
3052  }
3053  };
3054 
3055  // generic x -= mat_expr1 .* mat_expr2:
3056  template<typename T, typename LHS, typename RHS, typename OP>
3057  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
3058  {
3059 
3060  // x -= y .* z
3061  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
3062  {
3063  matrix_base<T> temp(proxy);
3064  lhs -= temp;
3065  }
3066 
3067  // x -= y .* mat_expr
3068  template<typename LHS2, typename RHS2, typename OP2>
3069  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
3070  {
3071  matrix_base<T> temp(proxy.rhs());
3072  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3073  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
3074  lhs -= temp2;
3075  }
3076 
3077  // x -= mat_expr .* z
3078  template<typename LHS1, typename RHS1, typename OP1>
3079  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
3080  {
3081  matrix_base<T> temp(proxy.lhs());
3082  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3083  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
3084  lhs -= temp2;
3085  }
3086 
3087  // x -= mat_expr .* mat_expr
3088  template<typename LHS1, typename RHS1, typename OP1,
3089  typename LHS2, typename RHS2, typename OP2>
3090  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
3091  const matrix_expression<const LHS2, const RHS2, OP2>,
3092  op_element_binary<OP> > const & proxy)
3093  {
3094  matrix_base<T> temp1(proxy.lhs());
3095  matrix_base<T> temp2(proxy.rhs());
3096  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3097  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
3098  lhs -= temp3;
3099  }
3100  };
3101 
3103 
3104  template<typename T, typename LHS, typename RHS, typename OP>
3105  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
3106  {
3107  // x = OP(y)
3108  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
3109  {
3110  viennacl::linalg::element_op(lhs, proxy);
3111  }
3112 
3113  // x = OP(vec_expr)
3114  template<typename LHS2, typename RHS2, typename OP2>
3115  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
3116  const matrix_expression<const LHS2, const RHS2, OP2>,
3117  op_element_unary<OP> > const & proxy)
3118  {
3119  matrix_base<T> temp(proxy.rhs());
3120  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp));
3121  }
3122  };
3123 
3124  template<typename T, typename LHS, typename RHS, typename OP>
3125  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
3126  {
3127  // x += OP(y)
3128  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
3129  {
3130  matrix_base<T> temp(proxy);
3131  lhs += temp;
3132  }
3133 
3134  // x += OP(vec_expr)
3135  template<typename LHS2, typename RHS2, typename OP2>
3136  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
3137  const matrix_expression<const LHS2, const RHS2, OP2>,
3138  op_element_unary<OP> > const & proxy)
3139  {
3140  matrix_base<T> temp(proxy.rhs());
3141  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
3142  lhs += temp;
3143  }
3144  };
3145 
3146  template<typename T, typename LHS, typename RHS, typename OP>
3147  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
3148  {
3149  // x -= OP(y)
3150  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
3151  {
3152  matrix_base<T> temp(proxy);
3153  lhs -= temp;
3154  }
3155 
3156  // x -= OP(vec_expr)
3157  template<typename LHS2, typename RHS2, typename OP2>
3158  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
3159  const matrix_expression<const LHS2, const RHS2, OP2>,
3160  op_element_unary<OP> > const & proxy)
3161  {
3162  matrix_base<T> temp(proxy.rhs());
3163  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
3164  lhs -= temp;
3165  }
3166  };
3167 
3168 
3169 
3171 
3172  // C = A * B
3173  template<typename T>
3174  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3175  {
3176  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3177  {
3178  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3179  {
3180  matrix_base<T> temp(rhs);
3181  lhs = temp;
3182  }
3183  else
3184  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3185  }
3186  };
3187 
3188  // C = A * B^T
3189  template<typename T>
3190  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>,
3191  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3192  op_mat_mat_prod> >
3193  {
3194  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3195  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3196  op_mat_mat_prod> const & rhs)
3197  {
3198  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3199  {
3200  matrix_base<T> temp(rhs);
3201  lhs = temp;
3202  }
3203  else
3204  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3205  }
3206  };
3207 
3208  // C = A * EXPR for some matrix expression EXPR
3209  template<typename T, typename LhsT, typename RhsT, typename OpT>
3210  struct op_executor<matrix_base<T>,
3211  op_assign,
3212  matrix_expression<const matrix_base<T>,
3213  const matrix_expression<const LhsT, const RhsT, OpT>,
3214  op_mat_mat_prod>
3215  >
3216  {
3217  static void apply(matrix_base<T> & lhs,
3218  matrix_expression<const matrix_base<T>,
3219  const matrix_expression<const LhsT, const RhsT, OpT>,
3220  op_mat_mat_prod> const & rhs)
3221  {
3222  matrix_base<T> temp(rhs.rhs());
3223  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(1.0), T(0));
3224  }
3225  };
3226 
3227 
3228 
3229  // C = A^T * B
3230  template<typename T>
3231  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3232  const matrix_base<T>,
3233  op_mat_mat_prod> >
3234  {
3235  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3236  const matrix_base<T>,
3237  op_mat_mat_prod> const & rhs)
3238  {
3239  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3240  {
3241  matrix_base<T> temp(rhs);
3242  lhs = temp;
3243  }
3244  else
3245  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3246  }
3247  };
3248 
3249  // C = EXPR * B for some matrix expression EXPR
3250  template<typename T, typename LhsT, typename RhsT, typename OpT>
3251  struct op_executor<matrix_base<T>,
3252  op_assign,
3253  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3254  const matrix_base<T>,
3255  op_mat_mat_prod>
3256  >
3257  {
3258  static void apply(matrix_base<T> & lhs,
3259  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3260  const matrix_base<T>,
3261  op_mat_mat_prod> const & rhs)
3262  {
3263  matrix_base<T> temp(rhs.lhs());
3264  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(1.0), T(0));
3265  }
3266  };
3267 
3268 
3269  // C = A^T * B^T
3270  template<typename T>
3271  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3272  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3273  op_mat_mat_prod> >
3274  {
3275  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3276  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3277  op_mat_mat_prod> const & rhs)
3278  {
3279  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3280  {
3281  matrix_base<T> temp(rhs);
3282  lhs = temp;
3283  }
3284  else
3285  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3286  }
3287  };
3288 
3289  // C = EXPR1 * EXPR2 for some matrix expressions EXPR1 and EXPR2
3290  template<typename T,
3291  typename LhsT1, typename RhsT1, typename OpT1,
3292  typename LhsT2, typename RhsT2, typename OpT2>
3293  struct op_executor<matrix_base<T>,
3294  op_assign,
3295  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3296  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3297  op_mat_mat_prod>
3298  >
3299  {
3300  static void apply(matrix_base<T> & lhs,
3301  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3302  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3303  op_mat_mat_prod> const & rhs)
3304  {
3305  matrix_base<T> temp1(rhs.lhs());
3306  matrix_base<T> temp2(rhs.rhs());
3307  viennacl::linalg::prod_impl(temp1, temp2, lhs, T(1.0), T(0));
3308  }
3309  };
3310 
3311 
3312 
3313 
3314  // C += A * B
3315  template<typename T>
3316  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3317  {
3318  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3319  {
3320  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3321  {
3322  matrix_base<T> temp(rhs);
3323  lhs += temp;
3324  }
3325  else
3326  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3327  }
3328  };
3329 
3330  // C += A * B^T
3331  template<typename T>
3332  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>,
3333  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3334  op_mat_mat_prod> >
3335  {
3336  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3337  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3338  op_mat_mat_prod> const & rhs)
3339  {
3340  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3341  {
3342  matrix_base<T> temp(rhs);
3343  lhs += temp;
3344  }
3345  else
3346  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3347  }
3348  };
3349 
3350  // C += A * EXPR for some matrix expression EXPR
3351  template<typename T, typename LhsT, typename RhsT, typename OpT>
3352  struct op_executor<matrix_base<T>,
3353  op_inplace_add,
3354  matrix_expression<const matrix_base<T>,
3355  const matrix_expression<const LhsT, const RhsT, OpT>,
3356  op_mat_mat_prod>
3357  >
3358  {
3359  static void apply(matrix_base<T> & lhs,
3360  matrix_expression<const matrix_base<T>,
3361  const matrix_expression<const LhsT, const RhsT, OpT>,
3362  op_mat_mat_prod> const & rhs)
3363  {
3364  matrix_base<T> temp(rhs.rhs());
3365  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(1.0), T(1.0));
3366  }
3367  };
3368 
3369 
3370  // C += A^T * B
3371  template<typename T>
3372  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3373  const matrix_base<T>,
3374  op_mat_mat_prod> >
3375  {
3376  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3377  const matrix_base<T>,
3378  op_mat_mat_prod> const & rhs)
3379  {
3380  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3381  {
3382  matrix_base<T> temp(rhs);
3383  lhs += temp;
3384  }
3385  else
3386  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3387  }
3388  };
3389 
3390  // C += EXPR * B for some matrix expression EXPR
3391  template<typename T, typename LhsT, typename RhsT, typename OpT>
3392  struct op_executor<matrix_base<T>,
3393  op_inplace_add,
3394  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3395  const matrix_base<T>,
3396  op_mat_mat_prod>
3397  >
3398  {
3399  static void apply(matrix_base<T> & lhs,
3400  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3401  const matrix_base<T>,
3402  op_mat_mat_prod> const & rhs)
3403  {
3404  matrix_base<T> temp(rhs.lhs());
3405  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(1.0), T(1.0));
3406  }
3407  };
3408 
3409 
3410  // C += A^T * B^T
3411  template<typename T>
3412  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3413  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3414  op_mat_mat_prod> >
3415  {
3416  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3417  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3418  op_mat_mat_prod> const & rhs)
3419  {
3420  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3421  {
3422  matrix_base<T> temp(rhs);
3423  lhs += temp;
3424  }
3425  else
3426  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3427  }
3428  };
3429 
3430 
3431  // C += EXPR1 * EXPR2 for some matrix expressions EXPR1 and EXPR2
3432  template<typename T,
3433  typename LhsT1, typename RhsT1, typename OpT1,
3434  typename LhsT2, typename RhsT2, typename OpT2>
3435  struct op_executor<matrix_base<T>,
3436  op_inplace_add,
3437  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3438  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3439  op_mat_mat_prod>
3440  >
3441  {
3442  static void apply(matrix_base<T> & lhs,
3443  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3444  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3445  op_mat_mat_prod> const & rhs)
3446  {
3447  matrix_base<T> temp1(rhs.lhs());
3448  matrix_base<T> temp2(rhs.rhs());
3449  viennacl::linalg::prod_impl(temp1, temp2, lhs, T(1.0), T(1.0));
3450  }
3451  };
3452 
3453 
3454 
3455  // C -= A * B
3456  template<typename T>
3457  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3458  {
3459  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3460  {
3461  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3462  {
3463  matrix_base<T> temp(rhs);
3464  lhs -= temp;
3465  }
3466  else
3467  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3468  }
3469  };
3470 
3471  // C -= A * B^T
3472  template<typename T>
3473  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>,
3474  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3475  op_mat_mat_prod> >
3476  {
3477  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3478  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3479  op_mat_mat_prod> const & rhs)
3480  {
3481  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3482  {
3483  matrix_base<T> temp(rhs);
3484  lhs -= temp;
3485  }
3486  else
3487  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3488  }
3489  };
3490 
3491  // C -= A * EXPR for some matrix expression EXPR
3492  template<typename T, typename LhsT, typename RhsT, typename OpT>
3493  struct op_executor<matrix_base<T>,
3494  op_inplace_sub,
3495  matrix_expression<const matrix_base<T>,
3496  const matrix_expression<const LhsT, const RhsT, OpT>,
3497  op_mat_mat_prod>
3498  >
3499  {
3500  static void apply(matrix_base<T> & lhs,
3501  matrix_expression<const matrix_base<T>,
3502  const matrix_expression<const LhsT, const RhsT, OpT>,
3503  op_mat_mat_prod> const & rhs)
3504  {
3505  matrix_base<T> temp(rhs.rhs());
3506  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(-1.0), T(1.0));
3507  }
3508  };
3509 
3510 
3511  // C -= A^T * B
3512  template<typename T>
3513  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3514  const matrix_base<T>,
3515  op_mat_mat_prod> >
3516  {
3517  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3518  const matrix_base<T>,
3519  op_mat_mat_prod> const & rhs)
3520  {
3521  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3522  {
3523  matrix_base<T> temp(rhs);
3524  lhs -= temp;
3525  }
3526  else
3527  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3528  }
3529  };
3530 
3531  // C += EXPR * B for some matrix expression EXPR
3532  template<typename T, typename LhsT, typename RhsT, typename OpT>
3533  struct op_executor<matrix_base<T>,
3534  op_inplace_sub,
3535  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3536  const matrix_base<T>,
3537  op_mat_mat_prod>
3538  >
3539  {
3540  static void apply(matrix_base<T> & lhs,
3541  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3542  const matrix_base<T>,
3543  op_mat_mat_prod> const & rhs)
3544  {
3545  matrix_base<T> temp(rhs.lhs());
3546  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(-1.0), T(1.0));
3547  }
3548  };
3549 
3550 
3551  // C -= A^T * B^T
3552  template<typename T>
3553  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3554  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3555  op_mat_mat_prod> >
3556  {
3557  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3558  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3559  op_mat_mat_prod> const & rhs)
3560  {
3561  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3562  {
3563  matrix_base<T> temp(rhs);
3564  lhs -= temp;
3565  }
3566  else
3567  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3568  }
3569  };
3570 
3571  // C -= EXPR1 * EXPR2 for some matrix expressions EXPR1 and EXPR2
3572  template<typename T,
3573  typename LhsT1, typename RhsT1, typename OpT1,
3574  typename LhsT2, typename RhsT2, typename OpT2>
3575  struct op_executor<matrix_base<T>,
3576  op_inplace_sub,
3577  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3578  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3579  op_mat_mat_prod>
3580  >
3581  {
3582  static void apply(matrix_base<T> & lhs,
3583  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3584  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3585  op_mat_mat_prod> const & rhs)
3586  {
3587  matrix_base<T> temp1(rhs.lhs());
3588  matrix_base<T> temp2(rhs.rhs());
3589  viennacl::linalg::prod_impl(temp1, temp2, lhs, T(-1.0), T(1.0));
3590  }
3591  };
3592 
3594 
3595  // y = A * x
3596  template<typename T>
3597  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3598  {
3599  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3600  {
3601  // check for x = A * x
3602  if (op_aliasing(lhs, rhs.rhs()))
3603  {
3604  vector_base<T> temp(rhs);
3605  lhs = temp;
3606  }
3607  else
3608  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3609  }
3610  };
3611 
3612  // y = A^T * x
3613  template<typename T>
3614  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3615  const vector_base<T>,
3616  op_prod> >
3617  {
3618  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3619  const vector_base<T>,
3620  op_prod> const & rhs)
3621  {
3622  // check for x = A^T * x
3623  if (op_aliasing(lhs, rhs.rhs()))
3624  {
3625  vector_base<T> temp(rhs);
3626  lhs = temp;
3627  }
3628  else
3629  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3630  }
3631  };
3632 
3633  // y = MAT_EXPR * x for a matrix expression MAT_EXPR
3634  template<typename T, typename LhsT, typename RhsT, typename OpT>
3635  struct op_executor<vector_base<T>,
3636  op_assign,
3637  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3638  const vector_base<T>,
3639  op_prod>
3640  >
3641  {
3642  static void apply(vector_base<T> & lhs,
3643  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3644  const vector_base<T>,
3645  op_prod> const & rhs)
3646  {
3647  matrix_base<T> temp(rhs.lhs());
3648  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs);
3649  }
3650  };
3651 
3652  // y = A * VEC_EXPR for a vector expression VEC_EXPR
3653  template<typename T, typename LhsT, typename RhsT, typename OpT>
3654  struct op_executor<vector_base<T>,
3655  op_assign,
3656  vector_expression<const matrix_base<T>,
3657  const vector_expression<const LhsT, const RhsT, OpT>,
3658  op_prod>
3659  >
3660  {
3661  static void apply(vector_base<T> & lhs,
3662  vector_expression<const matrix_base<T>,
3663  const vector_expression<const LhsT, const RhsT, OpT>,
3664  op_prod> const & rhs)
3665  {
3666  vector_base<T> x(rhs.rhs());
3667  viennacl::linalg::prod_impl(rhs.lhs(), x, lhs);
3668  }
3669  };
3670 
3671  // y = MAT_EXPR * VEC_EXPR for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
3672  template<typename T,
3673  typename LhsT1, typename RhsT1, typename OpT1,
3674  typename LhsT2, typename RhsT2, typename OpT2>
3675  struct op_executor<vector_base<T>,
3676  op_assign,
3677  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3678  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3679  op_prod>
3680  >
3681  {
3682  static void apply(vector_base<T> & lhs,
3683  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3684  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3685  op_prod> const & rhs)
3686  {
3687  matrix_base<T> A(rhs.lhs());
3688  vector_base<T> x(rhs.rhs());
3689  viennacl::linalg::prod_impl(A, x, lhs);
3690  }
3691  };
3692 
3693 
3694 
3695  // y += A * x
3696  template<typename T>
3697  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3698  {
3699  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3700  {
3701  vector_base<T> temp(rhs);
3702  lhs += temp;
3703  }
3704  };
3705 
3706  // y += A^T * x
3707  template<typename T>
3708  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3709  const vector_base<T>,
3710  op_prod> >
3711  {
3712  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3713  const vector_base<T>,
3714  op_prod> const & rhs)
3715  {
3716  vector_base<T> temp(rhs);
3717  lhs += temp;
3718  }
3719  };
3720 
3721  // y += MAT_EXPR * x for a matrix expression MAT_EXPR
3722  template<typename T, typename LhsT, typename RhsT, typename OpT>
3723  struct op_executor<vector_base<T>,
3724  op_inplace_add,
3725  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3726  const vector_base<T>,
3727  op_prod>
3728  >
3729  {
3730  static void apply(vector_base<T> & lhs,
3731  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3732  const vector_base<T>,
3733  op_prod> const & rhs)
3734  {
3735  matrix_base<T> A(rhs.lhs());
3736  vector_base<T> y(lhs);
3737  viennacl::linalg::prod_impl(A, rhs.rhs(), y);
3738  lhs += y;
3739  }
3740  };
3741 
3742  // y += A * VEC_EXPR for a vector expression VEC_EXPR
3743  template<typename T, typename LhsT, typename RhsT, typename OpT>
3744  struct op_executor<vector_base<T>,
3745  op_inplace_add,
3746  vector_expression<const matrix_base<T>,
3747  const vector_expression<const LhsT, const RhsT, OpT>,
3748  op_prod>
3749  >
3750  {
3751  static void apply(vector_base<T> & lhs,
3752  vector_expression<const matrix_base<T>,
3753  const vector_expression<const LhsT, const RhsT, OpT>,
3754  op_prod> const & rhs)
3755  {
3756  vector_base<T> x(rhs.rhs());
3757  vector_base<T> y(lhs);
3758  viennacl::linalg::prod_impl(rhs.lhs(), x, y);
3759  lhs += y;
3760  }
3761  };
3762 
3763  // y += MAT_EXPR * VEC_EXPR for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
3764  template<typename T,
3765  typename LhsT1, typename RhsT1, typename OpT1,
3766  typename LhsT2, typename RhsT2, typename OpT2>
3767  struct op_executor<vector_base<T>,
3768  op_inplace_add,
3769  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3770  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3771  op_prod>
3772  >
3773  {
3774  static void apply(vector_base<T> & lhs,
3775  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3776  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3777  op_prod> const & rhs)
3778  {
3779  matrix_base<T> A(rhs.lhs());
3780  vector_base<T> x(rhs.rhs());
3781  vector_base<T> y(lhs);
3782  viennacl::linalg::prod_impl(A, x, y);
3783  lhs += y;
3784  }
3785  };
3786 
3787 
3788 
3789  // y -= A * x
3790  template<typename T>
3791  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3792  {
3793  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3794  {
3795  vector_base<T> temp(rhs);
3796  lhs -= temp;
3797  }
3798  };
3799 
3800  // y -= A^T * x
3801  template<typename T>
3802  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3803  const vector_base<T>,
3804  op_prod> >
3805  {
3806  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3807  const vector_base<T>,
3808  op_prod> const & rhs)
3809  {
3810  vector_base<T> temp(rhs);
3811  lhs -= temp;
3812  }
3813  };
3814 
3815  // y -= MAT_EXPR * x for a matrix expression MAT_EXPR
3816  template<typename T, typename LhsT, typename RhsT, typename OpT>
3817  struct op_executor<vector_base<T>,
3818  op_inplace_sub,
3819  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3820  const vector_base<T>,
3821  op_prod>
3822  >
3823  {
3824  static void apply(vector_base<T> & lhs,
3825  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3826  const vector_base<T>,
3827  op_prod> const & rhs)
3828  {
3829  matrix_base<T> A(rhs.lhs());
3830  vector_base<T> y(lhs);
3831  viennacl::linalg::prod_impl(A, rhs.rhs(), y);
3832  lhs -= y;
3833  }
3834  };
3835 
3836  // y -= A * VEC_EXPR for a vector expression VEC_EXPR
3837  template<typename T, typename LhsT, typename RhsT, typename OpT>
3838  struct op_executor<vector_base<T>,
3839  op_inplace_sub,
3840  vector_expression<const matrix_base<T>,
3841  const vector_expression<const LhsT, const RhsT, OpT>,
3842  op_prod>
3843  >
3844  {
3845  static void apply(vector_base<T> & lhs,
3846  vector_expression<const matrix_base<T>,
3847  const vector_expression<const LhsT, const RhsT, OpT>,
3848  op_prod> const & rhs)
3849  {
3850  vector_base<T> x(rhs.rhs());
3851  vector_base<T> y(lhs);
3852  viennacl::linalg::prod_impl(rhs.lhs(), x, y);
3853  lhs -= y;
3854  }
3855  };
3856 
3857  // y -= MAT_EXPR * VEC_EXPR for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
3858  template<typename T,
3859  typename LhsT1, typename RhsT1, typename OpT1,
3860  typename LhsT2, typename RhsT2, typename OpT2>
3861  struct op_executor<vector_base<T>,
3862  op_inplace_sub,
3863  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3864  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3865  op_prod>
3866  >
3867  {
3868  static void apply(vector_base<T> & lhs,
3869  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3870  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3871  op_prod> const & rhs)
3872  {
3873  matrix_base<T> A(rhs.lhs());
3874  vector_base<T> x(rhs.rhs());
3875  vector_base<T> y(lhs);
3876  viennacl::linalg::prod_impl(A, x, y);
3877  lhs -= y;
3878  }
3879  };
3880 
3881 
3882 
3884 
3885  // A = v1 * v2^T
3886  template<typename T>
3887  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3888  {
3889  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3890  {
3891  lhs.clear();
3892  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3893  }
3894  };
3895 
3896  // A = alpha * v1 * v2^T
3897  template<typename T, typename ScalarType>
3898  struct op_executor<matrix_base<T>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3899  const ScalarType,
3900  op_mult> >
3901  {
3902  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3903  const ScalarType,
3904  op_mult> const & rhs)
3905  {
3906  lhs.clear();
3907  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3908  }
3909  };
3910 
3911  // A += v1 * v2^T
3912  template<typename T>
3913  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3914  {
3915  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3916  {
3917  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3918  }
3919  };
3920 
3921  // A += alpha * v1 * v2^T
3922  template<typename T, typename ScalarType>
3923  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3924  const ScalarType,
3925  op_mult> >
3926  {
3927  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3928  const ScalarType,
3929  op_mult> const & rhs)
3930  {
3931  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3932  }
3933  };
3934 
3935  // A -= v1 * v2^T
3936  template<typename T>
3937  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3938  {
3939  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3940  {
3941  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs());
3942  }
3943  };
3944 
3945  // A -= alpha * v1 * v2^T
3946  template<typename T, typename ScalarType>
3947  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3948  const ScalarType,
3949  op_mult> >
3950  {
3951  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3952  const ScalarType,
3953  op_mult> const & rhs)
3954  {
3955  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs());
3956  }
3957  };
3958 
3959 
3960 } // namespace detail
3961 
3962 } // namespace linalg
3963 
3966 } //namespace viennacl
3967 
3968 #endif
void row_sum_impl(const matrix_base< NumericT > &A, vector_base< NumericT > &result)
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:30
bool operator==(self_type const &other)
Definition: matrix.hpp:106
A tag class representing multiplication by a scalar.
Definition: forwards.h:92
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
Wraps a CUDA or host buffer provided by the user.
Definition: matrix.hpp:721
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:92
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &v)
Dispatcher interface for v = diag(A, k)
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:314
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
base_type & operator=(viennacl::matrix< OtherNumericT, F2 > const &B)
Definition: matrix.hpp:798
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Worker class for decomposing expression templates.
Definition: op_executor.hpp:80
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:236
Implementations of dense matrix related operations including matrix-vector products.
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
Definition: matrix_def.hpp:242
Class for representing strided submatrices of a bigger matrix A.
Definition: forwards.h:443
self_type & operator=(const self_type &other)
Definition: matrix.hpp:262
matrix(zero_matrix< NumericT > const &m)
Creates the matrix from the supplied zero matrix.
Definition: matrix.hpp:760
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:484
Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> ...
Definition: forwards.h:462
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Various little tools used here and there in ViennaCL.
matrix_expression< const self_type, const NumericT, op_mult > operator-() const
Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix.
Definition: matrix.hpp:628
A tag class representing the extraction of a matrix column to a vector.
Definition: forwards.h:195
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:386
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
A tag class representing a matrix given by a vector placed on a certain (off-)diagonal.
Definition: forwards.h:189
A tag class representing subtraction.
Definition: forwards.h:90
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
viennacl::context context() const
Definition: matrix_def.hpp:46
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:394
A tag indicating iteration along increasing row index of a matrix.
Definition: matrix.hpp:84
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:375
A tag class representing division.
Definition: forwards.h:98
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
entry_proxy< NumericT > operator()(size_type row_index, size_type col_index)
Read-write access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:477
static vcl_size_t size1(LHS &lhs, RHS &)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator/=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1674
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_base< NumericT >, const S1, op_mult >>::type operator*(S1 const &value, matrix_base< NumericT > const &m1)
Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m...
Definition: matrix.hpp:1374
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:375
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
Forward declaration of dense matrix classes.
bool op_aliasing(vector_base< NumericT > const &, B const &)
Definition: op_executor.hpp:36
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
bool operator!=(self_type const &other)
Definition: matrix.hpp:107
matrix(scalar_matrix< NumericT > const &m)
Creates the matrix from the supplied scalar matrix.
Definition: matrix.hpp:767
vcl_size_t size1() const
Returns the size of the result vector.
Definition: matrix.hpp:72
self_type operator++(int)
Definition: matrix.hpp:104
viennacl::vector< NumericT > operator-(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 - A * v2', where A is a matrix.
float NumericT
Definition: bisect.cpp:40
A tag class representing the (off-)diagonal of a matrix.
Definition: forwards.h:186
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can optionally be preserved.
Definition: matrix.hpp:813
size_type size2() const
Definition: matrix_def.hpp:45
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator*=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1522
Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar ...
Definition: tools.hpp:225
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix_def.hpp:117
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &v)
matrix(matrix_expression< LHS, RHS, OP > const &proxy)
Definition: matrix.hpp:750
size_type size1() const
Definition: matrix_def.hpp:44
vcl_size_t size2() const
Definition: matrix.hpp:73
void resize(MatrixType &matrix, vcl_size_t rows, vcl_size_t cols)
Generic resize routine for resizing a matrix (ViennaCL, uBLAS, etc.) to a new size/dimension.
Definition: size.hpp:63
MatrixT::value_type value_type
Definition: matrix.hpp:96
void clear()
Resets all entries to zero.
Definition: matrix.hpp:634
MatrixT & operator()(void) const
Definition: matrix.hpp:112
Implementations of operations using sparse matrices.
A tag class representing addition.
Definition: forwards.h:88
matrix(const self_type &other)
Definition: matrix.hpp:780
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
matrix_expression(LHS &lhs, RHS &rhs)
Definition: matrix.hpp:62
void scaled_rank_1_update(matrix_base< NumericT > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_expression< const LHS, const RHS, OP >, const S1, op_div > >::type operator/(matrix_expression< const LHS, const RHS, OP > const &proxy, S1 const &val)
Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha.
Definition: matrix.hpp:1603
Determines whether a given expression has a row-major matrix layout.
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
matrix()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:704
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:244
matrix(identity_matrix< NumericT > const &m)
Creates the matrix from the supplied identity matrix.
Definition: matrix.hpp:753
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:895
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:218
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only.
Definition: matrix_def.hpp:81
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &v)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F2 > > const &B)
Definition: matrix.hpp:804
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910
base_type::size_type size_type
Definition: matrix.hpp:701
matrix(const base_type &other)
Definition: matrix.hpp:773
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F2 > > const &B)
Definition: matrix.hpp:801
RHS & rhs() const
Get right hand side operand.
Definition: matrix.hpp:69
viennacl::vector< NumericT > operator+(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 + A * v2', where A is a matrix.
matrix_iterator(MatrixT &mat, vcl_size_t start_row, vcl_size_t start_col)
Definition: matrix.hpp:98
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:121
A tag class representing matrix-vector products and element-wise multiplications. ...
Definition: forwards.h:94
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base)
Rounds an integer to the next multiple of another integer.
Definition: tools.hpp:133
A dense matrix class.
Definition: forwards.h:369
bool row_major() const
Definition: matrix_def.hpp:248
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
bool row_major(T const &)
Definition: row_major.hpp:38
void set(vcl_size_t index, U value)
Definition: util.hpp:115
float ScalarType
Definition: fft_1d.cpp:42
value_type operator*(void)
Definition: matrix.hpp:102
A tag class representing transposed matrices.
Definition: forwards.h:220
Helper implementations that deduce the dimensions of the supplied matrix-valued expressions.
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type internal_row_count, size_type cols, size_type internal_col_count)
Wraps a CUDA or host buffer provided by the user including padding of rows and columns.
Definition: matrix.hpp:737
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:440
self_type & operator*=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:517
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
viennacl::matrix< float > m1
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
matrix(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:712
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:331
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:918
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
self_type & operator+=(const matrix_expression< const LHS, const RHS, OP > &proxy)
self_type & operator/=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:573
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void resize(size_type rows, size_type columns, bool preserve=true)
Definition: matrix.hpp:638
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
memory_types
Definition: forwards.h:345
static vcl_size_t size2(LHS &, RHS &rhs)
void matrix_diag_from_vector(const vector_base< NumericT > &v, int k, matrix_base< NumericT > &A)
Dispatcher interface for A = diag(v, k)
LHS & lhs() const
Get left hand side operand.
Definition: matrix.hpp:66
A tag class representing the extraction of a matrix row to a vector.
Definition: forwards.h:192
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:233
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Implementation of the ViennaCL scalar class.
static void apply(const MATRIXTYPE &, unsigned int &, unsigned int &)
Definition: forwards.h:621
A collection of compile time type deductions.
A tag for row-major storage of a dense matrix.
Definition: forwards.h:304
self_type & operator++(void)
Definition: matrix.hpp:103
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
Definition: mem_handle.hpp:99
A tag indicating iteration along increasing columns index of a matrix.
Definition: matrix.hpp:87
Simple enable-if variant that uses the SFINAE pattern.
void column_sum_impl(const matrix_base< NumericT > &A, vector_base< NumericT > &result)
self_type & operator-=(const matrix_expression< const LHS, const RHS, OP > &proxy)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)