ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
matrix_operations.hpp
#ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2016, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"
#include "viennacl/linalg/detail/op_applier.hpp"
#include "viennacl/linalg/host_based/common.hpp"
#include "viennacl/linalg/prod.hpp"

// Minimum matrix size (size1*size2) for using OpenMP on matrix operations:
#ifndef VIENNACL_OPENMP_MATRIX_MIN_SIZE
  #define VIENNACL_OPENMP_MATRIX_MIN_SIZE 5000
#endif
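// The threshold above can be overridden at compile time. A usage sketch
// (an assumption, not part of the original header): compiling with
//
//   g++ -fopenmp -DVIENNACL_OPENMP_MATRIX_MIN_SIZE=20000 my_prog.cpp
//
// raises the threshold so that only matrices with more than 20000 entries are
// processed with OpenMP; smaller values parallelize more aggressively.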

namespace viennacl
{
namespace linalg
{
namespace host_based
{

//
// Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
//

template<typename DestNumericT, typename SrcNumericT>
void convert(matrix_base<DestNumericT> & mat1, matrix_base<SrcNumericT> const & mat2)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  DestNumericT      * data_A = detail::extract_raw_pointer<DestNumericT>(mat1);
  SrcNumericT const * data_B = detail::extract_raw_pointer<SrcNumericT>(mat2);

  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
  vcl_size_t A_size1  = viennacl::traits::size1(mat1);
  vcl_size_t A_size2  = viennacl::traits::size2(mat1);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);

  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
  vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
  vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);

  if (mat1.row_major())
  {
    detail::matrix_array_wrapper<DestNumericT,      row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<SrcNumericT const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
  }
  else
  {
    detail::matrix_array_wrapper<DestNumericT,      column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<SrcNumericT const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
  }
}
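// Usage sketch (an assumption based on the public ViennaCL API, not part of
// this header): convert() is reached when assigning between matrices of
// different element types, e.g.
//
//   viennacl::matrix<float>  Mf(64, 64);
//   viennacl::matrix<double> Md(64, 64);
//   Md = Mf;  // dispatched to host_based::convert() on the CPU backend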


template<typename NumericT, typename SizeT, typename DistanceT>
void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,
                                   const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
           matrix_base<NumericT> & temp_trans)
{
  typedef NumericT value_type;
  const value_type * data_A = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type * data_B = detail::extract_raw_pointer<value_type>(temp_trans);

  vcl_size_t A_start1 = viennacl::traits::start1(proxy.lhs());
  vcl_size_t A_start2 = viennacl::traits::start2(proxy.lhs());
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
  vcl_size_t A_inc1 = viennacl::traits::stride1(proxy.lhs());
  vcl_size_t A_inc2 = viennacl::traits::stride2(proxy.lhs());
  vcl_size_t A_size1 = viennacl::traits::size1(proxy.lhs());
  vcl_size_t A_size2 = viennacl::traits::size2(proxy.lhs());

  vcl_size_t B_start1 = viennacl::traits::start1(temp_trans);
  vcl_size_t B_start2 = viennacl::traits::start2(temp_trans);
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(temp_trans);
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(temp_trans);
  vcl_size_t B_inc1 = viennacl::traits::stride1(temp_trans);
  vcl_size_t B_inc2 = viennacl::traits::stride2(temp_trans);

  const vcl_size_t sub_mat_size = 64; // the matrix is divided into sub-matrices for better storage access

  vcl_size_t row_count = A_size1 / sub_mat_size;
  vcl_size_t col_count = A_size2 / sub_mat_size;

  vcl_size_t row_count_remainder = A_size1 % sub_mat_size;
  vcl_size_t col_count_remainder = A_size2 % sub_mat_size;

  if (proxy.lhs().row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(row_count*col_count); ++i) // main part of the transposition
    {
      vcl_size_t row = vcl_size_t(i) / col_count;
      vcl_size_t col = vcl_size_t(i) % col_count;

      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1 + A_inc1 * (row * sub_mat_size)
                                                                               , A_start2 + A_inc2 * (col * sub_mat_size), A_inc1
                                                                               , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , row_major, false> wrapper_B(data_B, B_start1 + B_inc1 * (col * sub_mat_size)
                                                                               , B_start2 + B_inc2 * (row * sub_mat_size), B_inc1
                                                                               , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < sub_mat_size; ++j)
        for (vcl_size_t k = 0; k < sub_mat_size; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }
    { // transposition of the remainder on the right side of the matrix
      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1
                                                                               , A_start2 + A_inc2 * (col_count * sub_mat_size), A_inc1
                                                                               , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , row_major, false> wrapper_B(data_B, B_start1 + B_inc1 * (col_count * sub_mat_size)
                                                                               , B_start2, B_inc1
                                                                               , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < col_count_remainder; ++j)
        for (vcl_size_t k = 0; k < A_size1; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }
    { // transposition of the remainder on the bottom side of the matrix
      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1 + A_inc1 * (row_count * sub_mat_size)
                                                                               , A_start2, A_inc1
                                                                               , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , row_major, false> wrapper_B(data_B, B_start1
                                                                               , B_start2 + B_inc2 * (row_count * sub_mat_size), B_inc1
                                                                               , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < row_count_remainder; ++j)
        for (vcl_size_t k = 0; k < (A_size2 - col_count_remainder); ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(row_count*col_count); ++i) // main part of the transposition
    {
      vcl_size_t row = vcl_size_t(i) / col_count;
      vcl_size_t col = vcl_size_t(i) % col_count;

      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1 + A_inc1 * (row * sub_mat_size)
                                                                                  , A_start2 + A_inc2 * (col * sub_mat_size), A_inc1
                                                                                  , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , column_major, false> wrapper_B(data_B, B_start1 + B_inc1 * (col * sub_mat_size)
                                                                                  , B_start2 + B_inc2 * (row * sub_mat_size), B_inc1
                                                                                  , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < sub_mat_size; ++j)
        for (vcl_size_t k = 0; k < sub_mat_size; ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }
    { // transposition of the remainder on the right side of the matrix
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1
                                                                                  , A_start2 + A_inc2 * (col_count * sub_mat_size), A_inc1
                                                                                  , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , column_major, false> wrapper_B(data_B, B_start1 + B_inc1 * (col_count * sub_mat_size)
                                                                                  , B_start2, B_inc1
                                                                                  , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < col_count_remainder; ++j)
        for (vcl_size_t k = 0; k < A_size1; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }
    { // transposition of the remainder on the bottom side of the matrix
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1 + A_inc1 * (row_count * sub_mat_size)
                                                                                  , A_start2, A_inc1
                                                                                  , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , column_major, false> wrapper_B(data_B, B_start1
                                                                                  , B_start2 + B_inc2 * (row_count * sub_mat_size), B_inc1
                                                                                  , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < row_count_remainder; ++j)
        for (vcl_size_t k = 0; k < (A_size2 - col_count_remainder); ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }
  }
}
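// Note on the routine above: the 64x64 blocking keeps both the reads of the
// source and the writes of the transposed result within cache-friendly tiles.
// A usage sketch (assumption: public API spelling):
//
//   viennacl::matrix<double> A(m, n), B(n, m);
//   B = viennacl::trans(A);  // host matrices end up in trans() above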

template<typename NumericT, typename ScalarT1>
void am(matrix_base<NumericT> & mat1,
        matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
  vcl_size_t A_size1  = viennacl::traits::size1(mat1);
  vcl_size_t A_size2  = viennacl::traits::size2(mat1);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);

  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
  vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
  vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);

  if (mat1.row_major())
  {
    detail::matrix_array_wrapper<value_type,       row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);

    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
  else
  {
    detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);

    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
}
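// Semantics sketch for am() (derived from the loops above): it realizes
// mat1 = alpha * mat2, with the frontend encoding variants via the flags:
//
//   mat1 = (flip_sign_alpha ? -1 : +1) * (reciprocal_alpha ? mat2 / alpha
//                                                          : mat2 * alpha)
//
// so expressions such as `A = 2.0 * B;` or `A = B / s;` map onto this kernel.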


template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm(matrix_base<NumericT> & mat1,
          matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
          matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;

  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
  vcl_size_t A_size1  = viennacl::traits::size1(mat1);
  vcl_size_t A_size2  = viennacl::traits::size2(mat1);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);

  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
  vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
  vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);

  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
  vcl_size_t C_inc1   = viennacl::traits::stride1(mat3);
  vcl_size_t C_inc2   = viennacl::traits::stride2(mat3);
  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);

  if (mat1.row_major())
  {
    detail::matrix_array_wrapper<value_type,       row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
  else
  {
    detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }

}
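// Semantics sketch for ambm() (derived from the branches above): it realizes
// mat1 = alpha * mat2 + beta * mat3 with the same flag encoding as am(), so
// an expression like `A = 2.0 * B - C / s;` lands here.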


template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm_m(matrix_base<NumericT> & mat1,
            matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
            matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;

  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat1);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat1);
  vcl_size_t A_size1  = viennacl::traits::size1(mat1);
  vcl_size_t A_size2  = viennacl::traits::size2(mat1);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);

  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
  vcl_size_t B_inc1   = viennacl::traits::stride1(mat2);
  vcl_size_t B_inc2   = viennacl::traits::stride2(mat2);
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);

  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
  vcl_size_t C_inc1   = viennacl::traits::stride1(mat3);
  vcl_size_t C_inc2   = viennacl::traits::stride2(mat3);
  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);

  if (mat1.row_major())
  {
    detail::matrix_array_wrapper<value_type,       row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
  else
  {
    detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }

}
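// ambm_m() differs from ambm() only in accumulating into the target
// (`+=` instead of `=`), covering expressions such as `A += 2.0 * B + C / s;`.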



template<typename NumericT>
void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type alpha = static_cast<value_type>(s);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  vcl_size_t A_size1  = clear ? viennacl::traits::internal_size1(mat) : viennacl::traits::size1(mat);
  vcl_size_t A_size2  = clear ? viennacl::traits::internal_size2(mat) : viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  if (mat.row_major())
  {
    detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(static_cast<vcl_size_t>(row), col) = alpha;
  }
  else
  {
    detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, static_cast<vcl_size_t>(col)) = alpha;
  }
}
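// Usage sketch (assumption: public API): `mat.clear();` and assignments from
// `viennacl::zero_matrix<T>(m, n)` reduce to matrix_assign(); with
// clear == true the padded region up to the internal size is overwritten too,
// which is why A_size1/A_size2 switch to the internal sizes in that case.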



template<typename NumericT>
void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type alpha = static_cast<value_type>(s);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  vcl_size_t A_size1  = viennacl::traits::size1(mat);
  //vcl_size_t A_size2  = viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  if (mat.row_major())
  {
    detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
  else
  {
    detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
}

template<typename NumericT>
void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT> & mat)
{
  typedef NumericT value_type;

  value_type       *data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type const *data_vec = detail::extract_raw_pointer<value_type>(vec);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  //vcl_size_t A_size1  = viennacl::traits::size1(mat);
  //vcl_size_t A_size2  = viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  vcl_size_t v_start = viennacl::traits::start(vec);
  vcl_size_t v_inc   = viennacl::traits::stride(vec);
  vcl_size_t v_size  = viennacl::traits::size(vec);

  vcl_size_t row_start = 0;
  vcl_size_t col_start = 0;

  if (k >= 0)
    col_start = static_cast<vcl_size_t>(k);
  else
    row_start = static_cast<vcl_size_t>(-k);

  matrix_assign(mat, NumericT(0));

  if (mat.row_major())
  {
    detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t i = 0; i < v_size; ++i)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  }
  else
  {
    detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t i = 0; i < v_size; ++i)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  }
}
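// Usage sketch (assumption: public API spelling): `M = viennacl::diag(v, k);`
// zeroes M and places v on its k-th diagonal; k > 0 selects a superdiagonal,
// k < 0 a subdiagonal, matching the row_start/col_start shift above.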

template<typename NumericT>
void matrix_diag_to_vector(const matrix_base<NumericT> & mat, int k, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  //vcl_size_t A_size1  = viennacl::traits::size1(mat);
  //vcl_size_t A_size2  = viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  vcl_size_t v_start = viennacl::traits::start(vec);
  vcl_size_t v_inc   = viennacl::traits::stride(vec);
  vcl_size_t v_size  = viennacl::traits::size(vec);

  vcl_size_t row_start = 0;
  vcl_size_t col_start = 0;

  if (k >= 0)
    col_start = static_cast<vcl_size_t>(k);
  else
    row_start = static_cast<vcl_size_t>(-k);

  if (mat.row_major())
  {
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t i = 0; i < v_size; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
  }
  else
  {
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t i = 0; i < v_size; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
  }
}

template<typename NumericT>
void matrix_row(const matrix_base<NumericT> & mat, unsigned int i, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  //vcl_size_t A_size1  = viennacl::traits::size1(mat);
  //vcl_size_t A_size2  = viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  vcl_size_t v_start = viennacl::traits::start(vec);
  vcl_size_t v_inc   = viennacl::traits::stride(vec);
  vcl_size_t v_size  = viennacl::traits::size(vec);

  if (mat.row_major())
  {
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t j = 0; j < v_size; ++j)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  }
  else
  {
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t j = 0; j < v_size; ++j)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  }
}

template<typename NumericT>
void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  //vcl_size_t A_size1  = viennacl::traits::size1(mat);
  //vcl_size_t A_size2  = viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  vcl_size_t v_start = viennacl::traits::start(vec);
  vcl_size_t v_inc   = viennacl::traits::stride(vec);
  vcl_size_t v_size  = viennacl::traits::size(vec);

  if (mat.row_major())
  {
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t i = 0; i < v_size; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  }
  else
  {
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);

    for (vcl_size_t i = 0; i < v_size; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  }
}
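// Usage sketch (assumption: public API spelling): `x = viennacl::row(A, i);`
// and `x = viennacl::column(A, j);` extract a single row or column of a host
// matrix through the two routines above.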

//
// Element-wise operations
//

// Binary operations A = B .* C and A = B ./ C

template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_binary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_binary<OpT> > OpFunctor;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());

  vcl_size_t A_start1 = viennacl::traits::start1(A);
  vcl_size_t A_start2 = viennacl::traits::start2(A);
  vcl_size_t A_inc1   = viennacl::traits::stride1(A);
  vcl_size_t A_inc2   = viennacl::traits::stride2(A);
  vcl_size_t A_size1  = viennacl::traits::size1(A);
  vcl_size_t A_size2  = viennacl::traits::size2(A);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);

  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
  vcl_size_t B_inc1   = viennacl::traits::stride1(proxy.lhs());
  vcl_size_t B_inc2   = viennacl::traits::stride2(proxy.lhs());
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());

  vcl_size_t C_start1 = viennacl::traits::start1(proxy.rhs());
  vcl_size_t C_start2 = viennacl::traits::start2(proxy.rhs());
  vcl_size_t C_inc1   = viennacl::traits::stride1(proxy.rhs());
  vcl_size_t C_inc2   = viennacl::traits::stride2(proxy.rhs());
  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(proxy.rhs());
  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(proxy.rhs());

  if (A.row_major())
  {
    detail::matrix_array_wrapper<value_type,       row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
  else
  {
    detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
}
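// Usage sketch (assumption: public API): expressions such as
//
//   A = viennacl::linalg::element_prod(B, C);  // A = B .* C
//   A = viennacl::linalg::element_div(B, C);   // A = B ./ C
//
// are routed here; OpFunctor::apply() performs the scalar operation entry-wise.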

// Unary operations

// A = op(B)
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_unary<OpT> > OpFunctor;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());

  vcl_size_t A_start1 = viennacl::traits::start1(A);
  vcl_size_t A_start2 = viennacl::traits::start2(A);
  vcl_size_t A_inc1   = viennacl::traits::stride1(A);
  vcl_size_t A_inc2   = viennacl::traits::stride2(A);
  vcl_size_t A_size1  = viennacl::traits::size1(A);
  vcl_size_t A_size2  = viennacl::traits::size2(A);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);

  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
  vcl_size_t B_inc1   = viennacl::traits::stride1(proxy.lhs());
  vcl_size_t B_inc2   = viennacl::traits::stride2(proxy.lhs());
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());

  if (A.row_major())
  {
    detail::matrix_array_wrapper<value_type,       row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
  else
  {
    detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
}



//
// Matrix-vector products
//

// A * x

template<typename NumericT>
void prod_impl(const matrix_base<NumericT> & mat, bool trans,
               const vector_base<NumericT> & vec,
                     vector_base<NumericT> & result)
{
  typedef NumericT value_type;

  value_type const * data_A      = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_x      = detail::extract_raw_pointer<value_type>(vec);
  value_type       * data_result = detail::extract_raw_pointer<value_type>(result);

  vcl_size_t A_start1 = viennacl::traits::start1(mat);
  vcl_size_t A_start2 = viennacl::traits::start2(mat);
  vcl_size_t A_inc1   = viennacl::traits::stride1(mat);
  vcl_size_t A_inc2   = viennacl::traits::stride2(mat);
  vcl_size_t A_size1  = viennacl::traits::size1(mat);
  vcl_size_t A_size2  = viennacl::traits::size2(mat);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);

  vcl_size_t start1 = viennacl::traits::start(vec);
  vcl_size_t inc1   = viennacl::traits::stride(vec);

  vcl_size_t start2 = viennacl::traits::start(result);
  vcl_size_t inc2   = viennacl::traits::stride(result);

  if (mat.row_major())
  {
    if (trans)
    {
      vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
      if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
        thread_count = omp_get_max_threads();
#endif
      std::vector<value_type> temp_array(A_size2*thread_count, 0);
      detail::vector_array_wrapper<value_type> wrapper_res(data_result, start2, inc2);

      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_res(col) = 0;

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      {
        vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
        if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
          id = omp_get_thread_num();
#endif
        vcl_size_t begin = (A_size1 * id) / thread_count;
        vcl_size_t end   = (A_size1 * (id + 1)) / thread_count;

        detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_mat(data_A, A_start1 + A_inc1 * begin, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
        detail::vector_array_wrapper<value_type const> wrapper_vec(data_x, start1 + inc1 * begin, inc1);

        for (vcl_size_t row = 0; row < (end - begin); ++row) // run through the matrix sequentially
        {
          value_type temp = wrapper_vec(row);
          for (vcl_size_t col = 0; col < A_size2; ++col)
            temp_array[A_size2 * id + col] += wrapper_mat(row, col) * temp;
        }
      }
      for (vcl_size_t id = 0; id < thread_count; ++id)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_res(col) += temp_array[A_size2 * id + col];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          temp += data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];

        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
  else
  {
    if (!trans)
    {
      vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
      if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
        thread_count = omp_get_max_threads();
#endif
      std::vector<value_type> temp_array(A_size1*thread_count, 0);
      detail::vector_array_wrapper<value_type> wrapper_res(data_result, start2, inc2);

      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_res(row) = 0;

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      {
        vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
        if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
          id = omp_get_thread_num();
#endif
        vcl_size_t begin = (A_size2 * id) / thread_count;
        vcl_size_t end   = (A_size2 * (id + 1)) / thread_count;

        detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_mat(data_A, A_start1, A_start2 + A_inc2 * begin, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
        detail::vector_array_wrapper<value_type const> wrapper_vec(data_x, start1 + inc1 * begin, inc1);

        for (vcl_size_t col = 0; col < (end - begin); ++col) // run through the matrix sequentially
        {
          value_type temp = wrapper_vec(col);
          for (vcl_size_t row = 0; row < A_size1; ++row)
            temp_array[A_size1 * id + row] += wrapper_mat(row, col) * temp;
        }
      }
      for (vcl_size_t id = 0; id < thread_count; ++id)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_res(row) += temp_array[A_size1 * id + row];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size2); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size1; ++col)
          temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];

        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
}
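// Usage sketch (assumption: public API):
//
//   viennacl::vector<double> y = viennacl::linalg::prod(A, x);  // y = A * x
//   y = viennacl::linalg::prod(viennacl::trans(A), x);          // y = A^T * x
//
// The branches that stream along the slow index (transposed row-major,
// non-transposed column-major) accumulate into per-thread buffers first and
// reduce afterwards, avoiding write conflicts between OpenMP threads.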



//
// Matrix-matrix products
//

namespace detail
{
  template<typename MatrixAccT1, typename MatrixAccT2, typename MatrixAccT3, typename NumericT>
  void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
            vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
            NumericT alpha, NumericT beta)
  {
    if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
      return;

    static const vcl_size_t blocksize = 64;

    vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
    vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
    vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;

    //
    // outer loop pair: run over all blocks with indices (block_idx_i, block_idx_j) of the result matrix C:
    //
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((C_size1*C_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long block_idx_i2 = 0; block_idx_i2 < static_cast<long>(num_blocks_C1); ++block_idx_i2)
    {
      // thread-local auxiliary buffers
      std::vector<NumericT> buffer_A(blocksize * blocksize); // row-major
      std::vector<NumericT> buffer_B(blocksize * blocksize); // column-major
      std::vector<NumericT> buffer_C(blocksize * blocksize); // row-major

      vcl_size_t block_idx_i = static_cast<vcl_size_t>(block_idx_i2);
      for (vcl_size_t block_idx_j = 0; block_idx_j < num_blocks_C2; ++block_idx_j)
      {
        // reset block matrix:
        std::fill(buffer_C.begin(), buffer_C.end(), NumericT(0));

        vcl_size_t offset_i = block_idx_i*blocksize;
        vcl_size_t offset_j = block_idx_j*blocksize;

        // C(block_idx_i, block_idx_j) += A(block_idx_i, block_idx_k) * B(block_idx_k, block_idx_j)
        for (vcl_size_t block_idx_k = 0; block_idx_k < num_blocks_A2; ++block_idx_k)
        {
          // flush buffers:
          std::fill(buffer_A.begin(), buffer_A.end(), NumericT(0));
          std::fill(buffer_B.begin(), buffer_B.end(), NumericT(0));

          vcl_size_t offset_k = block_idx_k*blocksize;

          // load current data:
          for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
            for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
              buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);

          for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
            for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
              buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);

          // multiply (this is the hot spot in terms of flops)
          for (vcl_size_t i = 0; i < blocksize; ++i)
          {
            NumericT const * ptrA = &(buffer_A[i*blocksize]);
            for (vcl_size_t j = 0; j < blocksize; ++j)
            {
              NumericT const * ptrB = &(buffer_B[j*blocksize]);

              NumericT temp = NumericT(0);
              for (vcl_size_t k = 0; k < blocksize; ++k)
                temp += ptrA[k] * ptrB[k]; // buffer_A[i*blocksize + k] * buffer_B[k + j*blocksize];

              buffer_C[i*blocksize + j] += temp;
            }
          }
        }

        // write result:
        if (beta > 0 || beta < 0) // i.e. beta != 0
        {
          for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
            for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
              C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
        }
        else
        {
          for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
            for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
              C(i,j) = alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
        }

      } // for block j
    } // for block i

  } // prod()

} // namespace detail
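// Note on detail::prod() above: C is computed tile by tile with 64x64 blocks.
// A-tiles are buffered row-major and B-tiles column-major, so the innermost
// k-loop reads both buffers contiguously; the result is
// C = alpha * A * B + beta * C, where the beta == 0 branch skips reading C back.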

template<typename NumericT, typename ScalarT1, typename ScalarT2 >
void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
               const matrix_base<NumericT> & B, bool trans_B,
                     matrix_base<NumericT> & C,
               ScalarT1 alpha,
               ScalarT2 beta)
{
  typedef NumericT value_type;

  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
  value_type       * data_C = detail::extract_raw_pointer<value_type>(C);

  vcl_size_t A_start1 = viennacl::traits::start1(A);
  vcl_size_t A_start2 = viennacl::traits::start2(A);
  vcl_size_t A_inc1   = viennacl::traits::stride1(A);
  vcl_size_t A_inc2   = viennacl::traits::stride2(A);
  vcl_size_t A_size1  = viennacl::traits::size1(A);
  vcl_size_t A_size2  = viennacl::traits::size2(A);
  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);

  vcl_size_t B_start1 = viennacl::traits::start1(B);
  vcl_size_t B_start2 = viennacl::traits::start2(B);
  vcl_size_t B_inc1   = viennacl::traits::stride1(B);
  vcl_size_t B_inc2   = viennacl::traits::stride2(B);
  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);

  vcl_size_t C_start1 = viennacl::traits::start1(C);
  vcl_size_t C_start2 = viennacl::traits::start2(C);
  vcl_size_t C_inc1   = viennacl::traits::stride1(C);
  vcl_size_t C_inc2   = viennacl::traits::stride2(C);
  vcl_size_t C_size1  = viennacl::traits::size1(C);
  vcl_size_t C_size2  = viennacl::traits::size2(C);
  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);

  if (!trans_A && !trans_B)
  {
    if (A.row_major() && B.row_major() && C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else if (A.row_major() && B.row_major() && !C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, row_major, false>    wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, row_major, false>    wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else if (A.row_major() && !B.row_major() && C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, row_major, false>    wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       row_major, false>    wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else if (A.row_major() && !B.row_major() && !C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, row_major, false>    wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else if (!A.row_major() && B.row_major() && C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, row_major, false>    wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       row_major, false>    wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else if (!A.row_major() && B.row_major() && !C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, row_major, false>    wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else if (!A.row_major() && !B.row_major() && C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       row_major, false>    wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
    else
    {
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
      detail::matrix_array_wrapper<value_type,       column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);

      detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    }
  }
  else if (!trans_A && trans_B)
  {
    if (A.row_major() && B.row_major() && C.row_major())
    {
      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1348  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1349  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1350 
1351  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1352  }
1353  else if (A.row_major() && B.row_major() && !C.row_major())
1354  {
1355  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1356  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1357  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1358 
1359  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1360  }
1361  else if (A.row_major() && !B.row_major() && C.row_major())
1362  {
1363  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1364  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1365  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1366 
1367  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1368  }
1369  else if (A.row_major() && !B.row_major() && !C.row_major())
1370  {
1371  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1372  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1373  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1374 
1375  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1376  }
1377  else if (!A.row_major() && B.row_major() && C.row_major())
1378  {
1379  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1380  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1381  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1382 
1383  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1384  }
1385  else if (!A.row_major() && B.row_major() && !C.row_major())
1386  {
1387  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1388  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1389  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1390 
1391  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1392  }
1393  else if (!A.row_major() && !B.row_major() && C.row_major())
1394  {
1395  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1396  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1397  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1398 
1399  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1400  }
1401  else
1402  {
1403  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1404  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1405  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1406 
1407  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1408  }
1409  }
1410  else if (trans_A && !trans_B)
1411  {
1412  if (A.row_major() && B.row_major() && C.row_major())
1413  {
1414  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1415  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1416  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1417 
1418  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1419  }
1420  else if (A.row_major() && B.row_major() && !C.row_major())
1421  {
1422  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1423  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1424  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1425 
1426  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1427  }
1428  else if (A.row_major() && !B.row_major() && C.row_major())
1429  {
1430  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1431  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1432  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1433 
1434  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1435  }
1436  else if (A.row_major() && !B.row_major() && !C.row_major())
1437  {
1438  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1439  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1440  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1441 
1442  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1443  }
1444  else if (!A.row_major() && B.row_major() && C.row_major())
1445  {
1446  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1447  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1448  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1449 
1450  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1451  }
1452  else if (!A.row_major() && B.row_major() && !C.row_major())
1453  {
1454  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1455  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1456  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1457 
1458  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1459  }
1460  else if (!A.row_major() && !B.row_major() && C.row_major())
1461  {
1462  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1463  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1464  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1465 
1466  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1467  }
1468  else
1469  {
1470  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1471  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1472  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1473 
1474  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1475  }
1476  }
1477  else if (trans_A && trans_B)
1478  {
1479  if (A.row_major() && B.row_major() && C.row_major())
1480  {
1481  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1482  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1483  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1484 
1485  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1486  }
1487  else if (A.row_major() && B.row_major() && !C.row_major())
1488  {
1489  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1490  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1491  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1492 
1493  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1494  }
1495  else if (A.row_major() && !B.row_major() && C.row_major())
1496  {
1497  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1498  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1499  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1500 
1501  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1502  }
1503  else if (A.row_major() && !B.row_major() && !C.row_major())
1504  {
1505  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1506  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1507  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1508 
1509  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1510  }
1511  else if (!A.row_major() && B.row_major() && C.row_major())
1512  {
1513  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1514  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1515  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1516 
1517  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1518  }
1519  else if (!A.row_major() && B.row_major() && !C.row_major())
1520  {
1521  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1522  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1523  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1524 
1525  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1526  }
1527  else if (!A.row_major() && !B.row_major() && C.row_major())
1528  {
1529  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1530  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1531  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1532 
1533  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1534  }
1535  else
1536  {
1537  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1538  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1539  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1540 
1541  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1542  }
1543  }
1544 }
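A minimal sketch of the public frontend that reaches the dispatcher above, which implements C = alpha * op(A) * op(B) + beta * C for every combination of row-/column-major storage of A, B, C and the two transposition flags (matrix sizes and the include set below are illustrative assumptions, not part of this header):

  #include "viennacl/matrix.hpp"
  #include "viennacl/linalg/prod.hpp"

  void gemm_sketch()
  {
    viennacl::matrix<float> A(64, 64);                          // row_major by default
    viennacl::matrix<float, viennacl::column_major> B(64, 64);  // mixed layouts are dispatched above
    viennacl::matrix<float> C(64, 64);

    C = viennacl::linalg::prod(A, B);                   // !trans_A && !trans_B branch
    C = viennacl::linalg::prod(viennacl::trans(A), B);  // trans_A && !trans_B branch
  }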
1545 
1546 
1547 
1548 
1549 //
1550 /////////////////////////   miscellaneous operations /////////////////////////////////
1551 //
1552 
1553 
1564 /** @brief The implementation of the operation mat1 += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. */
1565 template<typename NumericT, typename ScalarT>
1566 void scaled_rank_1_update(matrix_base<NumericT> & mat1,
1567  ScalarT const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
1568  const vector_base<NumericT> & vec1,
1569  const vector_base<NumericT> & vec2)
1570 {
1571  typedef NumericT value_type;
1572 
1573  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
1574  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
1575  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);
1576 
1577  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
1578  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
1579  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
1580  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
1581  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
1582  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
1583  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
1584  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
1585 
1586  vcl_size_t start1 = viennacl::traits::start(vec1);
1587  vcl_size_t inc1 = viennacl::traits::stride(vec1);
1588 
1589  vcl_size_t start2 = viennacl::traits::start(vec2);
1590  vcl_size_t inc2 = viennacl::traits::stride(vec2);
1591 
1592  value_type data_alpha = alpha;
1593  if (flip_sign_alpha)
1594  data_alpha = -data_alpha;
1595 
1596  if (mat1.row_major())
1597  {
1598  if(reciprocal_alpha)
1599  {
1600 #ifdef VIENNACL_WITH_OPENMP
1601  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1602 #endif
1603  for (long row = 0; row < static_cast<long>(A_size1); ++row)
1604  {
1605  value_type value_v1 = data_v1[static_cast<vcl_size_t>(row) * inc1 + start1] / data_alpha;
1606  for (vcl_size_t col = 0; col < A_size2; ++col)
1607  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
1608  }
1609  }
1610  else
1611  {
1612 #ifdef VIENNACL_WITH_OPENMP
1613  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1614 #endif
1615  for (long row = 0; row < static_cast<long>(A_size1); ++row)
1616  {
1617  value_type value_v1 = data_v1[static_cast<vcl_size_t>(row) * inc1 + start1] * data_alpha;
1618  for (vcl_size_t col = 0; col < A_size2; ++col)
1619  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
1620  }
1621  }
1622  }
1623  else
1624  {
1625  if(reciprocal_alpha)
1626  {
1627 #ifdef VIENNACL_WITH_OPENMP
1628  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1629 #endif
1630  for (long col = 0; col < static_cast<long>(A_size2); ++col) //run through matrix sequentially
1631  {
1632  value_type value_v2 = data_v2[static_cast<vcl_size_t>(col) * inc2 + start2] / data_alpha;
1633  for (vcl_size_t row = 0; row < A_size1; ++row)
1634  data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, static_cast<vcl_size_t>(col) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
1635  }
1636  }
1637  else
1638  {
1639 #ifdef VIENNACL_WITH_OPENMP
1640  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1641 #endif
1642  for (long col = 0; col < static_cast<long>(A_size2); ++col) //run through matrix sequentially
1643  {
1644  value_type value_v2 = data_v2[static_cast<vcl_size_t>(col) * inc2 + start2] * data_alpha;
1645  for (vcl_size_t row = 0; row < A_size1; ++row)
1646  data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, static_cast<vcl_size_t>(col) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
1647  }
1648  }
1649  }
1650 }
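The kernel above backs the convenience expression mat1 += alpha * outer_prod(vec1, vec2); reciprocal_alpha and flip_sign_alpha encode division by alpha and negation of alpha, respectively. A minimal sketch (sizes and includes are illustrative assumptions):

  #include "viennacl/matrix.hpp"
  #include "viennacl/vector.hpp"

  void rank1_sketch()
  {
    viennacl::matrix<double> A(100, 100);
    viennacl::vector<double> v1(100), v2(100);

    A += 0.5 * viennacl::linalg::outer_prod(v1, v2);  // A += alpha * v1 * v2^T
  }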
1651 
1652 
1659 /** @brief This function stores the diagonal and the superdiagonal of a matrix in two vectors. */
1660 template <typename NumericT, typename S1>
1661 void bidiag_pack_impl(matrix_base<NumericT> & A,
1662  vector_base<S1> & D,
1663  vector_base<S1> & S
1664  )
1665 
1666  {
1667  typedef NumericT value_type;
1668 
1669  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1670  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1671  value_type * data_S = detail::extract_raw_pointer<value_type>(S);
1672 
1673  vcl_size_t A_start1 = viennacl::traits::start1(A);
1674  vcl_size_t A_start2 = viennacl::traits::start2(A);
1675  vcl_size_t A_inc1 = viennacl::traits::stride1(A);
1676  vcl_size_t A_inc2 = viennacl::traits::stride2(A);
1677  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1678  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1679 
1680  vcl_size_t start1 = viennacl::traits::start(D);
1681  vcl_size_t inc1 = viennacl::traits::stride(D);
1682  vcl_size_t size1 = viennacl::traits::size(D);
1683 
1684  vcl_size_t start2 = viennacl::traits::start(S);
1685  vcl_size_t inc2 = viennacl::traits::stride(S);
1686  vcl_size_t size2 = viennacl::traits::size(S);
1687 
1688  vcl_size_t size = std::min(size1, size2);
1689  if (A.row_major())
1690  {
1691 #ifdef VIENNACL_WITH_OPENMP
1692  #pragma omp parallel for if ((size1*size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1693 #endif
1694  for(long i2 = 0; i2 < long(size) - 1; i2++)
1695  {
1696  vcl_size_t i = vcl_size_t(i2);
1697  data_D[start1 + inc1 * i] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1698  data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1699  }
1700  data_D[start1 + inc1 * (size-1)] = data_A[viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1701 
1702  }
1703  else
1704  {
1705 #ifdef VIENNACL_WITH_OPENMP
1706  #pragma omp parallel for if ((size1*size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1707 #endif
1708  for(long i2 = 0; i2 < long(size) - 1; i2++)
1709  {
1710  vcl_size_t i = vcl_size_t(i2);
1711  data_D[start1 + inc1 * i] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1712  data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1713  }
1714  data_D[start1 + inc1 * (size-1)] = data_A[viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1715  }
1716 
1717  }
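Concretely, for size = min(size1, size2) = 4 with unit strides and zero offsets, the loop above yields

  D = [ A(0,0), A(1,1), A(2,2), A(3,3) ]   // main diagonal
  S = [ S[0],   A(0,1), A(1,2), A(2,3) ]   // superdiagonal, shifted by one; S[0] is left untouched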
1718 
1719 
1720 
1721  template <typename NumericT, typename VectorType>
1722  void bidiag_pack(matrix_base<NumericT> & A,
1723  VectorType & dh,
1724  VectorType & sh
1725  )
1726  {
1727 
1728  viennacl::linalg::host_based::bidiag_pack_impl(A, dh, sh);
1729 
1730  }
1731 
1737  /** @brief This function applies a householder transformation to a matrix: A <- P * A with a householder reflection P built from D. */
1738  template <typename NumericT>
1739  void house_update_A_left(matrix_base<NumericT> & A,
1740  vector_base<NumericT> & D,
1741  vcl_size_t start)
1742  {
1743  typedef NumericT value_type;
1744  NumericT ss = 0;
1745  vcl_size_t row_start = start + 1;
1746 
1747  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1748  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1749 
1750  vcl_size_t A_start1 = viennacl::traits::start1(A);
1751  vcl_size_t A_start2 = viennacl::traits::start2(A);
1752  vcl_size_t A_inc1 = viennacl::traits::stride1(A);
1753  vcl_size_t A_inc2 = viennacl::traits::stride2(A);
1754  vcl_size_t A_size1 = viennacl::traits::size1(A);
1755  vcl_size_t A_size2 = viennacl::traits::size2(A);
1756  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1757  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1758 
1759  vcl_size_t start1 = viennacl::traits::start(D);
1760  vcl_size_t inc1 = viennacl::traits::stride(D);
1761 
1762  if (A.row_major())
1763  {
1764  for(vcl_size_t i = 0; i < A_size2; i++)
1765  {
1766  ss = 0;
1767  for(vcl_size_t j = row_start; j < A_size1; j++)
1768  ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1769 #ifdef VIENNACL_WITH_OPENMP
1770  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1771 #endif
1772  for(long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1773  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1774  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1775  (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)]* ss);
1776  }
1777  }
1778  else
1779  {
1780  for(vcl_size_t i = 0; i < A_size2; i++)
1781  {
1782  ss = 0;
1783  for(vcl_size_t j = row_start; j < A_size1; j++)
1784  ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1785 #ifdef VIENNACL_WITH_OPENMP
1786  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1787 #endif
1788  for(long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1789  data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]=
1790  data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1791  (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)]* ss);
1792  }
1793  }
1794 
1795  }
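The two passes above realize A <- P * A with the reflector P = I - 2 v v^T, where v is taken from D and only rows j > start participate: for each column i, first ss = sum_{j > start} v_j * A(j, i) is accumulated, then A(j, i) <- A(j, i) - 2 * v_j * ss is applied.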
1796 
1802  /** @brief This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P built from D. */
1803  template <typename NumericT>
1804  void house_update_A_right(matrix_base<NumericT> & A,
1805  vector_base<NumericT> & D)
1806  {
1807  typedef NumericT value_type;
1808  NumericT ss = 0;
1809 
1810  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1811  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1812 
1813  vcl_size_t A_start1 = viennacl::traits::start1(A);
1814  vcl_size_t A_start2 = viennacl::traits::start2(A);
1815  vcl_size_t A_inc1 = viennacl::traits::stride1(A);
1816  vcl_size_t A_inc2 = viennacl::traits::stride2(A);
1817  vcl_size_t A_size1 = viennacl::traits::size1(A);
1818  vcl_size_t A_size2 = viennacl::traits::size2(A);
1819  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1820  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1821 
1822  vcl_size_t start1 = viennacl::traits::start(D);
1823  vcl_size_t inc1 = viennacl::traits::stride(D);
1824 
1825  if (A.row_major())
1826  {
1827  for(vcl_size_t i = 0; i < A_size1; i++)
1828  {
1829  ss = 0;
1830  for(vcl_size_t j = 0; j < A_size2; j++) // ss = ss + D[j] * A(i, j)
1831  ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1832 
1833  NumericT sum_Av = ss;
1834 #ifdef VIENNACL_WITH_OPENMP
1835  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1836 #endif
1837  for(long j = 0; j < static_cast<long>(A_size2); j++) // A(i, j) = A(i, j) - 2 * D[j] * sum_Av
1838  data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1839  data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
1840  }
1841  }
1842  else
1843  {
1844  for(vcl_size_t i = 0; i < A_size1; i++)
1845  {
1846  ss = 0;
1847  for(vcl_size_t j = 0; j < A_size2; j++) // ss = ss + D[j] * A(i, j)
1848  ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1849 
1850  NumericT sum_Av = ss;
1851 #ifdef VIENNACL_WITH_OPENMP
1852  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1853 #endif
1854  for(long j = 0; j < static_cast<long>(A_size2); j++) // A(i, j) = A(i, j) - 2 * D[j] * sum_Av
1855  data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1856  data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
1857  }
1858  }
1859 
1860 
1861  }
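Analogously to house_update_A_left, this realizes the right-sided update A <- A * P with P = I - 2 v v^T: for each row i, first ss = sum_j v_j * A(i, j) is accumulated, then A(i, j) <- A(i, j) - 2 * v_j * ss is applied.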
1862 
1868  /** @brief This function updates the matrix Q, which is needed for the computation of the eigenvectors. */
1869  template <typename NumericT>
1870  void house_update_QL(matrix_base<NumericT> & Q,
1871  vector_base<NumericT> & D,
1872  vcl_size_t A_size1)
1873 
1874  {
1875  NumericT beta = 2;
1876  viennacl::matrix<NumericT> vcl_P = viennacl::identity_matrix<NumericT>(A_size1);
1877  viennacl::matrix<NumericT> Q_temp = Q;
1878  viennacl::vector<NumericT> vcl_D = D;
1879 
1880 
1881  viennacl::linalg::host_based::scaled_rank_1_update(vcl_P, beta, 1, 0, 1, vcl_D, vcl_D);
1882  Q = viennacl::linalg::prod(Q_temp, vcl_P);
1883 
1884  }
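Since flip_sign_alpha is set in the call above, the rank-1 update turns the identity into the reflector vcl_P = I - beta * D * D^T = I - 2 D D^T, so the net effect of the routine is Q <- Q * (I - 2 D D^T), accumulating one householder reflection into the eigenvector matrix.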
1885 
1894  /** @brief This function updates the matrix Q. It is part of the tql2 algorithm. */
1895  template<typename NumericT>
1896  void givens_next(matrix_base<NumericT> & Q,
1897  vector_base<NumericT> & tmp1,
1898  vector_base<NumericT> & tmp2,
1899  int l,
1900  int m
1901  )
1902  {
1903  typedef NumericT value_type;
1904 
1905  value_type * data_Q = detail::extract_raw_pointer<value_type>(Q);
1906  value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
1907  value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);
1908 
1909  vcl_size_t Q_start1 = viennacl::traits::start1(Q);
1910  vcl_size_t Q_start2 = viennacl::traits::start2(Q);
1911  vcl_size_t Q_inc1 = viennacl::traits::stride1(Q);
1912  vcl_size_t Q_inc2 = viennacl::traits::stride2(Q);
1913  vcl_size_t Q_size1 = viennacl::traits::size1(Q);
1914  vcl_size_t Q_internal_size1 = viennacl::traits::internal_size1(Q);
1915  vcl_size_t Q_internal_size2 = viennacl::traits::internal_size2(Q);
1916 
1917  vcl_size_t start1 = viennacl::traits::start(tmp1);
1918  vcl_size_t inc1 = viennacl::traits::stride(tmp1);
1919 
1920  vcl_size_t start2 = viennacl::traits::start(tmp2);
1921  vcl_size_t inc2 = viennacl::traits::stride(tmp2);
1922 
1923  if (Q.row_major())
1924  {
1925  for( int i = m - 1; i >= l; i--)
1926  {
1927 #ifdef VIENNACL_WITH_OPENMP
1928  #pragma omp parallel for if ((Q_size1*Q_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1929 #endif
1930  for(long k = 0; k < static_cast<long>(Q_size1); k++)
1931  {
1932 
1933  // h = data_Q(k, i+1);
1934  NumericT h = data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)];
1935 
1936  // Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i]*h;
1937  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp2[start2 + inc2 * vcl_size_t(i)] *
1938  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] + data_tmp1[start1 + inc1 * vcl_size_t(i)] * h;
1939 
1940  // Q(k, i) = tmp1[i] * Q(k, i) - tmp2[i]*h;
1941  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp1[start1 + inc1 * vcl_size_t(i)] *
1942  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] - data_tmp2[start2 + inc2 * vcl_size_t(i)]*h;
1943  }
1944  }
1945  }
1946  else // column_major
1947  {
1948  for( int i = m - 1; i >= l; i--)
1949  {
1950 #ifdef VIENNACL_WITH_OPENMP
1951  #pragma omp parallel for if ((Q_size1*Q_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
1952 #endif
1953  for(long k = 0; k < static_cast<long>(Q_size1); k++)
1954  {
1955 
1956  // h = data_Q(k, i+1);
1957  NumericT h = data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)];
1958 
1959  // Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i]*h;
1960  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp2[start2 + inc2 * vcl_size_t(i)] *
1961  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] + data_tmp1[start1 + inc1 * vcl_size_t(i)] * h;
1962 
1963  // Q(k, i) = tmp1[i] * Q(k, i) - tmp2[i]*h;
1964  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp1[start1 + inc1 * vcl_size_t(i)] *
1965  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] - data_tmp2[start2 + inc2 * vcl_size_t(i)]*h;
1966  }
1967  }
1968  }
1969 
1970  }
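Each iteration applies one Givens rotation to Q from the right: with c = tmp1[i] and s = tmp2[i], and using the old values on the right-hand sides,

  Q(:, i)   <- c * Q(:, i) - s * Q(:, i+1)
  Q(:, i+1) <- s * Q(:, i) + c * Q(:, i+1)

i.e. Q <- Q * G(i, i+1, c, s), as required by the tql2 iteration.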
1971 
1972 
1981 /** @brief This function copies a row or a column from a matrix to a vector. */
1982 template <typename NumericT, typename S1>
1983 void copy_vec(matrix_base<NumericT> & A, vector_base<S1> & V,
1984  vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
1985 {
1986  typedef NumericT value_type;
1987 
1988  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1989  value_type * data_V = detail::extract_raw_pointer<value_type>(V);
1990 
1991  vcl_size_t A_start1 = viennacl::traits::start1(A);
1992  vcl_size_t A_start2 = viennacl::traits::start2(A);
1993  vcl_size_t A_inc1 = viennacl::traits::stride1(A);
1994  vcl_size_t A_inc2 = viennacl::traits::stride2(A);
1995  vcl_size_t A_size1 = viennacl::traits::size1(A);
1996  vcl_size_t A_size2 = viennacl::traits::size2(A);
1997  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1998  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1999 
2000  if(copy_col)
2001  {
2002  if (A.row_major())
2003  {
2004 #ifdef VIENNACL_WITH_OPENMP
2005  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
2006 #endif
2007  for(long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
2008  {
2009  data_V[i - static_cast<long>(row_start)] = data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
2010  }
2011  }
2012  else
2013  {
2014 #ifdef VIENNACL_WITH_OPENMP
2015  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
2016 #endif
2017  for(long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
2018  {
2019  data_V[i - static_cast<long>(row_start)] = data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
2020  }
2021  }
2022  }
2023  else
2024  {
2025  if (A.row_major())
2026  {
2027 #ifdef VIENNACL_WITH_OPENMP
2028  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
2029 #endif
2030  for(long i = static_cast<long>(col_start); i < static_cast<long>(A_size2); i++)
2031  {
2032  data_V[i - static_cast<long>(col_start)] = data_A[viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
2033  }
2034  }
2035  else
2036  {
2037 #ifdef VIENNACL_WITH_OPENMP
2038  #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
2039 #endif
2040  for(long i = static_cast<long>(col_start); i < static_cast<long>(A_size2); i++)
2041  {
2042  data_V[i - static_cast<long>(col_start)] = data_A[viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
2043  }
2044  }
2045  }
2046 }
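A minimal call sketch (sizes are illustrative assumptions): with copy_col = true the routine gathers the tail of a column, with copy_col = false the tail of a row.

  viennacl::matrix<float> A(8, 8);
  viennacl::vector<float> V(7);
  viennacl::linalg::host_based::copy_vec(A, V, 1, 2, true);  // V[k] = A(1 + k, 2) for k = 0..6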
2047 
2048 } // namespace host_based
2049 } //namespace linalg
2050 } //namespace viennacl
2051 
2052 #endif