ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 
27 #include "viennacl/ocl/device.hpp"
28 #include "viennacl/ocl/handle.hpp"
29 #include "viennacl/ocl/kernel.hpp"
30 #include "viennacl/scalar.hpp"
31 #include "viennacl/vector.hpp"
33 #include "viennacl/tools/tools.hpp"
37 
38 #include "viennacl/traits/size.hpp"
42 
48 
49 namespace viennacl
50 {
51 namespace linalg
52 {
53 namespace opencl
54 {
55 
56 namespace detail
57 {
58 
59  template<typename NumericT>
60  viennacl::ocl::kernel & kernel_for_matrix(matrix_base<NumericT> const & M, std::string const & kernel_name)
61  {
62  viennacl::ocl::context & ctx = traits::opencl_context(M);
63  viennacl::ocl::program * program;
64  if (M.row_major())
65  {
67  KernelClass::init(ctx);
68  program = &ctx.get_program(KernelClass::program_name());
69  }
70  else
71  {
73  KernelClass::init(ctx);
74  program = &ctx.get_program(KernelClass::program_name());
75  }
76  return program->get_kernel(kernel_name);
77  }
78 
79  template<typename NumericT>
80  viennacl::ocl::kernel & element_kernel_for_matrix(matrix_base<NumericT> const & M, std::string const & kernel_name)
81  {
82  viennacl::ocl::context & ctx = traits::opencl_context(M);
83  viennacl::ocl::program * program;
84  if (M.row_major())
85  {
87  KernelClass::init(ctx);
88  program = &ctx.get_program(KernelClass::program_name());
89  }
90  else
91  {
93  KernelClass::init(ctx);
94  program = &ctx.get_program(KernelClass::program_name());
95  }
96  return program->get_kernel(kernel_name);
97  }
98 
99  template<typename NumericT>
100  viennacl::ocl::kernel & legacy_kernel_for_matrix(matrix_base<NumericT> const & M, std::string const & kernel_name)
101  {
102  viennacl::ocl::context & ctx = traits::opencl_context(M);
103  viennacl::ocl::program * program;
104  if (M.row_major())
105  {
107  KernelClass::init(ctx);
108  program = &ctx.get_program(KernelClass::program_name());
109  }
110  else
111  {
113  KernelClass::init(ctx);
114  program = &ctx.get_program(KernelClass::program_name());
115  }
116  return program->get_kernel(kernel_name);
117  }
118 
119 }
120 
121 //
122 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
123 //
124 
125 const std::string SVD_BIDIAG_PACK_KERNEL = "bidiag_pack";
126 const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL = "house_update_A_left";
127 const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL = "house_update_A_right";
128 const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL = "house_update_QL";
129 const std::string SVD_GIVENS_NEXT_KERNEL = "givens_next";
130 const std::string SVD_COPY_COL_KERNEL = "copy_col";
131 const std::string SVD_COPY_ROW_KERNEL = "copy_row";
132 
133 template<typename DestNumericT, typename SrcNumericT>
135 {
136  assert(dest.row_major() == src.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
137 
138  assert(viennacl::traits::opencl_handle(dest).context() == viennacl::traits::opencl_handle(src).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
139 
140  std::string kernel_name("convert_");
141  kernel_name += dest.row_major() ? "row_" : "col_";
143  kernel_name += "_";
145 
146  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(dest).context());
149 
150  viennacl::ocl::enqueue(k( dest, cl_uint(dest.start1()), cl_uint(dest.stride1()), cl_uint(dest.size1()), cl_uint(dest.internal_size1()), cl_uint(dest.start2()), cl_uint(dest.stride2()), cl_uint(dest.size2()), cl_uint(dest.internal_size2()),
151  src, cl_uint( src.start1()), cl_uint( src.stride1()), cl_uint( src.size1()), cl_uint( src.internal_size1()), cl_uint( src.start2()), cl_uint( src.stride2()), cl_uint( src.size2()), cl_uint( src.internal_size2())
152  ) );
153 }
154 
155 //
156 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
157 //
158 
159 template <typename NumericT,
160  typename ScalarT1>
162  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
163 {
165 
166  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
167 
168  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
169  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
170  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
171  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
173 
174  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
175  options_alpha,
176  viennacl::traits::opencl_handle(mat2),
177  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
178  cl_uint(viennacl::traits::stride1(mat2)), cl_uint(viennacl::traits::stride2(mat2)),
180  )
181  );
182 }
183 
184 
185 template <typename NumericT,
186  typename ScalarT1, typename ScalarT2>
188  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
189  matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
190 {
191  std::string kernel_name;
193  kernel_name = "ambm_cpu_cpu";
195  kernel_name = "ambm_cpu_gpu";
197  kernel_name = "ambm_gpu_cpu";
198  else
199  kernel_name = "ambm_gpu_gpu";
200 
201  viennacl::ocl::kernel & k = detail::kernel_for_matrix(mat1, kernel_name);
202 
203  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
204  cl_uint options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
205 
206  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
207  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
208  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
209  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
211 
212  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
213  options_alpha,
214  viennacl::traits::opencl_handle(mat2),
215  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
216  cl_uint(viennacl::traits::stride1(mat2)), cl_uint(viennacl::traits::stride2(mat2)),
218 
219  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(beta)),
220  options_beta,
221  viennacl::traits::opencl_handle(mat3),
222  cl_uint(viennacl::traits::start1(mat3)), cl_uint(viennacl::traits::start2(mat3)),
223  cl_uint(viennacl::traits::stride1(mat3)), cl_uint(viennacl::traits::stride2(mat3)),
225  )
226  );
227 }
228 
229 
230 template <typename NumericT,
231  typename ScalarT1, typename ScalarT2>
233  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
234  matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
235 {
236  std::string kernel_name;
238  kernel_name = "ambm_m_cpu_cpu";
240  kernel_name = "ambm_m_cpu_gpu";
242  kernel_name = "ambm_m_gpu_cpu";
243  else
244  kernel_name = "ambm_m_gpu_gpu";
245 
246  viennacl::ocl::kernel & k = detail::kernel_for_matrix(mat1, kernel_name);
247 
248  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
249  cl_uint options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
250 
251  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
252  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
253  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
254  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
256 
257  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
258  options_alpha,
259  viennacl::traits::opencl_handle(mat2),
260  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
261  cl_uint(viennacl::traits::stride1(mat2)), cl_uint(viennacl::traits::stride2(mat2)),
263 
264  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(beta)),
265  options_beta,
266  viennacl::traits::opencl_handle(mat3),
267  cl_uint(viennacl::traits::start1(mat3)), cl_uint(viennacl::traits::start2(mat3)),
268  cl_uint(viennacl::traits::stride1(mat3)), cl_uint(viennacl::traits::stride2(mat3)),
270  )
271  );
272 }
273 
274 template<typename NumericT,
275  typename SizeT, typename DistanceT>
277  matrix_base<NumericT> & temp_trans)
278 {
279  std::string kernel_name("trans_kernel");
280  viennacl::ocl::kernel& kernel = detail::legacy_kernel_for_matrix(proxy.lhs(),kernel_name);
281  viennacl::ocl::enqueue(kernel(proxy.lhs(),
282  static_cast<cl_uint>(proxy.lhs().start1()), static_cast<cl_uint>(proxy.lhs().start2()),
283  static_cast<cl_uint>(proxy.lhs().internal_size1()), static_cast<cl_uint>(proxy.lhs().internal_size2()),
284  static_cast<cl_uint>(proxy.lhs().size1()), static_cast<cl_uint>(proxy.lhs().size2()),
285  static_cast<cl_uint>(proxy.lhs().stride1()), static_cast<cl_uint>(proxy.lhs().stride2()),
286 
287  temp_trans,
288  static_cast<cl_uint>(temp_trans.start1()), static_cast<cl_uint>(temp_trans.start2()),
289  static_cast<cl_uint>(temp_trans.internal_size1()), static_cast<cl_uint>(temp_trans.internal_size2()),
290  static_cast<cl_uint>(temp_trans.stride1()), static_cast<cl_uint>(temp_trans.stride2())));
291 }
292 
293 template <typename NumericT>
294 void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
295 {
296  cl_uint s1 = clear ? cl_uint(viennacl::traits::internal_size1(mat)) : cl_uint(viennacl::traits::size1(mat));
297  cl_uint s2 = clear ? cl_uint(viennacl::traits::internal_size2(mat)) : cl_uint(viennacl::traits::size2(mat));
298 
299  viennacl::ocl::kernel & k = detail::kernel_for_matrix(mat, "assign_cpu");
300  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
301  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
302  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
303  s1, s2,
305  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(s))
306  )
307  );
308 }
309 
310 template <typename NumericT>
312 {
313  viennacl::ocl::kernel & k = detail::kernel_for_matrix(mat, "diagonal_assign_cpu");
314  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
315  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
316  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
317  cl_uint(viennacl::traits::size1(mat)), cl_uint(viennacl::traits::size2(mat)),
319  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(s))
320  )
321  );
322 }
323 
324 template <typename NumericT>
326 {
327  // Step 1: set everything to zero
328  matrix_assign(mat, NumericT(0));
329 
330  // Step 2: set the diagonal:
331 
332  // reuse vector ambm kernel for assigning the elements:
333  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
335  KernelClass::init(ctx);
336 
337  cl_uint options_alpha = 0;
339  if (mat.row_major())
340  {
341  vcl_size_t first_row_index = 0;
342  vcl_size_t first_col_index = 0;
343  if (k < 0)
344  first_row_index = vcl_size_t(-k);
345  else
346  first_col_index = vcl_size_t(k);
347  size_mat.start = cl_uint( (viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat)
348  + viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat));
350  size_mat.size = cl_uint(viennacl::traits::size(vec));
351  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
352  }
353  else
354  {
355  vcl_size_t first_row_index = 0;
356  vcl_size_t first_col_index = 0;
357  if (k < 0)
358  first_row_index = vcl_size_t(-k);
359  else
360  first_col_index = vcl_size_t(k);
361  size_mat.start = cl_uint( viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)
364  size_mat.size = cl_uint(viennacl::traits::size(vec));
365  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
366  }
367 
369  size_vec.start = cl_uint(viennacl::traits::start(vec));
370  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
371  size_vec.size = cl_uint(viennacl::traits::size(vec));
372  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
373 
374  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
375  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(mat),
376  size_mat,
377 
378  viennacl::traits::opencl_handle(NumericT(1)),
379  options_alpha,
380  viennacl::traits::opencl_handle(vec),
381  size_vec)
382  );
383 }
384 
385 template <typename NumericT>
387 {
388  // reuse vector ambm kernel for assigning the elements:
389  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
391  KernelClass::init(ctx);
392 
393  cl_uint options_alpha = 0;
395  if (mat.row_major())
396  {
397  vcl_size_t first_row_index = 0;
398  vcl_size_t first_col_index = 0;
399  if (k < 0)
400  first_row_index = vcl_size_t(-k);
401  else
402  first_col_index = vcl_size_t(k);
403  size_mat.start = cl_uint( (viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat)
404  + viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat));
406  size_mat.size = cl_uint(viennacl::traits::size(vec));
407  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
408  }
409  else
410  {
411  vcl_size_t first_row_index = 0;
412  vcl_size_t first_col_index = 0;
413  if (k < 0)
414  first_row_index = vcl_size_t(-k);
415  else
416  first_col_index = vcl_size_t(k);
417  size_mat.start = cl_uint( viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)
420  size_mat.size = cl_uint(viennacl::traits::size(vec));
421  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
422  }
423 
425  size_vec.start = cl_uint(viennacl::traits::start(vec));
426  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
427  size_vec.size = cl_uint(viennacl::traits::size(vec));
428  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
429 
430 
431  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
432  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(vec),
433  size_vec,
434 
435  viennacl::traits::opencl_handle(NumericT(1)),
436  options_alpha,
437  viennacl::traits::opencl_handle(mat),
438  size_mat)
439  );
440 }
441 
442 template <typename NumericT>
443 void matrix_row(matrix_base<NumericT> const & mat, unsigned int i, vector_base<NumericT> & vec)
444 {
445  // reuse vector ambm kernel for assigning the elements:
446  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
448  KernelClass::init(ctx);
449 
450  cl_uint options_alpha = 0;
452  if (mat.row_major())
453  {
455  size_mat.stride = cl_uint(viennacl::traits::stride2(mat));
456  size_mat.size = cl_uint(viennacl::traits::size(vec));
457  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
458  }
459  else
460  {
462  size_mat.stride = cl_uint(viennacl::traits::stride2(mat) * viennacl::traits::internal_size1(mat));
463  size_mat.size = cl_uint(viennacl::traits::size(vec));
464  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
465  }
466 
468  size_vec.start = cl_uint(viennacl::traits::start(vec));
469  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
470  size_vec.size = cl_uint(viennacl::traits::size(vec));
471  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
472 
473 
474  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
475  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(vec),
476  size_vec,
477 
478  viennacl::traits::opencl_handle(NumericT(1)),
479  options_alpha,
480  viennacl::traits::opencl_handle(mat),
481  size_mat)
482  );
483 }
484 
485 template <typename NumericT>
486 void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
487 {
488  // reuse vector ambm kernel for assigning the elements:
489  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
491  KernelClass::init(ctx);
492 
493  cl_uint options_alpha = 0;
495  if (mat.row_major())
496  {
498  size_mat.stride = cl_uint(viennacl::traits::stride2(mat) * viennacl::traits::internal_size2(mat));
499  size_mat.size = cl_uint(viennacl::traits::size(vec));
500  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
501  }
502  else
503  {
505  size_mat.stride = cl_uint(viennacl::traits::stride2(mat));
506  size_mat.size = cl_uint(viennacl::traits::size(vec));
507  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
508  }
509 
511  size_vec.start = cl_uint(viennacl::traits::start(vec));
512  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
513  size_vec.size = cl_uint(viennacl::traits::size(vec));
514  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
515 
516 
517  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
518  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(vec),
519  size_vec,
520 
521  viennacl::traits::opencl_handle(NumericT(1)),
522  options_alpha,
523  viennacl::traits::opencl_handle(mat),
524  size_mat)
525  );
526 }
527 
528 
529 //
531 //
532 
533 // Binary operations A = B .* C and A = B ./ C
539 template <typename T, typename OP>
542 {
543  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
544  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
545 
546  viennacl::ocl::kernel & k = detail::kernel_for_matrix(A, "element_op");
547 
548  cl_uint op_type = 2; //0: product, 1: division, 2: power
550  op_type = 1;
552  op_type = 0;
553 
554  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
555  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
556  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
557  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
559 
560  viennacl::traits::opencl_handle(proxy.lhs()),
561  cl_uint(viennacl::traits::start1(proxy.lhs())), cl_uint(viennacl::traits::start2(proxy.lhs())),
562  cl_uint(viennacl::traits::stride1(proxy.lhs())), cl_uint(viennacl::traits::stride2(proxy.lhs())),
563  cl_uint(viennacl::traits::internal_size1(proxy.lhs())), cl_uint(viennacl::traits::internal_size2(proxy.lhs())),
564 
565  viennacl::traits::opencl_handle(proxy.rhs()),
566  cl_uint(viennacl::traits::start1(proxy.rhs())), cl_uint(viennacl::traits::start2(proxy.rhs())),
567  cl_uint(viennacl::traits::stride1(proxy.rhs())), cl_uint(viennacl::traits::stride2(proxy.rhs())),
568  cl_uint(viennacl::traits::internal_size1(proxy.rhs())), cl_uint(viennacl::traits::internal_size2(proxy.rhs())),
569 
570  op_type)
571  );
572 }
573 
574 
575 // Unary operations
576 
582 template <typename T, typename OP>
585 {
586  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
587  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
588 
590 
591  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
592  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
593  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
594  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
596 
597  viennacl::traits::opencl_handle(proxy.lhs()),
598  cl_uint(viennacl::traits::start1(proxy.lhs())), cl_uint(viennacl::traits::start2(proxy.lhs())),
599  cl_uint(viennacl::traits::stride1(proxy.lhs())), cl_uint(viennacl::traits::stride2(proxy.lhs())),
600  cl_uint(viennacl::traits::internal_size1(proxy.lhs())), cl_uint(viennacl::traits::internal_size2(proxy.lhs())))
601  );
602 }
603 
604 
605 //
607 //
608 
609 // A * x
610 
619 template <typename NumericT>
620 void prod_impl(const matrix_base<NumericT> & mat, bool trans_A,
621  const vector_base<NumericT> & vec,
622  vector_base<NumericT> & result)
623 {
624  assert(viennacl::traits::handle(vec) != viennacl::traits::handle(result) && bool("No direct inplace transposed matrix-vector product possible. Introduce a temporary!"));
625 
626  viennacl::ocl::kernel & k = detail::kernel_for_matrix(mat, trans_A ? "trans_vec_mul" : "vec_mul");
627 
628  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
629  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
630  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
631  cl_uint(viennacl::traits::size1(mat)), cl_uint(viennacl::traits::size2(mat)),
633 
634  viennacl::traits::opencl_handle(vec),
635  cl_uint(viennacl::traits::start(vec)),
636  cl_uint(viennacl::traits::stride(vec)),
637  cl_uint(viennacl::traits::size(vec)),
638 
639  viennacl::traits::opencl_handle(result),
640  cl_uint(viennacl::traits::start(result)),
641  cl_uint(viennacl::traits::stride(result)),
642  cl_uint(viennacl::traits::size(result)),
643 
645  ) );
646 }
647 
648 
649 //
650 
651 
657 template<typename NumericT, typename ScalarType >
658 void prod_impl(matrix_base<NumericT> const & A, bool A_trans,
659  matrix_base<NumericT> const & B, bool B_trans,
661  ScalarType alpha,
662  ScalarType beta)
663 {
664  bool effective_A_trans = A_trans ^ A.row_major();
665  bool effective_B_trans = B_trans ^ B.row_major();
666 
667  char cAt = effective_A_trans ? 'T' : 'N';
668  char cBt = effective_B_trans ? 'T' : 'N';
669 
670  std::string kernel_prefix("prod_");
671  kernel_prefix+=cAt;
672  kernel_prefix+=cBt;
673 
674  scheduler::statement statement = scheduler::preset::mat_mat_prod(alpha, &A, effective_A_trans, &B, effective_B_trans, beta, &C);
675  kernels::matrix_prod<NumericT>::execution_handler(C.row_major(), viennacl::traits::opencl_context(C)).execute(kernel_prefix, statement);
676 }
677 
678 //
680 //
681 
682 
695 template<typename NumericT, typename ScalarT1>
697  ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
698  const vector_base<NumericT> & vec1,
699  const vector_base<NumericT> & vec2)
700 {
701  assert( (viennacl::traits::size1(A) == viennacl::traits::size(vec1)) && bool("Size mismatch in scaled_rank_1_update: size1(A) != size(v1)"));
702  assert( (viennacl::traits::size2(A) == viennacl::traits::size(vec2)) && bool("Size mismatch in scaled_rank_1_update: size2(A) != size(v2)"));
703 
704  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
706  viennacl::ocl::kernel& kernel= detail::legacy_kernel_for_matrix(A, is_cpu ? "scaled_rank1_update_cpu" : "scaled_rank1_update_gpu");
707 
708  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(A),
709  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
710  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
711  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
713 
714  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
715  options_alpha,
716 
717  viennacl::traits::opencl_handle(vec1),
718  cl_uint(viennacl::traits::start(vec1)),
719  cl_uint(viennacl::traits::stride(vec1)),
720  cl_uint(viennacl::traits::size(vec1)),
721 
722  viennacl::traits::opencl_handle(vec2),
723  cl_uint(viennacl::traits::start(vec2)),
724  cl_uint(viennacl::traits::stride(vec2)),
725  cl_uint(viennacl::traits::size(vec2))
726  )
727  );
728 }
729 
730 //
731 template <typename SCALARTYPE, typename VectorType>
733  VectorType & dh,
734  VectorType & sh
735  )
736 {
737  viennacl::vector<SCALARTYPE> D(dh.size());
738  viennacl::vector<SCALARTYPE> S(sh.size());
739 
740  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
742 
743  viennacl::ocl::enqueue(kernel(
744  A,
745  D,
746  S,
747  static_cast<cl_uint>(A.size1()),
748  static_cast<cl_uint>(A.size2()),
749  static_cast<cl_uint>(A.internal_size2())
750  ));
751 
752  fast_copy(D, dh);
753  fast_copy(S, sh);
754 }
755 
756 
757 template <typename NumericT>
761  )
762 {
763  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
764 
765  if(A.row_major())
766  {
769 
770  viennacl::ocl::enqueue(kernel(
771  A,
772  dh,
773  sh,
774  cl_uint(viennacl::traits::size1(A)),
775  cl_uint(viennacl::traits::size2(A)),
777  ));
778  }
779  else
780  {
783 
784  viennacl::ocl::enqueue(kernel(
785  A,
786  dh,
787  sh,
788  cl_uint(viennacl::traits::size1(A)),
789  cl_uint(viennacl::traits::size2(A)),
791  ));
792  }
793 }
794 
795 
796 template <typename NumericT>
800 {
801 
802  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
803  if(A.row_major())
804  {
807  viennacl::ocl::enqueue(kernel(
808  A,
809  D,
810  static_cast<cl_uint>(start + 1),
811  static_cast<cl_uint>(start),
812  cl_uint(viennacl::traits::size1(A)),
813  cl_uint(viennacl::traits::size2(A)),
815  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * 4))
816  ));
817  }
818  else
819  {
822  viennacl::ocl::enqueue(kernel(
823  A,
824  D,
825  static_cast<cl_uint>(start + 1),
826  static_cast<cl_uint>(start),
827  cl_uint(viennacl::traits::size1(A)),
828  cl_uint(viennacl::traits::size2(A)),
830  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * 4))
831  ));
832  }
833 
834 
835 
836 
837 }
838 
839 template <typename NumericT>
842 {
843  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
844 
845  if(A.row_major())
846  {
849 
850  viennacl::ocl::enqueue(kernel(
851  A,
852  D,
853  static_cast<cl_uint>(0),
854  static_cast<cl_uint>(0),
855  cl_uint(viennacl::traits::size1(A)),
856  cl_uint(viennacl::traits::size2(A)),
858  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
859  ));
860  }
861  else
862  {
865 
866  viennacl::ocl::enqueue(kernel(
867  A,
868  D,
869  static_cast<cl_uint>(0),
870  static_cast<cl_uint>(0),
871  cl_uint(viennacl::traits::size1(A)),
872  cl_uint(viennacl::traits::size2(A)),
874  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
875  ));
876  }
877 
878 
879 }
880 
881 
882 
883 template <typename NumericT>
886  vcl_size_t A_size1)
887 
888 {
889  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(Q).context());
890 
891  if(Q.row_major())
892  {
895 
896  viennacl::ocl::enqueue(kernel(
897  Q,
898  D,
899  cl_uint(A_size1),
901  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
902  ));
903  }
904  else
905  {
908 
909  viennacl::ocl::enqueue(kernel(
910  Q,
911  D,
912  cl_uint(A_size1),
914  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
915  ));
916  }
917 
918 }
919 
920 
921 template<typename NumericT>
923  vector_base<NumericT>& tmp1,
924  vector_base<NumericT>& tmp2,
925  int l,
926  int m
927  )
928  {
929  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
930 
931  if(matrix.row_major())
932  {
935  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
936  kernel.local_work_size(0, 256);
937 
938  viennacl::ocl::enqueue(kernel(
939  matrix,
940  tmp1,
941  tmp2,
942  cl_uint(viennacl::traits::size1(matrix)),
943  cl_uint(viennacl::traits::internal_size2(matrix)),
944  static_cast<cl_uint>(l),
945  static_cast<cl_uint>(m - 1)
946  ));
947  }
948  else
949  {
952  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
953  kernel.local_work_size(0, 256);
954 
955  viennacl::ocl::enqueue(kernel(
956  matrix,
957  tmp1,
958  tmp2,
959  cl_uint(viennacl::traits::size1(matrix)),
960  cl_uint(viennacl::traits::internal_size2(matrix)),
961  static_cast<cl_uint>(l),
962  static_cast<cl_uint>(m - 1)
963  ));
964  }
965 
966 
967  }
968 
969  template <typename NumericT>
972  vcl_size_t row_start,
973  vcl_size_t col_start,
974  bool copy_col
975  )
976  {
977  std::string kernel_name = copy_col ? SVD_COPY_COL_KERNEL : SVD_COPY_ROW_KERNEL;
978  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
979 
980  if(A.row_major())
981  {
984 
985  viennacl::ocl::enqueue(kernel(
986  A,
987  V,
988  static_cast<cl_uint>(row_start),
989  static_cast<cl_uint>(col_start),
990  copy_col ? cl_uint(viennacl::traits::size1(A))
991  : cl_uint(viennacl::traits::size2(A)),
992  static_cast<cl_uint>(A.internal_size2())
993  ));
994  }
995  else
996  {
999 
1000  viennacl::ocl::enqueue(kernel(
1001  A,
1002  V,
1003  static_cast<cl_uint>(row_start),
1004  static_cast<cl_uint>(col_start),
1005  copy_col ? cl_uint(viennacl::traits::size1(A))
1006  : cl_uint(viennacl::traits::size2(A)),
1007  static_cast<cl_uint>(A.internal_size2())
1008  ));
1009  }
1010 
1011 
1012  }
1013 
1014 } // namespace opencl
1015 } //namespace linalg
1016 } //namespace viennacl
1017 
1018 
1019 #endif
cl_uint stride
Increment between integers.
Definition: kernel.hpp:50
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:45
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
void matrix_diag_to_vector(const matrix_base< NumericT > &mat, int k, vector_base< NumericT > &vec)
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void prod_impl(const matrix_base< NumericT > &mat, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
Generic size and resize functionality for different vector and matrix types.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
viennacl::ocl::program & get_program(std::string const &name)
Returns the program with the provided name.
Definition: context.hpp:532
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:48
Various little tools used here and there in ViennaCL.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:386
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
const std::string SVD_BIDIAG_PACK_KERNEL
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:394
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341
size_type stride2() const
Returns the number of columns.
Definition: matrix_def.hpp:234
const std::string SVD_GIVENS_NEXT_KERNEL
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
A dense matrix class.
Definition: forwards.h:375
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:652
Determines row and column increments for matrices and matrix proxies.
void bidiag_pack(matrix_base< NumericT > &A, viennacl::vector< NumericT > &dh, viennacl::vector< NumericT > &sh)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:375
viennacl::ocl::kernel & element_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
void scaled_rank_1_update(matrix_base< NumericT > &A, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
OpenCL kernel file for singular value decomposition.
const std::string SVD_COPY_ROW_KERNEL
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:54
Common implementations shared by OpenCL-based operations.
float NumericT
Definition: bisect.cpp:40
void copy_vec(matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
Main kernel class for generating OpenCL kernels for elementwise-operations such as element_sin() on/w...
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:972
viennacl::ocl::kernel & kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:448
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:644
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
OpenCL kernel file for vector operations.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:42
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
Definition: matrix.hpp:1018
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type stride1() const
Returns the number of rows.
Definition: matrix_def.hpp:232
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
void bidiag_pack_svd(viennacl::matrix< SCALARTYPE > &A, VectorType &dh, VectorType &sh)
Wrapper class for an OpenCL program.
Definition: program.hpp:42
void execute(template_base const &T, statements_container const &statements, viennacl::ocl::context &ctx=viennacl::ocl::current_context(), bool force_compilation=false)
Definition: execute.hpp:44
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
Definition: predicate.hpp:466
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
statement mat_mat_prod(NumericT alpha, viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::matrix_base< NumericT > const *B, bool B_trans, NumericT beta, viennacl::matrix_base< NumericT > const *C)
Definition: preset.hpp:33
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
Definition: matrix.hpp:926
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
viennacl::ocl::kernel & legacy_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:248
float ScalarType
Definition: fft_1d.cpp:42
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
void givens_next(matrix_base< NumericT > &matrix, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
A tag class representing transposed matrices.
Definition: forwards.h:220
size_type start2() const
Returns the number of columns.
Definition: matrix_def.hpp:230
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:130
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
const std::string SVD_COPY_COL_KERNEL
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::ocl::kernel & get_kernel(std::string const &name)
Returns the kernel with the provided name.
Definition: context.hpp:773
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
Definition: predicate.hpp:436
std::string op_to_string(op_abs)
Definition: common.hpp:78
static void init(viennacl::ocl::context &ctx)
Definition: matrix.hpp:1115
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
OpenCL kernel file for element-wise matrix operations.
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:134
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:679
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the number of rows.
Definition: matrix_def.hpp:228
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:52
Runtime generation of OpenCL kernels for matrix operations.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
void matrix_row(matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)