ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
vector_operations.hpp
#ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2016, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp    rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file  viennacl/linalg/host_based/vector_operations.hpp
    @brief Implementations of vector operations using a plain single-threaded or OpenMP-enabled execution on CPU
*/
#include <cmath>
#include <algorithm>   //for std::max and std::min

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"
#include "viennacl/linalg/detail/op_applier.hpp"
#include "viennacl/linalg/host_based/common.hpp"

#ifdef VIENNACL_WITH_OPENMP
#include <omp.h>
#endif

// Minimum vector size for using OpenMP on vector operations:
#ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
  #define VIENNACL_OPENMP_VECTOR_MIN_SIZE  5000
#endif
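// Since the value is only set when the macro is not already defined, the threshold above can be
// overridden by defining VIENNACL_OPENMP_VECTOR_MIN_SIZE before this header is included
// (for example via a compiler flag such as -DVIENNACL_OPENMP_VECTOR_MIN_SIZE=20000).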

namespace viennacl
{
namespace linalg
{
namespace host_based
{
namespace detail
{
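  // flip_sign() negates signed values; for unsigned integer types negation would wrap
  // around, so the overloads below simply return the value unchanged.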
  template<typename NumericT>
  NumericT flip_sign(NumericT val) { return -val; }
  inline unsigned long  flip_sign(unsigned long  val) { return val; }
  inline unsigned int   flip_sign(unsigned int   val) { return val; }
  inline unsigned short flip_sign(unsigned short val) { return val; }
  inline unsigned char  flip_sign(unsigned char  val) { return val; }
}

//
// Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
//
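
// Naming convention of the BLAS-1 style kernels below:
//   av(vec1, vec2, alpha, ...):               vec1  = vec2 * alpha
//   avbv(vec1, vec2, alpha, vec3, beta, ...): vec1  = vec2 * alpha + vec3 * beta
//   avbv_v(vec1, vec2, alpha, vec3, beta, ...): vec1 += vec2 * alpha + vec3 * beta
// The reciprocal_* flags request a division by the respective scalar instead of a multiplication,
// and the flip_sign_* flags negate the scalar (via detail::flip_sign) before it is applied.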
template<typename DestNumericT, typename SrcNumericT>
void convert(vector_base<DestNumericT> & dest, vector_base<SrcNumericT> const & src)
{
  DestNumericT      * data_dest = detail::extract_raw_pointer<DestNumericT>(dest);
  SrcNumericT const * data_src  = detail::extract_raw_pointer<SrcNumericT>(src);

  vcl_size_t start_dest = viennacl::traits::start(dest);
  vcl_size_t inc_dest   = viennacl::traits::stride(dest);
  vcl_size_t size_dest  = viennacl::traits::size(dest);

  vcl_size_t start_src = viennacl::traits::start(src);
  vcl_size_t inc_src   = viennacl::traits::stride(src);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size_dest > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size_dest); ++i)
    data_dest[static_cast<vcl_size_t>(i)*inc_dest+start_dest] = static_cast<DestNumericT>(data_src[static_cast<vcl_size_t>(i)*inc_src+start_src]);
}

template<typename NumericT, typename ScalarT1>
void av(vector_base<NumericT> & vec1,
        vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
{
  typedef NumericT        value_type;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = detail::flip_sign(data_alpha);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(vec2);
  vcl_size_t inc2   = viennacl::traits::stride(vec2);

  if (reciprocal_alpha)
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(size1); ++i)
      data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(size1); ++i)
      data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
  }
}


template<typename NumericT, typename ScalarT1, typename ScalarT2>
void avbv(vector_base<NumericT> & vec1,
          vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
          vector_base<NumericT> const & vec3, ScalarT2 const & beta,  vcl_size_t /* len_beta */,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT      value_type;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = detail::flip_sign(data_alpha);

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = detail::flip_sign(data_beta);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(vec2);
  vcl_size_t inc2   = viennacl::traits::stride(vec2);

  vcl_size_t start3 = viennacl::traits::start(vec3);
  vcl_size_t inc3   = viennacl::traits::stride(vec3);

  if (reciprocal_alpha)
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
  else
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
}


template<typename NumericT, typename ScalarT1, typename ScalarT2>
void avbv_v(vector_base<NumericT> & vec1,
            vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
            vector_base<NumericT> const & vec3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT        value_type;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = detail::flip_sign(data_alpha);

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = detail::flip_sign(data_beta);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(vec2);
  vcl_size_t inc2   = viennacl::traits::stride(vec2);

  vcl_size_t start3 = viennacl::traits::start(vec3);
  vcl_size_t inc3   = viennacl::traits::stride(vec3);

  if (reciprocal_alpha)
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
  else
  {
    if (reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i = 0; i < static_cast<long>(size1); ++i)
        data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
    }
  }
}



/** @brief Assign a constant value to a vector (-range/-slice) */
template<typename NumericT>
void vector_assign(vector_base<NumericT> & vec1, const NumericT & alpha, bool up_to_internal_size = false)
{
  typedef NumericT       value_type;

  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);
  vcl_size_t loop_bound  = up_to_internal_size ? vec1.internal_size() : size1;  //Note: Do NOT use traits::internal_size() here, because vector proxies don't require padding.

  value_type data_alpha = static_cast<value_type>(alpha);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(loop_bound); ++i)
    data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha;
}

/** @brief Swaps the contents of two vectors, data is copied */
template<typename NumericT>
void vector_swap(vector_base<NumericT> & vec1, vector_base<NumericT> & vec2)
{
  typedef NumericT      value_type;

  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(vec2);
  vcl_size_t inc2   = viennacl::traits::stride(vec2);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
  {
    value_type temp = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
    data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
    data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = temp;
  }
}


///////////////////////// Elementwise operations /////////////

/** @brief Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3    (using MATLAB syntax) */
template<typename NumericT, typename OpT>
void element_op(vector_base<NumericT> & vec1,
                vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_binary<OpT> > const & proxy)
{
  typedef NumericT                                                        value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_binary<OpT> >  OpFunctor;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(proxy.lhs());
  vcl_size_t inc2   = viennacl::traits::stride(proxy.lhs());

  vcl_size_t start3 = viennacl::traits::start(proxy.rhs());
  vcl_size_t inc3   = viennacl::traits::stride(proxy.rhs());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
    OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
}

/** @brief Implementation of the unary element-wise operation v1 = OP(v2) */
template<typename NumericT, typename OpT>
void element_op(vector_base<NumericT> & vec1,
                vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_unary<OpT> > const & proxy)
{
  typedef NumericT                                                       value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_unary<OpT> >  OpFunctor;

  value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(proxy.lhs());
  vcl_size_t inc2   = viennacl::traits::stride(proxy.lhs());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
    OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]);
}



//implementation of inner product:

namespace detail
{

// the following circumvents problems when trying to use a variable of template parameter type for a reduction.
// Such a behavior is not covered by the OpenMP standard, hence we manually apply some preprocessor magic to resolve the problem.
// See https://github.com/viennacl/viennacl-dev/issues/112 for a detailed explanation and discussion.

#define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART) \
  inline RESULTSCALART inner_prod_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1, \
                                       RESULTSCALART const * data_vec2, vcl_size_t start2, vcl_size_t inc2) { \
    TEMPSCALART temp = 0;

#define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART) \
    for (long i = 0; i < static_cast<long>(size1); ++i) \
      temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]; \
    return static_cast<RESULTSCALART>(temp); \
  }

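// For illustration: the float instantiation further down,
//
//   VIENNACL_INNER_PROD_IMPL_1(float, float)
//   #ifdef VIENNACL_WITH_OPENMP
//     #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
//   #endif
//   VIENNACL_INNER_PROD_IMPL_2(float)
//
// expands (with OpenMP enabled) to an ordinary overload in which the reduction variable has a
// concrete type rather than a template parameter type, which is exactly what the workaround above is for:
//
//   inline float inner_prod_impl(float const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1,
//                                float const * data_vec2, vcl_size_t start2, vcl_size_t inc2) {
//     float temp = 0;
//     #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
//     for (long i = 0; i < static_cast<long>(size1); ++i)
//       temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
//     return static_cast<float>(temp);
//   }
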
// char
VIENNACL_INNER_PROD_IMPL_1(char, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(char)

VIENNACL_INNER_PROD_IMPL_1(unsigned char, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(unsigned char)


// short
VIENNACL_INNER_PROD_IMPL_1(short, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(short)

VIENNACL_INNER_PROD_IMPL_1(unsigned short, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(unsigned short)


// int
VIENNACL_INNER_PROD_IMPL_1(int, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(int)

VIENNACL_INNER_PROD_IMPL_1(unsigned int, unsigned int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(unsigned int)


// long
VIENNACL_INNER_PROD_IMPL_1(long, long)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(long)

VIENNACL_INNER_PROD_IMPL_1(unsigned long, unsigned long)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(unsigned long)


// float
VIENNACL_INNER_PROD_IMPL_1(float, float)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(float)

// double
VIENNACL_INNER_PROD_IMPL_1(double, double)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_INNER_PROD_IMPL_2(double)

#undef VIENNACL_INNER_PROD_IMPL_1
#undef VIENNACL_INNER_PROD_IMPL_2
}

/** @brief Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). */
template<typename NumericT, typename ScalarT>
void inner_prod_impl(vector_base<NumericT> const & vec1,
                     vector_base<NumericT> const & vec2,
                     ScalarT & result)
{
  typedef NumericT      value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(vec2);
  vcl_size_t inc2   = viennacl::traits::stride(vec2);

  result = detail::inner_prod_impl(data_vec1, start1, inc1, size1,
                                   data_vec2, start2, inc2);  //Note: Assignment to result might be expensive, thus a temporary is introduced here
}

template<typename NumericT>
void inner_prod_impl(vector_base<NumericT> const & x,
                     vector_tuple<NumericT> const & vec_tuple,
                     vector_base<NumericT> & result)
{
  typedef NumericT        value_type;

  value_type const * data_x = detail::extract_raw_pointer<value_type>(x);

  vcl_size_t start_x = viennacl::traits::start(x);
  vcl_size_t inc_x   = viennacl::traits::stride(x);
  vcl_size_t size_x  = viennacl::traits::size(x);

  std::vector<value_type> temp(vec_tuple.const_size());
  std::vector<value_type const *> data_y(vec_tuple.const_size());
  std::vector<vcl_size_t> start_y(vec_tuple.const_size());
  std::vector<vcl_size_t> stride_y(vec_tuple.const_size());

  for (vcl_size_t j=0; j<vec_tuple.const_size(); ++j)
  {
    data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.const_at(j));
    start_y[j] = viennacl::traits::start(vec_tuple.const_at(j));
    stride_y[j] = viennacl::traits::stride(vec_tuple.const_at(j));
  }

  // Note: No OpenMP here because it cannot perform a reduction on temp-array. Savings in memory bandwidth are expected to still justify this approach...
  for (vcl_size_t i = 0; i < size_x; ++i)
  {
    value_type entry_x = data_x[i*inc_x+start_x];
    for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j)
      temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
  }

  for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j)
    result[j] = temp[j];  //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
}


namespace detail
{

#define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART) \
  inline RESULTSCALART norm_1_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
    TEMPSCALART temp = 0;

#define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART) \
    for (long i = 0; i < static_cast<long>(size1); ++i) \
      temp += static_cast<TEMPSCALART>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]))); \
    return static_cast<RESULTSCALART>(temp); \
  }

// char
VIENNACL_NORM_1_IMPL_1(char, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(char, int)

VIENNACL_NORM_1_IMPL_1(unsigned char, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(unsigned char, int)

// short
VIENNACL_NORM_1_IMPL_1(short, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(short, int)

VIENNACL_NORM_1_IMPL_1(unsigned short, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(unsigned short, int)


// int
VIENNACL_NORM_1_IMPL_1(int, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(int, int)

VIENNACL_NORM_1_IMPL_1(unsigned int, unsigned int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(unsigned int, unsigned int)


// long
VIENNACL_NORM_1_IMPL_1(long, long)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(long, long)

VIENNACL_NORM_1_IMPL_1(unsigned long, unsigned long)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(unsigned long, unsigned long)


// float
VIENNACL_NORM_1_IMPL_1(float, float)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(float, float)

// double
VIENNACL_NORM_1_IMPL_1(double, double)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_1_IMPL_2(double, double)

#undef VIENNACL_NORM_1_IMPL_1
#undef VIENNACL_NORM_1_IMPL_2

}

/** @brief Computes the l^1-norm of a vector */
template<typename NumericT, typename ScalarT>
void norm_1_impl(vector_base<NumericT> const & vec1,
                 ScalarT & result)
{
  typedef NumericT        value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  result = detail::norm_1_impl(data_vec1, start1, inc1, size1);  //Note: Assignment to result might be expensive, thus using a temporary for accumulation
}



namespace detail
{

#define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART) \
  inline RESULTSCALART norm_2_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
    TEMPSCALART temp = 0;

#define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART) \
    for (long i = 0; i < static_cast<long>(size1); ++i) { \
      RESULTSCALART data = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]; \
      temp += static_cast<TEMPSCALART>(data * data); \
    } \
    return static_cast<RESULTSCALART>(temp); \
  }

// char
VIENNACL_NORM_2_IMPL_1(char, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(char, int)

VIENNACL_NORM_2_IMPL_1(unsigned char, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(unsigned char, int)


// short
VIENNACL_NORM_2_IMPL_1(short, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(short, int)

VIENNACL_NORM_2_IMPL_1(unsigned short, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(unsigned short, int)


// int
VIENNACL_NORM_2_IMPL_1(int, int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(int, int)

VIENNACL_NORM_2_IMPL_1(unsigned int, unsigned int)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(unsigned int, unsigned int)


// long
VIENNACL_NORM_2_IMPL_1(long, long)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(long, long)

VIENNACL_NORM_2_IMPL_1(unsigned long, unsigned long)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(unsigned long, unsigned long)


// float
VIENNACL_NORM_2_IMPL_1(float, float)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(float, float)

// double
VIENNACL_NORM_2_IMPL_1(double, double)
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
VIENNACL_NORM_2_IMPL_2(double, double)

#undef VIENNACL_NORM_2_IMPL_1
#undef VIENNACL_NORM_2_IMPL_2

}


/** @brief Computes the l^2-norm of a vector - implementation */
template<typename NumericT, typename ScalarT>
void norm_2_impl(vector_base<NumericT> const & vec1,
                 ScalarT & result)
{
  typedef NumericT       value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  result = std::sqrt(detail::norm_2_impl(data_vec1, start1, inc1, size1));  //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
}

/** @brief Computes the supremum-norm of a vector */
template<typename NumericT, typename ScalarT>
void norm_inf_impl(vector_base<NumericT> const & vec1,
                   ScalarT & result)
{
  typedef NumericT       value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t thread_count=1;

  #ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
      thread_count = omp_get_max_threads();
  #endif

  std::vector<value_type> temp(thread_count);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif

    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    temp[id]         = 0;

    for (vcl_size_t i = begin; i < end; ++i)
      temp[id] = std::max<value_type>(temp[id], static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))));  //casting to double in order to avoid problems if T is an integer type
  }
  for (vcl_size_t i = 1; i < thread_count; ++i)
    temp[0] = std::max<value_type>( temp[0], temp[i]);
  result = temp[0];
}
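
// Note on the pattern above (reused by index_norm_inf(), max_impl() and min_impl() below):
// thread 'id' scans the contiguous index range [size1*id/thread_count, size1*(id+1)/thread_count),
// stores its local result in temp[id], and the per-thread partial results are merged serially afterwards.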

//This function should return a CPU scalar, otherwise statements like
// vcl_rhs[index_norm_inf(vcl_rhs)]
// are ambiguous
/** @brief Computes the index of the first entry that is equal to the supremum-norm in modulus */
template<typename NumericT>
vcl_size_t index_norm_inf(vector_base<NumericT> const & vec1)
{
  typedef NumericT      value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);
  vcl_size_t thread_count=1;

#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
      thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);
  std::vector<vcl_size_t> index(thread_count);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    index[id]        = start1;
    temp[id]         = 0;
    value_type data;

    for (vcl_size_t i = begin; i < end; ++i)
    {
      data = static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));  //casting to double in order to avoid problems if T is an integer type
      if (data > temp[id])
      {
        index[id] = i;
        temp[id]  = data;
      }
    }
  }
  for (vcl_size_t i = 1; i < thread_count; ++i)
  {
    if (temp[i] > temp[0])
    {
      index[0] = index[i];
      temp[0] = temp[i];
    }
  }
  return index[0];
}

/** @brief Computes the maximum of a vector */
template<typename NumericT, typename ScalarT>
void max_impl(vector_base<NumericT> const & vec1,
              ScalarT & result)
{
  typedef NumericT       value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t thread_count=1;

#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
      thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    temp[id]         = data_vec1[start1];

    for (vcl_size_t i = begin; i < end; ++i)
    {
      value_type v = data_vec1[i*inc1+start1];  //Note: Passing the entry to std::max via 'v' avoids evaluating the element access twice
      temp[id] = std::max<value_type>(temp[id],v);
    }
  }
  for (vcl_size_t i = 1; i < thread_count; ++i)
    temp[0] = std::max<value_type>( temp[0], temp[i]);
  result = temp[0];  //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
}

/** @brief Computes the minimum of a vector */
template<typename NumericT, typename ScalarT>
void min_impl(vector_base<NumericT> const & vec1,
              ScalarT & result)
{
  typedef NumericT       value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t thread_count=1;

#ifdef VIENNACL_WITH_OPENMP
  if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
      thread_count = omp_get_max_threads();
#endif

  std::vector<value_type> temp(thread_count);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  {
    vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
    id = omp_get_thread_num();
#endif
    vcl_size_t begin = (size1 * id) / thread_count;
    vcl_size_t end   = (size1 * (id + 1)) / thread_count;
    temp[id]         = data_vec1[start1];

    for (vcl_size_t i = begin; i < end; ++i)
    {
      value_type v = data_vec1[i*inc1+start1];  //Note: Passing the entry to std::min via 'v' avoids evaluating the element access twice
      temp[id] = std::min<value_type>(temp[id],v);
    }
  }
  for (vcl_size_t i = 1; i < thread_count; ++i)
    temp[0] = std::min<value_type>( temp[0], temp[i]);
  result = temp[0];  //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
}

/** @brief Computes the sum of all elements from the vector */
template<typename NumericT, typename ScalarT>
void sum_impl(vector_base<NumericT> const & vec1,
              ScalarT & result)
{
  typedef NumericT       value_type;

  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  value_type temp = 0;
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for reduction(+:temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
    temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];

  result = temp;  //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
}

/** @brief Computes a plane rotation of two vectors.
*
* Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)
*
* @param vec1   The first vector
* @param vec2   The second vector
* @param alpha  The first transformation coefficient
* @param beta   The second transformation coefficient
*/
template<typename NumericT>
void plane_rotation(vector_base<NumericT> & vec1,
                    vector_base<NumericT> & vec2,
                    NumericT alpha, NumericT beta)
{
  typedef NumericT  value_type;

  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);

  vcl_size_t start1 = viennacl::traits::start(vec1);
  vcl_size_t inc1   = viennacl::traits::stride(vec1);
  vcl_size_t size1  = viennacl::traits::size(vec1);

  vcl_size_t start2 = viennacl::traits::start(vec2);
  vcl_size_t inc2   = viennacl::traits::stride(vec2);

  value_type data_alpha = alpha;
  value_type data_beta  = beta;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(size1); ++i)
  {
    value_type temp1 = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
    value_type temp2 = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];

    data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
    data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
  }
}
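
// Quick sanity check for the rotation above: alpha = 1, beta = 0 leaves both vectors unchanged,
// while alpha = 0, beta = 1 maps each pair (x_i, y_i) to (y_i, -x_i).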

namespace detail
{
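  // Worked example for the scan below: an inclusive scan of (1, 2, 3, 4) yields (1, 3, 6, 10),
  // the exclusive variant yields (0, 1, 3, 6). In the OpenMP branch, two threads first compute
  // the partial sums of their segments, e.g. (3, 7); an exclusive scan of these sums gives the
  // per-thread offsets (0, 3); the second pass then rescans each segment starting from its offset.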
  /** @brief Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend. */
  template<typename NumericT>
  void vector_scan_impl(vector_base<NumericT> const & vec1,
                        vector_base<NumericT>       & vec2,
                        bool is_inclusive)
  {
    NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1);
    NumericT       * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2);

    vcl_size_t start1 = viennacl::traits::start(vec1);
    vcl_size_t inc1   = viennacl::traits::stride(vec1);
    vcl_size_t size1  = viennacl::traits::size(vec1);
    if (size1 < 1)
      return;

    vcl_size_t start2 = viennacl::traits::start(vec2);
    vcl_size_t inc2   = viennacl::traits::stride(vec2);

#ifdef VIENNACL_WITH_OPENMP
    if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
    {
      std::vector<NumericT> thread_results(omp_get_max_threads());

      // inclusive scan each thread segment:
      #pragma omp parallel
      {
        vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
        vcl_size_t thread_start    = work_per_thread * omp_get_thread_num();
        vcl_size_t thread_stop     = std::min<vcl_size_t>(thread_start + work_per_thread, size1);

        NumericT thread_sum = 0;
        for(vcl_size_t i = thread_start; i < thread_stop; i++)
          thread_sum += data_vec1[i * inc1 + start1];

        thread_results[omp_get_thread_num()] = thread_sum;
      }

      // exclusive-scan of thread results:
      NumericT current_offset = 0;
      for (vcl_size_t i=0; i<thread_results.size(); ++i)
      {
        NumericT tmp = thread_results[i];
        thread_results[i] = current_offset;
        current_offset += tmp;
      }

      // exclusive/inclusive scan of each segment with correct offset:
      #pragma omp parallel
      {
        vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
        vcl_size_t thread_start    = work_per_thread * omp_get_thread_num();
        vcl_size_t thread_stop     = std::min<vcl_size_t>(thread_start + work_per_thread, size1);

        NumericT thread_sum = thread_results[omp_get_thread_num()];
        if (is_inclusive)
        {
          for(vcl_size_t i = thread_start; i < thread_stop; i++)
          {
            thread_sum += data_vec1[i * inc1 + start1];
            data_vec2[i * inc2 + start2] = thread_sum;
          }
        }
        else
        {
          for(vcl_size_t i = thread_start; i < thread_stop; i++)
          {
            NumericT tmp = data_vec1[i * inc1 + start1];
            data_vec2[i * inc2 + start2] = thread_sum;
            thread_sum += tmp;
          }
        }
      }
    } else
#endif
    {
      NumericT sum = 0;
      if (is_inclusive)
      {
        for(vcl_size_t i = 0; i < size1; i++)
        {
          sum += data_vec1[i * inc1 + start1];
          data_vec2[i * inc2 + start2] = sum;
        }
      }
      else
      {
        for(vcl_size_t i = 0; i < size1; i++)
        {
          NumericT tmp = data_vec1[i * inc1 + start1];
          data_vec2[i * inc2 + start2] = sum;
          sum += tmp;
        }
      }
    }

  }
}

/** @brief This function implements an inclusive scan on the host using OpenMP. */
template<typename NumericT>
void inclusive_scan(vector_base<NumericT> const & vec1,
                    vector_base<NumericT>       & vec2)
{
  detail::vector_scan_impl(vec1, vec2, true);
}

/** @brief This function implements an exclusive scan on the host using OpenMP. */
template<typename NumericT>
void exclusive_scan(vector_base<NumericT> const & vec1,
                    vector_base<NumericT>       & vec2)
{
  detail::vector_scan_impl(vec1, vec2, false);
}


} //namespace host_based
} //namespace linalg
} //namespace viennacl


#endif