ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
vector_int.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2016, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
23 //
24 // *** System
25 //
26 #include <iostream>
27 #include <iomanip>
28 
29 //
30 // *** ViennaCL
31 //
32 #include "viennacl/vector.hpp"
39 #include "viennacl/linalg/sum.hpp"
40 
41 
42 //
43 // -------------------------------------------------------------
44 //
45 template<typename ScalarType>
47 {
49  return s1 - s2;
50 }
51 //
52 // -------------------------------------------------------------
53 //
54 template<typename ScalarType>
56 {
58  return s1 - s2;
59 }
60 //
61 // -------------------------------------------------------------
62 //
63 template<typename ScalarType>
65 {
67  return s1 - s2;
68 }
69 //
70 // -------------------------------------------------------------
71 //
72 template<typename ScalarType, typename VCLVectorType>
73 ScalarType diff(std::vector<ScalarType> const & v1, VCLVectorType const & v2)
74 {
75  std::vector<ScalarType> v2_cpu(v2.size());
76  viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8)
77  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
78 
79  for (unsigned int i=0;i<v1.size(); ++i)
80  {
81  if (v2_cpu[i] != v1[i])
82  return 1;
83  }
84 
85  return 0;
86 }
87 
/** @brief Turns an already computed difference into a test status, printing it on mismatch.
*
* Helper for check(): taking the difference as a deduced template argument lets check()
* evaluate diff() exactly once without having to name its return type.
*
* @param d   Value returned by diff()
* @return EXIT_SUCCESS if d equals zero, EXIT_FAILURE otherwise
*/
template<typename DiffT>
int check_report_diff(DiffT const & d)
{
  if (d != 0)
  {
    std::cout << "# Error! Difference: " << std::abs(d) << std::endl;
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}

/** @brief Checks two objects for equality via diff() and reports any mismatch on stdout.
*
* diff() is evaluated only once per call, so the comparison and the error message
* share a single result instead of recomputing it for the message.
*
* @param t1   Reference object (first argument passed to diff())
* @param t2   Object under test (second argument passed to diff())
* @return EXIT_SUCCESS if diff(t1, t2) is zero, EXIT_FAILURE otherwise
*/
template<typename T1, typename T2>
int check(T1 const & t1, T2 const & t2)
{
  return check_report_diff(diff(t1, t2));
}
100 
101 
102 //
103 // -------------------------------------------------------------
104 //
105 template< typename NumericT, typename STLVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 >
106 int test(STLVectorType & std_v1, STLVectorType & std_v2,
107  ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
108 {
109  int retval = EXIT_SUCCESS;
110 
111  NumericT cpu_result = 42;
112  viennacl::scalar<NumericT> gpu_result = 43;
113 
114  //
115  // Initializer:
116  //
117  std::cout << "Checking for zero_vector initializer..." << std::endl;
118  for (std::size_t i=0; i<std_v1.size(); ++i)
119  std_v1[i] = 0;
120  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
121  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
122  return EXIT_FAILURE;
123 
124  std::cout << "Checking for scalar_vector initializer..." << std::endl;
125  for (std::size_t i=0; i<std_v1.size(); ++i)
126  std_v1[i] = cpu_result;
127  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
128  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
129  return EXIT_FAILURE;
130 
131  for (std::size_t i=0; i<std_v1.size(); ++i)
132  std_v1[i] = cpu_result + 1;
133  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
134  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
135  return EXIT_FAILURE;
136 
137  std::cout << "Checking for unit_vector initializer..." << std::endl;
138  for (std::size_t i=0; i<std_v1.size(); ++i)
139  std_v1[i] = (i == 5) ? 1 : 0;
140  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
141  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
142  return EXIT_FAILURE;
143 
144  for (std::size_t i=0; i<std_v1.size(); ++i)
145  {
146  std_v1[i] = NumericT(i);
147  std_v2[i] = NumericT(i+42);
148  }
149 
150  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); //resync
151  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
152 
153  std::cout << "Checking for successful copy..." << std::endl;
154  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
155  return EXIT_FAILURE;
156  if (check(std_v2, vcl_v2) != EXIT_SUCCESS)
157  return EXIT_FAILURE;
158 
159  //
160  // Part 1: Norms and inner product
161  //
162 
163  // --------------------------------------------------------------------------
164  std::cout << "Testing inner_prod..." << std::endl;
165  cpu_result = 0;
166  for (std::size_t i=0; i<std_v1.size(); ++i)
167  cpu_result += std_v1[i] * std_v2[i];
168  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
169  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
170 
171  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
172  return EXIT_FAILURE;
173  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
174  return EXIT_FAILURE;
175 
176  cpu_result = 0;
177  for (std::size_t i=0; i<std_v1.size(); ++i)
178  cpu_result += (std_v1[i] + std_v2[i]) * (std_v2[i] - std_v1[i]);
179  NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
180  gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
181 
182  if (check(cpu_result, cpu_result3) != EXIT_SUCCESS)
183  return EXIT_FAILURE;
184  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
185  return EXIT_FAILURE;
186 
187  // --------------------------------------------------------------------------
188  // --------------------------------------------------------------------------
189  std::cout << "Testing norm_1..." << std::endl;
190  cpu_result = 0;
191  for (std::size_t i=0; i<std_v1.size(); ++i) //note: norm_1 broken for unsigned ints on MacOS
192  cpu_result += std::abs(std_v1[i]);
193  gpu_result = viennacl::linalg::norm_1(vcl_v1);
194 
195  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
196  return EXIT_FAILURE;
197 
198  cpu_result2 = 0; //reset
199  for (std::size_t i=0; i<std_v1.size(); ++i) //note: norm_1 broken for unsigned ints on MacOS
200  cpu_result2 += std::abs(std_v1[i]);
201  cpu_result = viennacl::linalg::norm_1(vcl_v1);
202 
203  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
204  return EXIT_FAILURE;
205 
206  cpu_result2 = 0;
207  for (std::size_t i=0; i<std_v1.size(); ++i) //note: norm_1 broken for unsigned ints on MacOS
208  cpu_result2 += std::abs(std_v1[i] + std_v2[i]);
209  cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2);
210 
211  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
212  return EXIT_FAILURE;
213 
214  // --------------------------------------------------------------------------
215  std::cout << "Testing norm_inf..." << std::endl;
216  cpu_result = 0;
217  for (std::size_t i=0; i<std_v1.size(); ++i)
218  if (std::abs(std_v1[i]) > cpu_result)
219  cpu_result = std::abs(std_v1[i]);
220  gpu_result = viennacl::linalg::norm_inf(vcl_v1);
221 
222  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
223  return EXIT_FAILURE;
224 
225  cpu_result2 = cpu_result;
226  cpu_result = 0;
227  cpu_result = viennacl::linalg::norm_inf(vcl_v1);
228 
229  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
230  return EXIT_FAILURE;
231 
232  cpu_result2 = 0;
233  for (std::size_t i=0; i<std_v1.size(); ++i)
234  if (std_v1[i] + std_v2[i] > cpu_result2)
235  cpu_result2 = std::abs(std_v1[i] + std_v2[i]);
236  cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2);
237 
238  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
239  return EXIT_FAILURE;
240 
241  // --------------------------------------------------------------------------
242  std::cout << "Testing index_norm_inf..." << std::endl;
243 
244  std::size_t cpu_index = 0;
245  cpu_result = 0;
246  for (std::size_t i=0; i<std_v1.size(); ++i)
247  if (std::abs(std_v1[i]) > cpu_result)
248  {
249  cpu_result = std::abs(std_v1[i]);
250  cpu_index = i;
251  }
252  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);
253 
254  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS)
255  return EXIT_FAILURE;
256  // --------------------------------------------------------------------------
257  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];
258 
259  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
260  return EXIT_FAILURE;
261 
262  cpu_index = 0;
263  cpu_result = 0;
264  for (std::size_t i=0; i<std_v1.size(); ++i)
265  if (std::abs(std_v1[i] + std_v2[i]) > cpu_result)
266  {
267  cpu_result = std::abs(std_v1[i] + std_v2[i]);
268  cpu_index = i;
269  }
270  cpu_result = std_v1[cpu_index];
271  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)];
272 
273  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
274  return EXIT_FAILURE;
275 
276  // --------------------------------------------------------------------------
277  std::cout << "Testing max..." << std::endl;
278  cpu_result = std_v1[0];
279  for (std::size_t i=0; i<std_v1.size(); ++i)
280  cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
281  gpu_result = viennacl::linalg::max(vcl_v1);
282 
283  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
284  return EXIT_FAILURE;
285 
286  cpu_result = std_v1[0];
287  for (std::size_t i=0; i<std_v1.size(); ++i)
288  cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
289  gpu_result = cpu_result;
290  cpu_result *= 2; //reset
291  cpu_result = viennacl::linalg::max(vcl_v1);
292 
293  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
294  return EXIT_FAILURE;
295 
296  cpu_result = std_v1[0] + std_v2[0];
297  for (std::size_t i=0; i<std_v1.size(); ++i)
298  cpu_result = std::max<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
299  gpu_result = cpu_result;
300  cpu_result *= 2; //reset
301  cpu_result = viennacl::linalg::max(vcl_v1 + vcl_v2);
302 
303  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
304  return EXIT_FAILURE;
305 
306 
307  // --------------------------------------------------------------------------
308  std::cout << "Testing min..." << std::endl;
309  cpu_result = std_v1[0];
310  for (std::size_t i=0; i<std_v1.size(); ++i)
311  cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
312  gpu_result = viennacl::linalg::min(vcl_v1);
313 
314  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
315  return EXIT_FAILURE;
316 
317  cpu_result = std_v1[0];
318  for (std::size_t i=0; i<std_v1.size(); ++i)
319  cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
320  gpu_result = cpu_result;
321  cpu_result *= 2; //reset
322  cpu_result = viennacl::linalg::min(vcl_v1);
323 
324  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
325  return EXIT_FAILURE;
326 
327  cpu_result = std_v1[0] + std_v2[0];
328  for (std::size_t i=0; i<std_v1.size(); ++i)
329  cpu_result = std::min<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
330  gpu_result = cpu_result;
331  cpu_result *= 2; //reset
332  cpu_result = viennacl::linalg::min(vcl_v1 + vcl_v2);
333 
334  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
335  return EXIT_FAILURE;
336 
337  // --------------------------------------------------------------------------
338  std::cout << "Testing sum..." << std::endl;
339  cpu_result = 0;
340  for (std::size_t i=0; i<std_v1.size(); ++i)
341  cpu_result += std_v1[i];
342  cpu_result2 = viennacl::linalg::sum(vcl_v1);
343  gpu_result = viennacl::linalg::sum(vcl_v1);
344 
345  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
346  return EXIT_FAILURE;
347  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
348  return EXIT_FAILURE;
349 
350  cpu_result = 0;
351  for (std::size_t i=0; i<std_v1.size(); ++i)
352  cpu_result += std_v1[i] + std_v2[i];
353  cpu_result3 = viennacl::linalg::sum(vcl_v1 + vcl_v2);
354  gpu_result = viennacl::linalg::sum(vcl_v1 + vcl_v2);
355 
356  if (check(cpu_result, cpu_result3) != EXIT_SUCCESS)
357  return EXIT_FAILURE;
358  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
359  return EXIT_FAILURE;
360 
361 
362  //
363  // Plane rotation and assignments
364  //
365 
366  // --------------------------------------------------------------------------
367 
368  std::vector<NumericT> x = std_v1;
369  std::vector<NumericT> y = std_v2;
370  for (std::size_t i=0; i<std_v1.size(); ++i)
371  {
372  NumericT tmp;
373  tmp = NumericT(1) * x[i] + NumericT(2) * y[i];
374  y[i] = - NumericT(2) * x[i] + NumericT(1) * y[i];
375  x[i] = tmp;
376  }
377 
378  viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1), NumericT(2));
379 
380  if (check(x, vcl_v1) != EXIT_SUCCESS)
381  return EXIT_FAILURE;
382  if (check(y, vcl_v2) != EXIT_SUCCESS)
383  return EXIT_FAILURE;
384 
385  // --------------------------------------------------------------------------
386 
387  std::cout << "Testing assignments..." << std::endl;
388  NumericT val = static_cast<NumericT>(1);
389  for (size_t i=0; i < std_v1.size(); ++i)
390  std_v1[i] = val;
391 
392  for (size_t i=0; i < vcl_v1.size(); ++i)
393  vcl_v1(i) = val;
394 
395  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
396  return EXIT_FAILURE;
397 
398 
399  //
400  // multiplication and division of vectors by scalars
401  //
402  std::cout << "Testing scaling with CPU scalar..." << std::endl;
403  NumericT alpha = static_cast<NumericT>(3);
404  viennacl::scalar<NumericT> gpu_alpha = alpha;
405 
406  for (std::size_t i=0; i<std_v1.size(); ++i)
407  std_v1[i] *= alpha;
408  vcl_v1 *= alpha;
409 
410  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
411  return EXIT_FAILURE;
412 
413  std::cout << "Testing scaling with GPU scalar..." << std::endl;
414  for (std::size_t i=0; i<std_v1.size(); ++i)
415  std_v1[i] *= alpha;
416  vcl_v1 *= gpu_alpha;
417 
418  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
419  return EXIT_FAILURE;
420 
421  NumericT beta = static_cast<NumericT>(2);
422  viennacl::scalar<NumericT> gpu_beta = beta;
423 
424  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
425  for (std::size_t i=0; i<std_v1.size(); ++i)
426  std_v1[i] /= beta;
427  vcl_v1 /= beta;
428 
429  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
430  return EXIT_FAILURE;
431 
432  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
433  for (std::size_t i=0; i<std_v1.size(); ++i)
434  std_v1[i] /= beta;
435  vcl_v1 /= gpu_beta;
436 
437  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
438  return EXIT_FAILURE;
439 
440 
441  //
442  // add and inplace_add of vectors
443  //
444  for (size_t i=0; i < std_v1.size(); ++i)
445  std_v1[i] = NumericT(i);
446  for (std::size_t i=0; i<std_v1.size(); ++i)
447  std_v2[i] = 3 * std_v1[i];
448  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); //resync
449  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
450 
451  std::cout << "Testing add on vector..." << std::endl;
452 
453  std::cout << "Checking for successful copy..." << std::endl;
454  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
455  return EXIT_FAILURE;
456  if (check(std_v2, vcl_v2) != EXIT_SUCCESS)
457  return EXIT_FAILURE;
458 
459  for (std::size_t i=0; i<std_v1.size(); ++i)
460  std_v1[i] = std_v1[i] + std_v2[i];
461  vcl_v1 = vcl_v1 + vcl_v2;
462 
463  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
464  return EXIT_FAILURE;
465 
466  std::cout << "Testing add on vector with flipsign..." << std::endl;
467  for (std::size_t i=0; i<std_v1.size(); ++i)
468  std_v1[i] = - std_v1[i] + std_v2[i];
469  vcl_v1 = - vcl_v1 + vcl_v2;
470 
471  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
472  return EXIT_FAILURE;
473 
474  std::cout << "Testing inplace-add on vector..." << std::endl;
475  for (std::size_t i=0; i<std_v1.size(); ++i)
476  std_v1[i] += std_v2[i];
477  vcl_v1 += vcl_v2;
478 
479  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
480  return EXIT_FAILURE;
481 
482  //
483  // subtract and inplace_subtract of vectors
484  //
485  std::cout << "Testing sub on vector..." << std::endl;
486  for (std::size_t i=0; i<std_v1.size(); ++i)
487  std_v2[i] = 3 * std_v1[i];
488  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
489  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
490 
491  for (std::size_t i=0; i<std_v1.size(); ++i)
492  std_v1[i] = std_v1[i] - std_v2[i];
493  vcl_v1 = vcl_v1 - vcl_v2;
494 
495  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
496  return EXIT_FAILURE;
497 
498  std::cout << "Testing inplace-sub on vector..." << std::endl;
499  for (std::size_t i=0; i<std_v1.size(); ++i)
500  std_v1[i] -= std_v2[i];
501  vcl_v1 -= vcl_v2;
502 
503  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
504  return EXIT_FAILURE;
505 
506 
507 
508  //
509  // multiply-add
510  //
511  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
512  for (size_t i=0; i < std_v1.size(); ++i)
513  std_v1[i] = NumericT(i);
514  for (std::size_t i=0; i<std_v1.size(); ++i)
515  std_v2[i] = 3 * std_v1[i];
516  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
517  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
518 
519  for (std::size_t i=0; i<std_v1.size(); ++i)
520  std_v1[i] = std_v1[i] + alpha * std_v2[i];
521  vcl_v1 = vcl_v1 + alpha * vcl_v2;
522 
523  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
524  return EXIT_FAILURE;
525 
526  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
527  for (std::size_t i=0; i<std_v1.size(); ++i)
528  std_v2[i] = 3 * std_v1[i];
529  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
530  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
531 
532  for (std::size_t i=0; i<std_v1.size(); ++i)
533  std_v1[i] = alpha * std_v1[i] + std_v2[i];
534  vcl_v1 = alpha * vcl_v1 + vcl_v2;
535 
536  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
537  return EXIT_FAILURE;
538 
539  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
540  for (std::size_t i=0; i<std_v1.size(); ++i)
541  std_v2[i] = 3 * std_v1[i];
542  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
543  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
544 
545  for (std::size_t i=0; i<std_v1.size(); ++i)
546  std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
547  vcl_v1 = alpha * vcl_v1 + beta * vcl_v2;
548 
549  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
550  return EXIT_FAILURE;
551 
552 
553  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
554  for (std::size_t i=0; i<std_v1.size(); ++i)
555  std_v2[i] = 3 * std_v1[i];
556  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
557  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
558 
559  for (std::size_t i=0; i<std_v1.size(); ++i)
560  std_v1[i] += alpha * std_v2[i];
561  vcl_v1 += alpha * vcl_v2;
562 
563  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
564  return EXIT_FAILURE;
565 
566 
567  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
568  for (std::size_t i=0; i<std_v1.size(); ++i)
569  std_v2[i] = 3 * std_v1[i];
570  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
571  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
572 
573  for (std::size_t i=0; i<std_v1.size(); ++i)
574  std_v1[i] = std_v1[i] + alpha * std_v2[i];
575  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
576 
577  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
578  return EXIT_FAILURE;
579 
580  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
581  for (std::size_t i=0; i<std_v1.size(); ++i)
582  std_v2[i] = 3 * std_v1[i];
583  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
584  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
585 
586  for (std::size_t i=0; i<std_v1.size(); ++i)
587  std_v1[i] = std_v1[i] + alpha * std_v2[i];
588  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
589 
590  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
591  return EXIT_FAILURE;
592 
593  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
594  for (std::size_t i=0; i<std_v1.size(); ++i)
595  std_v2[i] = 3 * std_v1[i];
596  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
597  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
598 
599  for (std::size_t i=0; i<std_v1.size(); ++i)
600  std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
601  vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
602 
603  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
604  return EXIT_FAILURE;
605 
606 
607  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
608  for (std::size_t i=0; i<std_v1.size(); ++i)
609  std_v2[i] = 3 * std_v1[i];
610  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
611  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
612 
613  for (std::size_t i=0; i<std_v1.size(); ++i)
614  std_v1[i] += alpha * std_v1[i] + beta * std_v2[i];
615  vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
616 
617  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
618  return EXIT_FAILURE;
619 
620  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
621  for (std::size_t i=0; i<std_v1.size(); ++i)
622  std_v2[i] = 3 * std_v1[i];
623  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
624  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
625 
626  for (std::size_t i=0; i<std_v1.size(); ++i)
627  std_v1[i] += alpha * std_v1[i] - beta * std_v2[i];
628  vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
629 
630  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
631  return EXIT_FAILURE;
632 
633 
634 
635  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
636  for (std::size_t i=0; i<std_v1.size(); ++i)
637  std_v2[i] = 3 * std_v1[i];
638  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
639  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
640 
641  for (std::size_t i=0; i<std_v1.size(); ++i)
642  std_v1[i] += alpha * std_v2[i];
643  vcl_v1 += gpu_alpha * vcl_v2;
644 
645  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
646  return EXIT_FAILURE;
647 
648 
649  //
650  // division-add
651  //
652  std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl;
653  for (size_t i=0; i < std_v1.size(); ++i)
654  std_v1[i] = NumericT(i);
655  for (std::size_t i=0; i<std_v1.size(); ++i)
656  std_v2[i] = 3 * std_v1[i];
657  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
658  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
659 
660  for (std::size_t i=0; i<std_v1.size(); ++i)
661  std_v1[i] = std_v1[i] + std_v2[i] / alpha;
662  vcl_v1 = vcl_v1 + vcl_v2 / alpha;
663 
664  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
665  return EXIT_FAILURE;
666 
667 
668  std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl;
669  for (std::size_t i=0; i<std_v1.size(); ++i)
670  std_v2[i] = 3 * std_v1[i];
671  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
672  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
673 
674  for (std::size_t i=0; i<std_v1.size(); ++i)
675  std_v1[i] = std_v1[i] / alpha + std_v2[i];
676  vcl_v1 = vcl_v1 / alpha + vcl_v2;
677 
678  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
679  return EXIT_FAILURE;
680 
681  std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl;
682  for (std::size_t i=0; i<std_v1.size(); ++i)
683  std_v2[i] = 3 * std_v1[i];
684  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
685  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
686 
687  for (std::size_t i=0; i<std_v1.size(); ++i)
688  std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
689  vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta;
690 
691  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
692  return EXIT_FAILURE;
693 
694  std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl;
695  for (std::size_t i=0; i<std_v1.size(); ++i)
696  std_v2[i] = 3 * std_v1[i];
697  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
698  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
699 
700  for (std::size_t i=0; i<std_v1.size(); ++i)
701  std_v1[i] = std_v1[i] / alpha + std_v2[i] * beta;
702  vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
703 
704  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
705  return EXIT_FAILURE;
706 
707 
708  std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl;
709  for (std::size_t i=0; i<std_v1.size(); ++i)
710  std_v2[i] = 3 * std_v1[i];
711  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
712  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
713 
714  for (std::size_t i=0; i<std_v1.size(); ++i)
715  std_v1[i] = std_v1[i] * alpha + std_v2[i] / beta;
716  vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
717 
718  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
719  return EXIT_FAILURE;
720 
721 
722 
723  std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl;
724  for (std::size_t i=0; i<std_v1.size(); ++i)
725  std_v2[i] = 3 * std_v1[i];
726  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
727  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
728 
729  for (std::size_t i=0; i<std_v1.size(); ++i)
730  std_v1[i] += std_v2[i] / alpha;
731  vcl_v1 += vcl_v2 / alpha;
732 
733  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
734  return EXIT_FAILURE;
735 
736 
737  std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl;
738  for (std::size_t i=0; i<std_v1.size(); ++i)
739  std_v2[i] = 3 * std_v1[i];
740  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
741  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
742 
743  for (std::size_t i=0; i<std_v1.size(); ++i)
744  std_v1[i] = std_v1[i] + std_v2[i] / alpha;
745  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
746 
747  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
748  return EXIT_FAILURE;
749 
750  std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl;
751  for (std::size_t i=0; i<std_v1.size(); ++i)
752  std_v2[i] = 3 * std_v1[i];
753  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
754  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
755 
756  for (std::size_t i=0; i<std_v1.size(); ++i)
757  std_v1[i] = std_v1[i] + std_v2[i] / alpha;
758  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
759 
760  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
761  return EXIT_FAILURE;
762 
763  std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl;
764  for (std::size_t i=0; i<std_v1.size(); ++i)
765  std_v2[i] = 3 * std_v1[i];
766  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
767  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
768 
769  for (std::size_t i=0; i<std_v1.size(); ++i)
770  std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
771  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
772 
773  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
774  return EXIT_FAILURE;
775 
776 
777  std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
778  for (std::size_t i=0; i<std_v1.size(); ++i)
779  std_v2[i] = 3 * std_v1[i];
780  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
781  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
782 
783  for (std::size_t i=0; i<std_v1.size(); ++i)
784  std_v1[i] += std_v1[i] / alpha + std_v2[i] / beta;
785  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
786 
787  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
788  return EXIT_FAILURE;
789 
790  std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
791  for (std::size_t i=0; i<std_v1.size(); ++i)
792  std_v2[i] = 3 * std_v1[i];
793  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
794  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
795 
796  for (std::size_t i=0; i<std_v1.size(); ++i)
797  std_v1[i] += std_v1[i] / alpha - std_v2[i] / beta;
798  vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
799 
800  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
801  return EXIT_FAILURE;
802 
803  std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
804  for (std::size_t i=0; i<std_v1.size(); ++i)
805  std_v2[i] = 3 * std_v1[i];
806  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
807  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
808 
809  for (std::size_t i=0; i<std_v1.size(); ++i)
810  std_v1[i] += std_v1[i] / alpha + std_v2[i] * beta;
811  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
812 
813  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
814  return EXIT_FAILURE;
815 
816  std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
817  for (std::size_t i=0; i<std_v1.size(); ++i)
818  std_v2[i] = 3 * std_v1[i];
819  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
820  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
821 
822  for (std::size_t i=0; i<std_v1.size(); ++i)
823  std_v1[i] += std_v1[i] * alpha - std_v2[i] / beta;
824  vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
825 
826  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
827  return EXIT_FAILURE;
828 
829 
830 
831  std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl;
832  for (std::size_t i=0; i<std_v1.size(); ++i)
833  std_v2[i] = 3 * std_v1[i];
834  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
835  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
836 
837  for (std::size_t i=0; i<std_v1.size(); ++i)
838  std_v1[i] += std_v2[i] * alpha;
839  vcl_v1 += vcl_v2 * gpu_alpha;
840 
841  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
842  return EXIT_FAILURE;
843 
844  //
845  // multiply-subtract
846  //
847  std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
848  for (size_t i=0; i < std_v1.size(); ++i)
849  std_v1[i] = NumericT(i);
850  for (std::size_t i=0; i<std_v1.size(); ++i)
851  std_v2[i] = 3 * std_v1[i];
852  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
853  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
854 
855  for (std::size_t i=0; i<std_v1.size(); ++i)
856  std_v1[i] = std_v1[i] - alpha * std_v2[i];
857  vcl_v1 = vcl_v1 - alpha * vcl_v2;
858 
859  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
860  return EXIT_FAILURE;
861 
862 
863  std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
864  for (std::size_t i=0; i<std_v1.size(); ++i)
865  std_v2[i] = 3 * std_v1[i];
866  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
867  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
868 
869  for (std::size_t i=0; i<std_v1.size(); ++i)
870  std_v1[i] = alpha * std_v1[i] - std_v2[i];
871  vcl_v1 = alpha * vcl_v1 - vcl_v2;
872 
873  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
874  return EXIT_FAILURE;
875 
876  std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
877  for (std::size_t i=0; i<std_v1.size(); ++i)
878  std_v2[i] = 3 * std_v1[i];
879  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
880  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
881 
882  for (std::size_t i=0; i<std_v1.size(); ++i)
883  std_v1[i] = alpha * std_v1[i] - beta * std_v2[i];
884  vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
885 
886  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
887  return EXIT_FAILURE;
888 
889 
890  std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
891  for (std::size_t i=0; i<std_v1.size(); ++i)
892  std_v2[i] = 3 * std_v1[i];
893  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
894  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
895 
896  for (std::size_t i=0; i<std_v1.size(); ++i)
897  std_v1[i] -= alpha * std_v2[i];
898  vcl_v1 -= alpha * vcl_v2;
899 
900  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
901  return EXIT_FAILURE;
902 
903 
904  std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
905  for (std::size_t i=0; i<std_v1.size(); ++i)
906  std_v2[i] = 3 * std_v1[i];
907  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
908  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
909 
910  for (std::size_t i=0; i<std_v1.size(); ++i)
911  std_v1[i] = std_v1[i] - alpha * std_v2[i];
912  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
913 
914  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
915  return EXIT_FAILURE;
916 
917  std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
918  for (std::size_t i=0; i<std_v1.size(); ++i)
919  std_v2[i] = 3 * std_v1[i];
920  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
921  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
922 
923  for (std::size_t i=0; i<std_v1.size(); ++i)
924  std_v1[i] = std_v1[i] - alpha * std_v2[i];
925  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
926 
927  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
928  return EXIT_FAILURE;
929 
930  std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
931  for (std::size_t i=0; i<std_v1.size(); ++i)
932  std_v2[i] = 3 * std_v1[i];
933  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
934  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
935 
936  for (std::size_t i=0; i<std_v1.size(); ++i)
937  std_v1[i] = alpha * std_v1[i] - beta * std_v2[i];
938  vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
939 
940  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
941  return EXIT_FAILURE;
942 
943  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
944  for (std::size_t i=0; i<std_v1.size(); ++i)
945  std_v2[i] = 3 * std_v1[i];
946  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
947  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
948 
949  for (std::size_t i=0; i<std_v1.size(); ++i)
950  std_v1[i] -= alpha * std_v1[i] + beta * std_v2[i];
951  vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
952 
953  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
954  return EXIT_FAILURE;
955 
956  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
957  for (std::size_t i=0; i<std_v1.size(); ++i)
958  std_v2[i] = 3 * std_v1[i];
959  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
960  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
961 
962  for (std::size_t i=0; i<std_v1.size(); ++i)
963  std_v1[i] -= alpha * std_v1[i] - beta * std_v2[i];
964  vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
965 
966  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
967  return EXIT_FAILURE;
968 
969 
970  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
971  for (std::size_t i=0; i<std_v1.size(); ++i)
972  std_v2[i] = 3 * std_v1[i];
973  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
974  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
975 
976  for (std::size_t i=0; i<std_v1.size(); ++i)
977  std_v1[i] -= alpha * std_v2[i];
978  vcl_v1 -= gpu_alpha * vcl_v2;
979 
980  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
981  return EXIT_FAILURE;
982 
983 
984 
985  //
986  // division-subtract
987  //
988  std::cout << "Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
989  for (size_t i=0; i < std_v1.size(); ++i)
990  std_v1[i] = NumericT(i);
991  for (std::size_t i=0; i<std_v1.size(); ++i)
992  std_v2[i] = 3 * std_v1[i];
993  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
994  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
995 
996  for (std::size_t i=0; i<std_v1.size(); ++i)
997  std_v1[i] = std_v1[i] - std_v2[i] / alpha;
998  vcl_v1 = vcl_v1 - vcl_v2 / alpha;
999 
1000  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1001  return EXIT_FAILURE;
1002 
1003 
1004  std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1005  for (std::size_t i=0; i<std_v1.size(); ++i)
1006  std_v2[i] = 3 * std_v1[i];
1007  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1008  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1009 
1010  for (std::size_t i=0; i<std_v1.size(); ++i)
1011  std_v1[i] = std_v1[i] / alpha - std_v2[i];
1012  vcl_v1 = vcl_v1 / alpha - vcl_v2;
1013 
1014  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1015  return EXIT_FAILURE;
1016 
1017  std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1018  for (std::size_t i=0; i<std_v1.size(); ++i)
1019  std_v2[i] = 3 * std_v1[i];
1020  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1021  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1022 
1023  for (std::size_t i=0; i<std_v1.size(); ++i)
1024  std_v1[i] = std_v1[i] / alpha - std_v2[i] / alpha;
1025  vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1026 
1027  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1028  return EXIT_FAILURE;
1029 
1030 
1031  std::cout << "Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1032  for (std::size_t i=0; i<std_v1.size(); ++i)
1033  std_v2[i] = 3 * std_v1[i];
1034  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1035  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1036 
1037  for (std::size_t i=0; i<std_v1.size(); ++i)
1038  std_v1[i] -= std_v2[i] / alpha;
1039  vcl_v1 -= vcl_v2 / alpha;
1040 
1041  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1042  return EXIT_FAILURE;
1043 
1044  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1045  for (std::size_t i=0; i<std_v1.size(); ++i)
1046  std_v2[i] = 3 * std_v1[i];
1047  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1048  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1049 
1050  for (std::size_t i=0; i<std_v1.size(); ++i)
1051  std_v1[i] -= std_v2[i] / alpha;
1052  vcl_v1 -= vcl_v2 / gpu_alpha;
1053 
1054  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1055  return EXIT_FAILURE;
1056 
1057 
1058  std::cout << "Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1059  for (std::size_t i=0; i<std_v1.size(); ++i)
1060  std_v2[i] = 3 * std_v1[i];
1061  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1062  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1063 
1064  for (std::size_t i=0; i<std_v1.size(); ++i)
1065  std_v1[i] = std_v1[i] - std_v2[i] / alpha;
1066  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1067 
1068  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1069  return EXIT_FAILURE;
1070 
1071  std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1072  for (std::size_t i=0; i<std_v1.size(); ++i)
1073  std_v2[i] = 3 * std_v1[i];
1074  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1075  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1076 
1077  for (std::size_t i=0; i<std_v1.size(); ++i)
1078  std_v1[i] = std_v1[i] - std_v2[i] / alpha;
1079  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1080 
1081  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1082  return EXIT_FAILURE;
1083 
1084  std::cout << "Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1085  for (std::size_t i=0; i<std_v1.size(); ++i)
1086  std_v2[i] = 3 * std_v1[i];
1087  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1088  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1089 
1090  for (std::size_t i=0; i<std_v1.size(); ++i)
1091  std_v1[i] = std_v1[i] / alpha - std_v2[i] / beta;
1092  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1093 
1094  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1095  return EXIT_FAILURE;
1096 
1097  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1098  for (std::size_t i=0; i<std_v1.size(); ++i)
1099  std_v2[i] = 3 * std_v1[i];
1100  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1101  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1102 
1103  for (std::size_t i=0; i<std_v1.size(); ++i)
1104  std_v1[i] -= std_v1[i] / alpha + std_v2[i] / beta;
1105  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1106 
1107  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1108  return EXIT_FAILURE;
1109 
1110  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1111  for (std::size_t i=0; i<std_v1.size(); ++i)
1112  std_v2[i] = 3 * std_v1[i];
1113  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1114  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1115 
1116  for (std::size_t i=0; i<std_v1.size(); ++i)
1117  std_v1[i] -= std_v1[i] / alpha - std_v2[i] / beta;
1118  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1119 
1120  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1121  return EXIT_FAILURE;
1122 
1123  std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1124  for (std::size_t i=0; i<std_v1.size(); ++i)
1125  std_v2[i] = 3 * std_v1[i];
1126  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1127  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1128 
1129  for (std::size_t i=0; i<std_v1.size(); ++i)
1130  std_v1[i] = std_v1[i] * alpha - std_v2[i] / beta;
1131  vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1132 
1133  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1134  return EXIT_FAILURE;
1135 
1136  std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1137  for (std::size_t i=0; i<std_v1.size(); ++i)
1138  std_v2[i] = 3 * std_v1[i];
1139  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1140  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1141 
1142  for (std::size_t i=0; i<std_v1.size(); ++i)
1143  std_v1[i] = std_v1[i] / alpha - std_v2[i] * beta;
1144  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1145 
1146  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1147  return EXIT_FAILURE;
1148 
1149  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1150  for (std::size_t i=0; i<std_v1.size(); ++i)
1151  std_v2[i] = 3 * std_v1[i];
1152  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1153  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1154 
1155  for (std::size_t i=0; i<std_v1.size(); ++i)
1156  std_v1[i] -= std_v1[i] * alpha + std_v2[i] / beta;
1157  vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1158 
1159  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1160  return EXIT_FAILURE;
1161 
1162  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1163  for (std::size_t i=0; i<std_v1.size(); ++i)
1164  std_v2[i] = 3 * std_v1[i];
1165  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1166  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1167 
1168  for (std::size_t i=0; i<std_v1.size(); ++i)
1169  std_v1[i] -= std_v1[i] / alpha + std_v2[i] * beta;
1170  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1171 
1172  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1173  return EXIT_FAILURE;
1174 
1175  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1176  for (std::size_t i=0; i<std_v1.size(); ++i)
1177  std_v2[i] = 3 * std_v1[i];
1178  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1179  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1180 
1181  for (std::size_t i=0; i<std_v1.size(); ++i)
1182  std_v1[i] -= std_v1[i] * alpha - std_v2[i] / beta;
1183  vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1184 
1185  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1186  return EXIT_FAILURE;
1187 
1188  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1189  for (std::size_t i=0; i<std_v1.size(); ++i)
1190  std_v2[i] = 3 * std_v1[i];
1191  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1192  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1193 
1194  for (std::size_t i=0; i<std_v1.size(); ++i)
1195  std_v1[i] -= std_v1[i] / alpha - std_v2[i] * beta;
1196  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1197 
1198  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1199  return EXIT_FAILURE;
1200 
1201 
1202  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1203  for (std::size_t i=0; i<std_v1.size(); ++i)
1204  std_v2[i] = 3 * std_v1[i];
1205  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1206  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1207 
1208  for (std::size_t i=0; i<std_v1.size(); ++i)
1209  std_v1[i] -= alpha * std_v2[i];
1210  vcl_v1 -= gpu_alpha * vcl_v2;
1211 
1212  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1213  return EXIT_FAILURE;
1214 
1215 
1216 
1217  //
1218  // More complicated expressions (for ensuring the operator overloads work correctly)
1219  //
1220  for (size_t i=0; i < std_v1.size(); ++i)
1221  std_v1[i] = NumericT(i);
1222  for (std::size_t i=0; i<std_v1.size(); ++i)
1223  std_v2[i] = 3 * std_v1[i];
1224  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1225  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1226 
1227  std::cout << "Testing three vector additions..." << std::endl;
1228  for (std::size_t i=0; i<std_v1.size(); ++i)
1229  std_v1[i] = std_v2[i] + std_v1[i] + std_v2[i];
1230  vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1231 
1232  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1233  return EXIT_FAILURE;
1234 
1235 
1236  for (std::size_t i=0; i<std_v1.size(); ++i)
1237  std_v2[i] = 3 * std_v1[i];
1238  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1239  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1240 
1241  std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl;
1242  for (std::size_t i=0; i<std_v1.size(); ++i)
1243  std_v1[i] = beta * (std_v1[i] - alpha * std_v2[i]);
1244  vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1245 
1246  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1247  return EXIT_FAILURE;
1248 
1249  std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl;
1250  for (std::size_t i=0; i<std_v1.size(); ++i)
1251  std_v1[i] = beta * (std_v1[i] - alpha * std_v2[i]);
1252  vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1253 
1254  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1255  return EXIT_FAILURE;
1256 
1257  // --------------------------------------------------------------------------
1258  for (std::size_t i=0; i<std_v1.size(); ++i)
1259  std_v2[i] = 3 * std_v1[i];
1260  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1261  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1262 
1263  std::cout << "Testing swap..." << std::endl;
1264  swap(std_v1, std_v2);
1265  swap(vcl_v1, vcl_v2);
1266 
1267  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1268  return EXIT_FAILURE;
1269 
1270  // --------------------------------------------------------------------------
1271  for (std::size_t i=0; i<std_v1.size(); ++i)
1272  {
1273  std_v1[i] = NumericT(1.0) + NumericT(i);
1274  std_v2[i] = NumericT(5.0) + NumericT(i);
1275  }
1276 
1277  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1278  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1279 
1280  std::cout << "Testing unary operator-..." << std::endl;
1281  for (std::size_t i=0; i<std_v1.size(); ++i)
1282  std_v1[i] = -std_v2[i];
1283  vcl_v1 = -vcl_v2;
1284 
1285  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1286  return EXIT_FAILURE;
1287 
1288 
1289  std::cout << "Testing elementwise multiplication..." << std::endl;
1290  std::cout << " v1 = element_prod(v1, v2);" << std::endl;
1291  for (std::size_t i=0; i<std_v1.size(); ++i)
1292  std_v1[i] = std_v1[i] * std_v2[i];
1293  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1294 
1295  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1296  return EXIT_FAILURE;
1297 
1298  std::cout << " v1 += element_prod(v1, v2);" << std::endl;
1299  for (std::size_t i=0; i<std_v1.size(); ++i)
1300  std_v1[i] += std_v1[i] * std_v2[i];
1301  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1302 
1303  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1304  return EXIT_FAILURE;
1305 
1306  std::cout << " v1 -= element_prod(v1, v2);" << std::endl;
1307  for (std::size_t i=0; i<std_v1.size(); ++i)
1308  std_v1[i] -= std_v1[i] * std_v2[i];
1309  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1310 
1311  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1312  return EXIT_FAILURE;
1313 
1315  std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl;
1316  for (std::size_t i=0; i<std_v1.size(); ++i)
1317  std_v1[i] = (std_v1[i] + std_v2[i]) * std_v2[i];
1318  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1319 
1320  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1321  return EXIT_FAILURE;
1322 
1323  std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl;
1324  for (std::size_t i=0; i<std_v1.size(); ++i)
1325  std_v1[i] += (std_v1[i] + std_v2[i]) * std_v2[i];
1326  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1327 
1328  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1329  return EXIT_FAILURE;
1330 
1331  std::cout << " v1 -= element_prod(v1 + v2, v2);" << std::endl;
1332  for (std::size_t i=0; i<std_v1.size(); ++i)
1333  std_v1[i] -= (std_v1[i] + std_v2[i]) * std_v2[i];
1334  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1335 
1336  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1337  return EXIT_FAILURE;
1338 
1340  std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl;
1341  for (std::size_t i=0; i<std_v1.size(); ++i)
1342  std_v1[i] = std_v1[i] * (std_v2[i] + std_v1[i]);
1343  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1344 
1345  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1346  return EXIT_FAILURE;
1347 
1348  std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl;
1349  for (std::size_t i=0; i<std_v1.size(); ++i)
1350  std_v1[i] += std_v1[i] * (std_v2[i] + std_v1[i]);
1351  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1352 
1353  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1354  return EXIT_FAILURE;
1355 
1356  std::cout << " v1 -= element_prod(v1, v2 + v1);" << std::endl;
1357  for (std::size_t i=0; i<std_v1.size(); ++i)
1358  std_v1[i] -= std_v1[i] * (std_v2[i] + std_v1[i]);
1359  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1360 
1361  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1362  return EXIT_FAILURE;
1363 
1365  std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1366  for (std::size_t i=0; i<std_v1.size(); ++i)
1367  std_v1[i] = (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
1368  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1369 
1370  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1371  return EXIT_FAILURE;
1372 
1373  std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1374  for (std::size_t i=0; i<std_v1.size(); ++i)
1375  std_v1[i] += (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
1376  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1377 
1378  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1379  return EXIT_FAILURE;
1380 
1381  std::cout << " v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1382  for (std::size_t i=0; i<std_v1.size(); ++i)
1383  std_v1[i] -= (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
1384  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1385 
1386  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1387  return EXIT_FAILURE;
1388 
1389 
1390  std::cout << "Testing elementwise division..." << std::endl;
1391  for (std::size_t i=0; i<std_v1.size(); ++i)
1392  {
1393  std_v1[i] = NumericT(1 + i);
1394  std_v2[i] = NumericT(5 + i);
1395  }
1396 
1397  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1398  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1399 
1400  for (std::size_t i=0; i<std_v1.size(); ++i)
1401  std_v1[i] = std_v1[i] / std_v2[i];
1402  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);
1403 
1404  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1405  return EXIT_FAILURE;
1406 
1407  for (std::size_t i=0; i<std_v1.size(); ++i)
1408  std_v1[i] += std_v1[i] / std_v2[i];
1409  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2);
1410 
1411  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1412  return EXIT_FAILURE;
1413 
1414  for (std::size_t i=0; i<std_v1.size(); ++i)
1415  std_v1[i] -= std_v1[i] / std_v2[i];
1416  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2);
1417 
1418  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1419  return EXIT_FAILURE;
1420 
1422  for (std::size_t i=0; i<std_v1.size(); ++i)
1423  std_v1[i] = (std_v1[i] + std_v2[i]) / std_v2[i];
1424  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1425 
1426  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1427  return EXIT_FAILURE;
1428 
1429  for (std::size_t i=0; i<std_v1.size(); ++i)
1430  std_v1[i] += (std_v1[i] + std_v2[i]) / std_v2[i];
1431  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1432 
1433  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1434  return EXIT_FAILURE;
1435 
1436  for (std::size_t i=0; i<std_v1.size(); ++i)
1437  std_v1[i] -= (std_v1[i] + std_v2[i]) / std_v2[i];
1438  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1439 
1440  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1441  return EXIT_FAILURE;
1442 
1444  for (std::size_t i=0; i<std_v1.size(); ++i)
1445  std_v1[i] = std_v1[i] / (std_v2[i] + std_v1[i]);
1446  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1447 
1448  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1449  return EXIT_FAILURE;
1450 
1451  for (std::size_t i=0; i<std_v1.size(); ++i)
1452  std_v1[i] += std_v1[i] / (std_v2[i] + std_v1[i]);
1453  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1454 
1455  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1456  return EXIT_FAILURE;
1457 
1458  for (std::size_t i=0; i<std_v1.size(); ++i)
1459  std_v1[i] -= std_v1[i] / (std_v2[i] + std_v1[i]);
1460  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1461 
1462  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1463  return EXIT_FAILURE;
1464 
1466  for (std::size_t i=0; i<std_v1.size(); ++i)
1467  std_v1[i] = (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
1468  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1469 
1470  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1471  return EXIT_FAILURE;
1472 
1473  for (std::size_t i=0; i<std_v1.size(); ++i)
1474  std_v1[i] += (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
1475  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1476 
1477  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1478  return EXIT_FAILURE;
1479 
1480  for (std::size_t i=0; i<std_v1.size(); ++i)
1481  std_v1[i] -= (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
1482  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1483 
1484  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1485  return EXIT_FAILURE;
1486 
1487  std::cout << "Testing unary elementwise operations..." << std::endl;
1488 
1489 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \
1490  for (std::size_t i=0; i<std_v1.size(); ++i) \
1491  std_v2[i] = 3 * std_v1[i]; \
1492  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); \
1493  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); \
1494  \
1495  for (std::size_t i=0; i<std_v1.size(); ++i) \
1496  std_v1[i] = std::FUNCNAME(std_v2[i]); \
1497  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1498  \
1499  if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1500  { \
1501  std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
1502  return EXIT_FAILURE; \
1503  } \
1504  \
1505  for (std::size_t i=0; i<std_v1.size(); ++i) \
1506  std_v1[i] = std::FUNCNAME(std_v1[i] + std_v2[i]); \
1507  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1508  \
1509  if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1510  { \
1511  std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1512  return EXIT_FAILURE; \
1513  } \
1514  \
1515  for (std::size_t i=0; i<std_v1.size(); ++i) \
1516  std_v1[i] += std::FUNCNAME(std_v1[i]); \
1517  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
1518  \
1519  if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1520  { \
1521  std::cout << "Failure at v1 += " << #FUNCNAME << "(v2)" << std::endl; \
1522  return EXIT_FAILURE; \
1523  } \
1524  \
1525  for (std::size_t i=0; i<std_v1.size(); ++i) \
1526  std_v1[i] += std::FUNCNAME(std_v1[i] + std_v2[i]); \
1527  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1528  \
1529  if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1530  { \
1531  std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1532  return EXIT_FAILURE; \
1533  } \
1534  \
1535  for (std::size_t i=0; i<std_v1.size(); ++i) \
1536  std_v1[i] -= std::FUNCNAME(std_v2[i]); \
1537  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1538  \
1539  if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1540  { \
1541  std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
1542  return EXIT_FAILURE; \
1543  } \
1544  \
1545  for (std::size_t i=0; i<std_v1.size(); ++i) \
1546  std_v1[i] -= std::FUNCNAME(std_v1[i] + std_v2[i]); \
1547  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1548  \
1549  if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \
1550  { \
1551  std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1552  return EXIT_FAILURE; \
1553  } \
1554 
1555  //GENERATE_UNARY_OP_TEST(cos);
1556  //GENERATE_UNARY_OP_TEST(cosh);
1557  //GENERATE_UNARY_OP_TEST(exp);
1558  //GENERATE_UNARY_OP_TEST(floor);
1559  //GENERATE_UNARY_OP_TEST(fabs);
1560  //GENERATE_UNARY_OP_TEST(log);
1561  //GENERATE_UNARY_OP_TEST(log10);
1562  //GENERATE_UNARY_OP_TEST(sin);
1563  //GENERATE_UNARY_OP_TEST(sinh);
1564  //GENERATE_UNARY_OP_TEST(fabs);
1566  //GENERATE_UNARY_OP_TEST(sqrt);
1567  //GENERATE_UNARY_OP_TEST(tan);
1568  //GENERATE_UNARY_OP_TEST(tanh);
1569 
1570  std::cout << "Testing lenghty sum of scaled vectors..." << std::endl;
1571  for (std::size_t i=0; i<std_v1.size(); ++i)
1572  std_v2[i] = 3 * std_v1[i];
1573  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
1574  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());
1575 
1576  for (std::size_t i=0; i<std_v1.size(); ++i)
1577  std_v1[i] = std_v2[i] / alpha + beta * std_v1[i] - alpha * std_v2[i] + beta * std_v1[i] - alpha * std_v1[i];
1578  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
1579 
1580  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
1581  return EXIT_FAILURE;
1582 
1583  // --------------------------------------------------------------------------
1584  return retval;
1585 }
1586 
1587 
1588 template< typename NumericT >
1589 int test()
1590 {
1591  int retval = EXIT_SUCCESS;
1592  std::size_t size = 12345;
1593 
1594  std::cout << "Running tests for vector of size " << size << std::endl;
1595 
1596  //
1597  // Set up STL objects
1598  //
1599  std::vector<NumericT> std_full_vec(size);
1600  std::vector<NumericT> std_full_vec2(std_full_vec.size());
1601 
1602  for (std::size_t i=0; i<std_full_vec.size(); ++i)
1603  {
1604  std_full_vec[i] = NumericT(1.0) + NumericT(i);
1605  std_full_vec2[i] = NumericT(2.0) + NumericT(i) / NumericT(2);
1606  }
1607 
1608  std::vector<NumericT> std_range_vec (2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
1609  std::vector<NumericT> std_range_vec2(2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
1610 
1611  for (std::size_t i=0; i<std_range_vec.size(); ++i)
1612  std_range_vec[i] = std_full_vec[i + std_full_vec.size() / 4];
1613  for (std::size_t i=0; i<std_range_vec2.size(); ++i)
1614  std_range_vec2[i] = std_full_vec2[i + 2 * std_full_vec2.size() / 4];
1615 
1616  std::vector<NumericT> std_slice_vec (std_full_vec.size() / 4);
1617  std::vector<NumericT> std_slice_vec2(std_full_vec.size() / 4);
1618 
1619  for (std::size_t i=0; i<std_slice_vec.size(); ++i)
1620  std_slice_vec[i] = std_full_vec[3*i + std_full_vec.size() / 4];
1621  for (std::size_t i=0; i<std_slice_vec2.size(); ++i)
1622  std_slice_vec2[i] = std_full_vec2[2*i + 2 * std_full_vec2.size() / 4];
1623 
1624  //
1625  // Set up ViennaCL objects
1626  //
1627  viennacl::vector<NumericT> vcl_full_vec(std_full_vec.size());
1628  viennacl::vector<NumericT> vcl_full_vec2(std_full_vec2.size());
1629 
1630  viennacl::fast_copy(std_full_vec.begin(), std_full_vec.end(), vcl_full_vec.begin());
1631  viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
1632 
1633  viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
1634  viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
1635  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec(vcl_full_vec, vcl_r1);
1636  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec2(vcl_full_vec2, vcl_r2);
1637 
1638  {
1639  viennacl::vector<NumericT> vcl_short_vec(vcl_range_vec);
1640  viennacl::vector<NumericT> vcl_short_vec2 = vcl_range_vec2;
1641 
1642  std::vector<NumericT> std_short_vec(std_range_vec);
1643  std::vector<NumericT> std_short_vec2(std_range_vec2);
1644 
1645  std::cout << "Testing creation of vectors from range..." << std::endl;
1646  if (check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
1647  return EXIT_FAILURE;
1648  if (check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
1649  return EXIT_FAILURE;
1650  }
1651 
1652  viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
1653  viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
1654  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec(vcl_full_vec, vcl_s1);
1655  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec2(vcl_full_vec2, vcl_s2);
1656 
1657  viennacl::vector<NumericT> vcl_short_vec(vcl_slice_vec);
1658  viennacl::vector<NumericT> vcl_short_vec2 = vcl_slice_vec2;
1659 
1660  std::vector<NumericT> std_short_vec(std_slice_vec);
1661  std::vector<NumericT> std_short_vec2(std_slice_vec2);
1662 
1663  std::cout << "Testing creation of vectors from slice..." << std::endl;
1664  if (check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
1665  return EXIT_FAILURE;
1666  if (check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
1667  return EXIT_FAILURE;
1668 
1669 
1670  //
1671  // Now start running tests for vectors, ranges and slices:
1672  //
1673 
1674  std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
1675  retval = test<NumericT>(std_short_vec, std_short_vec2,
1676  vcl_short_vec, vcl_short_vec2);
1677  if (retval != EXIT_SUCCESS)
1678  return EXIT_FAILURE;
1679 
1680  std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
1681  retval = test<NumericT>(std_short_vec, std_short_vec2,
1682  vcl_short_vec, vcl_range_vec2);
1683  if (retval != EXIT_SUCCESS)
1684  return EXIT_FAILURE;
1685 
1686  std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
1687  retval = test<NumericT>(std_short_vec, std_short_vec2,
1688  vcl_short_vec, vcl_slice_vec2);
1689  if (retval != EXIT_SUCCESS)
1690  return EXIT_FAILURE;
1691 
1693 
1694  std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
1695  retval = test<NumericT>(std_short_vec, std_short_vec2,
1696  vcl_range_vec, vcl_short_vec2);
1697  if (retval != EXIT_SUCCESS)
1698  return EXIT_FAILURE;
1699 
1700  std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
1701  retval = test<NumericT>(std_short_vec, std_short_vec2,
1702  vcl_range_vec, vcl_range_vec2);
1703  if (retval != EXIT_SUCCESS)
1704  return EXIT_FAILURE;
1705 
1706  std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
1707  retval = test<NumericT>(std_short_vec, std_short_vec2,
1708  vcl_range_vec, vcl_slice_vec2);
1709  if (retval != EXIT_SUCCESS)
1710  return EXIT_FAILURE;
1711 
1713 
1714  std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
1715  retval = test<NumericT>(std_short_vec, std_short_vec2,
1716  vcl_slice_vec, vcl_short_vec2);
1717  if (retval != EXIT_SUCCESS)
1718  return EXIT_FAILURE;
1719 
1720  std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
1721  retval = test<NumericT>(std_short_vec, std_short_vec2,
1722  vcl_slice_vec, vcl_range_vec2);
1723  if (retval != EXIT_SUCCESS)
1724  return EXIT_FAILURE;
1725 
1726  std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
1727  retval = test<NumericT>(std_short_vec, std_short_vec2,
1728  vcl_slice_vec, vcl_slice_vec2);
1729  if (retval != EXIT_SUCCESS)
1730  return EXIT_FAILURE;
1731 
1732  return EXIT_SUCCESS;
1733 }
1734 
1735 
1736 
1737 //
1738 // -------------------------------------------------------------
1739 //
1740 int main()
1741 {
1742  std::cout << std::endl;
1743  std::cout << "----------------------------------------------" << std::endl;
1744  std::cout << "----------------------------------------------" << std::endl;
1745  std::cout << "## Test :: Vector with Integer types" << std::endl;
1746  std::cout << "----------------------------------------------" << std::endl;
1747  std::cout << "----------------------------------------------" << std::endl;
1748  std::cout << std::endl;
1749 
1750  int retval = EXIT_SUCCESS;
1751 
1752  std::cout << std::endl;
1753  std::cout << "----------------------------------------------" << std::endl;
1754  std::cout << std::endl;
1755  {
1756  std::cout << "# Testing setup:" << std::endl;
1757  std::cout << " numeric: int" << std::endl;
1758  retval = test<int>();
1759  if ( retval == EXIT_SUCCESS )
1760  std::cout << "# Test passed" << std::endl;
1761  else
1762  return retval;
1763  }
1764  std::cout << std::endl;
1765  std::cout << "----------------------------------------------" << std::endl;
1766  std::cout << std::endl;
1767  {
1768  std::cout << "# Testing setup:" << std::endl;
1769  std::cout << " numeric: long" << std::endl;
1770  retval = test<long>();
1771  if ( retval == EXIT_SUCCESS )
1772  std::cout << "# Test passed" << std::endl;
1773  else
1774  return retval;
1775  }
1776  std::cout << std::endl;
1777  std::cout << "----------------------------------------------" << std::endl;
1778  std::cout << std::endl;
1779 
1780  std::cout << std::endl;
1781  std::cout << "------- Test completed --------" << std::endl;
1782  std::cout << std::endl;
1783 
1784  return retval;
1785 }
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
Definition: sum.hpp:45
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:100
viennacl::scalar< int > s2
viennacl::scalar< float > s1
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
float NumericT
Definition: bisect.cpp:40
int check(T1 const &t1, T2 const &t2)
Definition: vector_int.cpp:89
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
int test(STLVectorType &std_v1, STLVectorType &std_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
Definition: vector_int.cpp:106
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:434
int main()
Class for representing strided subvectors of a bigger vector x.
Definition: forwards.h:437
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
Definition: vector_int.cpp:46
Proxy classes for vectors.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
Definition: vector_def.hpp:76
Stub routines for the summation of elements in a vector, or all elements in either a row or column of...
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
NumericT max(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:47
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
Definition: norm_1.hpp:61
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
float ScalarType
Definition: fft_1d.cpp:42
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
A slice class that refers to a strided set of indices, described by a start index, a stride, and a number of elements.
Definition: forwards.h:429
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:233
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:91
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)