ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
scheduler_matrix_matrix.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2016, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
23 //
24 // *** System
25 //
26 #include <iostream>
27 #include <vector>
28 
29 //
30 // *** ViennaCL
31 //
32 #include "viennacl/scalar.hpp"
33 #include "viennacl/matrix.hpp"
35 #include "viennacl/vector.hpp"
36 #include "viennacl/linalg/prod.hpp"
40 
43 
44 
45 //
46 // -------------------------------------------------------------
47 //
48 template<typename ScalarType>
50 {
52  if (s1 != s2)
53  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
54  return 0;
55 }
56 
57 template<typename ScalarType, typename VCLVectorType>
58 ScalarType diff(std::vector<ScalarType> const & v1, VCLVectorType const & v2)
59 {
60  std::vector<ScalarType> v2_cpu(v2.size());
61  viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8)
62  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
63 
64  ScalarType norm_inf = 0;
65  for (unsigned int i=0;i<v1.size(); ++i)
66  {
67  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
68  {
69  ScalarType tmp = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
70  if (tmp > norm_inf)
71  norm_inf = tmp;
72  }
73  }
74 
75  return norm_inf;
76 }
77 
78 template<typename ScalarType, typename VCLMatrixType>
79 ScalarType diff(std::vector<std::vector<ScalarType> > const & mat1, VCLMatrixType const & mat2)
80 {
81  std::vector<std::vector<ScalarType> > mat2_cpu(mat2.size1(), std::vector<ScalarType>(mat2.size2()));
82  viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8)
83  viennacl::copy(mat2, mat2_cpu);
84  ScalarType ret = 0;
85  ScalarType act = 0;
86 
87  for (std::size_t i = 0; i < mat2_cpu.size(); ++i)
88  {
89  for (std::size_t j = 0; j < mat2_cpu[i].size(); ++j)
90  {
91  act = std::fabs(mat2_cpu[i][j] - mat1[i][j]) / std::max( std::fabs(mat2_cpu[i][j]), std::fabs(mat1[i][j]) );
92  if (act > ret)
93  ret = act;
94  }
95  }
96  //std::cout << ret << std::endl;
97  return ret;
98 }
99 
100 
101 
102 
103 
104 //
105 // Part 1: Matrix-matrix multiplications
106 //
107 
108 
109 template< typename NumericT, typename Epsilon,
110  typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC,
111  typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
112 int test_prod(Epsilon const& epsilon,
113 
114  ReferenceMatrixTypeA const & A, ReferenceMatrixTypeA const & A_trans,
115  ReferenceMatrixTypeB const & B, ReferenceMatrixTypeB const & B_trans,
116  ReferenceMatrixTypeC & C,
117 
118  MatrixTypeA const & vcl_A, MatrixTypeA const & vcl_A_trans,
119  MatrixTypeB const & vcl_B, MatrixTypeB const & vcl_B_trans,
120  MatrixTypeC & vcl_C
121  )
122 {
123  int retval = EXIT_SUCCESS;
124  NumericT act_diff = 0;
125 
126 
127  // Test: C +-= A * B --------------------------------------------------------------------------
128  for (std::size_t i=0; i<C.size(); ++i)
129  for (std::size_t j=0; j<C[i].size(); ++j)
130  {
131  NumericT tmp = 0;
132  for (std::size_t k=0; k<A[i].size(); ++k)
133  tmp += A[i][k] * B[k][j];
134  C[i][j] = tmp;
135  }
136  {
137  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A, vcl_B));
138  viennacl::scheduler::execute(my_statement);
139  }
140  act_diff = std::fabs(diff(C, vcl_C));
141 
142  if ( act_diff > epsilon )
143  {
144  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
145  std::cout << " diff: " << act_diff << std::endl;
146  retval = EXIT_FAILURE;
147  }
148  else
149  std::cout << "Test C = A * B passed!" << std::endl;
150 
151 
152  for (std::size_t i=0; i<C.size(); ++i)
153  for (std::size_t j=0; j<C[i].size(); ++j)
154  {
155  NumericT tmp = 0;
156  for (std::size_t k=0; k<A[i].size(); ++k)
157  tmp += A[i][k] * B[k][j];
158  C[i][j] += tmp;
159  }
160  {
162  viennacl::scheduler::execute(my_statement);
163  }
164  act_diff = std::fabs(diff(C, vcl_C));
165 
166  if ( act_diff > epsilon )
167  {
168  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
169  std::cout << " diff: " << act_diff << std::endl;
170  retval = EXIT_FAILURE;
171  }
172  else
173  std::cout << "Test C += A * B passed!" << std::endl;
174 
175  for (std::size_t i=0; i<C.size(); ++i)
176  for (std::size_t j=0; j<C[i].size(); ++j)
177  {
178  NumericT tmp = 0;
179  for (std::size_t k=0; k<A[i].size(); ++k)
180  tmp += A[i][k] * B[k][j];
181  C[i][j] -= tmp;
182  }
183  {
185  viennacl::scheduler::execute(my_statement);
186  }
187  act_diff = std::fabs(diff(C, vcl_C));
188 
189  if ( act_diff > epsilon )
190  {
191  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
192  std::cout << " diff: " << act_diff << std::endl;
193  retval = EXIT_FAILURE;
194  }
195  else
196  std::cout << "Test C -= A * B passed!" << std::endl;
197 
198 
199 
200 
201 
202  // Test: C +-= A * trans(B) --------------------------------------------------------------------------
203  for (std::size_t i=0; i<C.size(); ++i)
204  for (std::size_t j=0; j<C[i].size(); ++j)
205  {
206  NumericT tmp = 0;
207  for (std::size_t k=0; k<A[i].size(); ++k)
208  tmp += A[i][k] * B_trans[j][k];
209  C[i][j] = tmp;
210  }
211  {
212  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)));
213  viennacl::scheduler::execute(my_statement);
214  }
215  act_diff = std::fabs(diff(C, vcl_C));
216 
217  if ( act_diff > epsilon )
218  {
219  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
220  std::cout << " diff: " << act_diff << std::endl;
221  retval = EXIT_FAILURE;
222  }
223  else
224  std::cout << "Test C = A * trans(B) passed!" << std::endl;
225 
226 
227  for (std::size_t i=0; i<C.size(); ++i)
228  for (std::size_t j=0; j<C[i].size(); ++j)
229  {
230  NumericT tmp = 0;
231  for (std::size_t k=0; k<A[i].size(); ++k)
232  tmp += A[i][k] * B_trans[j][k];
233  C[i][j] += tmp;
234  }
235  {
236  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)));
237  viennacl::scheduler::execute(my_statement);
238  }
239  act_diff = std::fabs(diff(C, vcl_C));
240 
241  if ( act_diff > epsilon )
242  {
243  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
244  std::cout << " diff: " << act_diff << std::endl;
245  retval = EXIT_FAILURE;
246  }
247  else
248  std::cout << "Test C += A * trans(B) passed!" << std::endl;
249 
250 
251  for (std::size_t i=0; i<C.size(); ++i)
252  for (std::size_t j=0; j<C[i].size(); ++j)
253  {
254  NumericT tmp = 0;
255  for (std::size_t k=0; k<A[i].size(); ++k)
256  tmp += A[i][k] * B_trans[j][k];
257  C[i][j] -= tmp;
258  }
259  {
260  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)));
261  viennacl::scheduler::execute(my_statement);
262  }
263  act_diff = std::fabs(diff(C, vcl_C));
264 
265  if ( act_diff > epsilon )
266  {
267  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
268  std::cout << " diff: " << act_diff << std::endl;
269  retval = EXIT_FAILURE;
270  }
271  else
272  std::cout << "Test C -= A * trans(B) passed!" << std::endl;
273 
274 
275 
276  // Test: C +-= trans(A) * B --------------------------------------------------------------------------
277  for (std::size_t i=0; i<C.size(); ++i)
278  for (std::size_t j=0; j<C[i].size(); ++j)
279  {
280  NumericT tmp = 0;
281  for (std::size_t k=0; k<A[i].size(); ++k)
282  tmp += A_trans[k][i] * B[k][j];
283  C[i][j] = tmp;
284  }
285  {
286  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B));
287  viennacl::scheduler::execute(my_statement);
288  }
289  act_diff = std::fabs(diff(C, vcl_C));
290 
291  if ( act_diff > epsilon )
292  {
293  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
294  std::cout << " diff: " << act_diff << std::endl;
295  retval = EXIT_FAILURE;
296  }
297  else
298  std::cout << "Test C = trans(A) * B passed!" << std::endl;
299 
300 
301  for (std::size_t i=0; i<C.size(); ++i)
302  for (std::size_t j=0; j<C[i].size(); ++j)
303  {
304  NumericT tmp = 0;
305  for (std::size_t k=0; k<A[i].size(); ++k)
306  tmp += A_trans[k][i] * B[k][j];
307  C[i][j] += tmp;
308  }
309  {
310  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B));
311  viennacl::scheduler::execute(my_statement);
312  }
313  act_diff = std::fabs(diff(C, vcl_C));
314 
315  if ( act_diff > epsilon )
316  {
317  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
318  std::cout << " diff: " << act_diff << std::endl;
319  retval = EXIT_FAILURE;
320  }
321  else
322  std::cout << "Test C += trans(A) * B passed!" << std::endl;
323 
324 
325  for (std::size_t i=0; i<C.size(); ++i)
326  for (std::size_t j=0; j<C[i].size(); ++j)
327  {
328  NumericT tmp = 0;
329  for (std::size_t k=0; k<A[i].size(); ++k)
330  tmp += A_trans[k][i] * B[k][j];
331  C[i][j] -= tmp;
332  }
333  {
334  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B));
335  viennacl::scheduler::execute(my_statement);
336  }
337  act_diff = std::fabs(diff(C, vcl_C));
338 
339  if ( act_diff > epsilon )
340  {
341  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
342  std::cout << " diff: " << act_diff << std::endl;
343  retval = EXIT_FAILURE;
344  }
345  else
346  std::cout << "Test C -= trans(A) * B passed!" << std::endl;
347 
348 
349 
350 
351 
352  // Test: C +-= trans(A) * trans(B) --------------------------------------------------------------------------
353  for (std::size_t i=0; i<C.size(); ++i)
354  for (std::size_t j=0; j<C[i].size(); ++j)
355  {
356  NumericT tmp = 0;
357  for (std::size_t k=0; k<A[i].size(); ++k)
358  tmp += A_trans[k][i] * B_trans[j][k];
359  C[i][j] = tmp;
360  }
361  {
362  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)));
363  viennacl::scheduler::execute(my_statement);
364  }
365  act_diff = std::fabs(diff(C, vcl_C));
366 
367  if ( act_diff > epsilon )
368  {
369  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
370  std::cout << " diff: " << act_diff << std::endl;
371  retval = EXIT_FAILURE;
372  }
373  else
374  std::cout << "Test C = trans(A) * trans(B) passed!" << std::endl;
375 
376  for (std::size_t i=0; i<C.size(); ++i)
377  for (std::size_t j=0; j<C[i].size(); ++j)
378  {
379  NumericT tmp = 0;
380  for (std::size_t k=0; k<A[i].size(); ++k)
381  tmp += A_trans[k][i] * B_trans[j][k];
382  C[i][j] += tmp;
383  }
384  {
385  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)));
386  viennacl::scheduler::execute(my_statement);
387  }
388  act_diff = std::fabs(diff(C, vcl_C));
389 
390  if ( act_diff > epsilon )
391  {
392  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
393  std::cout << " diff: " << act_diff << std::endl;
394  retval = EXIT_FAILURE;
395  }
396  else
397  std::cout << "Test C += trans(A) * trans(B) passed!" << std::endl;
398 
399 
400  for (std::size_t i=0; i<C.size(); ++i)
401  for (std::size_t j=0; j<C[i].size(); ++j)
402  {
403  NumericT tmp = 0;
404  for (std::size_t k=0; k<A[i].size(); ++k)
405  tmp += A_trans[k][i] * B_trans[j][k];
406  C[i][j] -= tmp;
407  }
408  {
409  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)));
410  viennacl::scheduler::execute(my_statement);
411  }
412  act_diff = std::fabs(diff(C, vcl_C));
413 
414  if ( act_diff > epsilon )
415  {
416  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
417  std::cout << " diff: " << act_diff << std::endl;
418  retval = EXIT_FAILURE;
419  }
420  else
421  std::cout << "Test C -= trans(A) * trans(B) passed!" << std::endl;
422 
423 
424 
425 
426  return retval;
427 }
428 
429 
430 
431 template< typename NumericT, typename F_A, typename F_B, typename F_C, typename Epsilon >
432 int test_prod(Epsilon const& epsilon)
433 {
434  int ret;
435 
437 
438  std::size_t matrix_size1 = 29; //some odd number, not too large
439  std::size_t matrix_size2 = 47; //some odd number, not too large
440  std::size_t matrix_size3 = 33; //some odd number, not too large
441  //std::size_t matrix_size1 = 128; //some odd number, not too large
442  //std::size_t matrix_size2 = 64; //some odd number, not too large
443  //std::size_t matrix_size3 = 128; //some odd number, not too large
444  //std::size_t matrix_size1 = 256; // for testing AMD kernels
445  //std::size_t matrix_size2 = 256; // for testing AMD kernels
446  //std::size_t matrix_size3 = 256; // for testing AMD kernels
447 
448  // --------------------------------------------------------------------------
449 
450  // ublas reference:
451  std::vector<std::vector<NumericT> > A(matrix_size1, std::vector<NumericT>(matrix_size2));
452  std::vector<std::vector<NumericT> > big_A(4*matrix_size1, std::vector<NumericT>(4*matrix_size2, NumericT(3.1415)));
453 
454  std::vector<std::vector<NumericT> > B(matrix_size2, std::vector<NumericT>(matrix_size3));
455  std::vector<std::vector<NumericT> > big_B(4*matrix_size2, std::vector<NumericT>(4*matrix_size3, NumericT(42.0)));
456 
457  std::vector<std::vector<NumericT> > C(matrix_size1, std::vector<NumericT>(matrix_size3));
458 
459  //fill A and B:
460  for (std::size_t i = 0; i < A.size(); ++i)
461  for (std::size_t j = 0; j < A[0].size(); ++j)
462  A[i][j] = static_cast<NumericT>(0.1) * randomNumber();
463  for (std::size_t i = 0; i < B.size(); ++i)
464  for (std::size_t j = 0; j < B[0].size(); ++j)
465  B[i][j] = static_cast<NumericT>(0.1) * randomNumber();
466 
467  std::vector<std::vector<NumericT> > A_trans(A[0].size(), std::vector<NumericT>(A.size()));
468  for (std::size_t i = 0; i < A.size(); ++i)
469  for (std::size_t j = 0; j < A[0].size(); ++j)
470  A_trans[j][i] = A[i][j];
471 
472  std::vector<std::vector<NumericT> > big_A_trans(big_A[0].size(), std::vector<NumericT>(big_A.size()));
473  for (std::size_t i = 0; i < big_A.size(); ++i)
474  for (std::size_t j = 0; j < big_A[0].size(); ++j)
475  big_A_trans[j][i] = big_A[i][j];
476 
477 
478  std::vector<std::vector<NumericT> > B_trans(B[0].size(), std::vector<NumericT>(B.size()));
479  for (std::size_t i = 0; i < B.size(); ++i)
480  for (std::size_t j = 0; j < B[0].size(); ++j)
481  B_trans[j][i] = B[i][j];
482 
483  std::vector<std::vector<NumericT> > big_B_trans(big_B[0].size(), std::vector<NumericT>(big_B.size()));
484  for (std::size_t i = 0; i < big_B.size(); ++i)
485  for (std::size_t j = 0; j < big_B[0].size(); ++j)
486  big_B_trans[j][i] = big_B[i][j];
487 
488  //
489  // ViennaCL objects
490  //
491 
492  // A
493  viennacl::range range1_A(matrix_size1, 2*matrix_size1);
494  viennacl::range range2_A(matrix_size2, 2*matrix_size2);
495  viennacl::slice slice1_A(matrix_size1, 2, matrix_size1);
496  viennacl::slice slice2_A(matrix_size2, 3, matrix_size2);
497 
498  viennacl::matrix<NumericT, F_A> vcl_A(matrix_size1, matrix_size2);
499  viennacl::copy(A, vcl_A);
500 
501  viennacl::matrix<NumericT, F_A> vcl_big_range_A(4*matrix_size1, 4*matrix_size2);
502  viennacl::matrix_range<viennacl::matrix<NumericT, F_A> > vcl_range_A(vcl_big_range_A, range1_A, range2_A);
503  viennacl::copy(A, vcl_range_A);
504 
505  viennacl::matrix<NumericT, F_A> vcl_big_slice_A(4*matrix_size1, 4*matrix_size2);
506  viennacl::matrix_slice<viennacl::matrix<NumericT, F_A> > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A);
507  viennacl::copy(A, vcl_slice_A);
508 
509 
510  // A^T
511  viennacl::matrix<NumericT, F_A> vcl_A_trans(matrix_size2, matrix_size1);
512  viennacl::copy(A_trans, vcl_A_trans);
513 
514  viennacl::matrix<NumericT, F_A> vcl_big_range_A_trans(4*matrix_size2, 4*matrix_size1);
515  viennacl::matrix_range<viennacl::matrix<NumericT, F_A> > vcl_range_A_trans(vcl_big_range_A_trans, range2_A, range1_A);
516  viennacl::copy(A_trans, vcl_range_A_trans);
517 
518  viennacl::matrix<NumericT, F_A> vcl_big_slice_A_trans(4*matrix_size2, 4*matrix_size1);
519  viennacl::matrix_slice<viennacl::matrix<NumericT, F_A> > vcl_slice_A_trans(vcl_big_slice_A_trans, slice2_A, slice1_A);
520  viennacl::copy(A_trans, vcl_slice_A_trans);
521 
522 
523 
524  // B
525  viennacl::range range1_B(2*matrix_size2, 3*matrix_size2);
526  viennacl::range range2_B(2*matrix_size3, 3*matrix_size3);
527  viennacl::slice slice1_B(matrix_size2, 3, matrix_size2);
528  viennacl::slice slice2_B(matrix_size3, 2, matrix_size3);
529 
530  viennacl::matrix<NumericT, F_B> vcl_B(matrix_size2, matrix_size3);
531  viennacl::copy(B, vcl_B);
532 
533  viennacl::matrix<NumericT, F_B> vcl_big_range_B(4*matrix_size2, 4*matrix_size3);
534  viennacl::matrix_range<viennacl::matrix<NumericT, F_B> > vcl_range_B(vcl_big_range_B, range1_B, range2_B);
535  viennacl::copy(B, vcl_range_B);
536 
537  viennacl::matrix<NumericT, F_B> vcl_big_slice_B(4*matrix_size2, 4*matrix_size3);
538  viennacl::matrix_slice<viennacl::matrix<NumericT, F_B> > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B);
539  viennacl::copy(B, vcl_slice_B);
540 
541 
542  // B^T
543 
544  viennacl::matrix<NumericT, F_B> vcl_B_trans(matrix_size3, matrix_size2);
545  viennacl::copy(B_trans, vcl_B_trans);
546 
547  viennacl::matrix<NumericT, F_B> vcl_big_range_B_trans(4*matrix_size3, 4*matrix_size2);
548  viennacl::matrix_range<viennacl::matrix<NumericT, F_B> > vcl_range_B_trans(vcl_big_range_B_trans, range2_B, range1_B);
549  viennacl::copy(B_trans, vcl_range_B_trans);
550 
551  viennacl::matrix<NumericT, F_B> vcl_big_slice_B_trans(4*matrix_size3, 4*matrix_size2);
552  viennacl::matrix_slice<viennacl::matrix<NumericT, F_B> > vcl_slice_B_trans(vcl_big_slice_B_trans, slice2_B, slice1_B);
553  viennacl::copy(B_trans, vcl_slice_B_trans);
554 
555 
556  // C
557 
558  viennacl::range range1_C(matrix_size1-1, 2*matrix_size1-1);
559  viennacl::range range2_C(matrix_size3-1, 2*matrix_size3-1);
560  viennacl::slice slice1_C(matrix_size1-1, 3, matrix_size1);
561  viennacl::slice slice2_C(matrix_size3-1, 3, matrix_size3);
562 
563  viennacl::matrix<NumericT, F_C> vcl_C(matrix_size1, matrix_size3);
564 
565  viennacl::matrix<NumericT, F_C> vcl_big_range_C(4*matrix_size1, 4*matrix_size3);
566  viennacl::matrix_range<viennacl::matrix<NumericT, F_C> > vcl_range_C(vcl_big_range_C, range1_C, range2_C);
567 
568  viennacl::matrix<NumericT, F_C> vcl_big_slice_C(4*matrix_size1, 4*matrix_size3);
569  viennacl::matrix_slice<viennacl::matrix<NumericT, F_C> > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C);
570 
571 
572  std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl;
573 
577 
578  //
579  //
580  std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl;
581  ret = test_prod<NumericT>(epsilon,
582  A, A_trans, B, B_trans, C,
583  vcl_A, vcl_A_trans,
584  vcl_B, vcl_B_trans,
585  vcl_C);
586  if (ret != EXIT_SUCCESS)
587  return ret;
588 
589 
590  //
591  //
592  std::cout << "Now using A=matrix, B=matrix, C=range" << std::endl;
593  ret = test_prod<NumericT>(epsilon,
594  A, A_trans, B, B_trans, C,
595  vcl_A, vcl_A_trans,
596  vcl_B, vcl_B_trans,
597  vcl_range_C);
598  if (ret != EXIT_SUCCESS)
599  return ret;
600 
601  //
602  //
603  std::cout << "Now using A=matrix, B=matrix, C=slice" << std::endl;
604  ret = test_prod<NumericT>(epsilon,
605  A, A_trans, B, B_trans, C,
606  vcl_A, vcl_A_trans,
607  vcl_B, vcl_B_trans,
608  vcl_slice_C);
609  if (ret != EXIT_SUCCESS)
610  return ret;
611 
612 
613 
614  //
615  //
616  std::cout << "Now using A=matrix, B=range, C=matrix" << std::endl;
617  ret = test_prod<NumericT>(epsilon,
618  A, A_trans, B, B_trans, C,
619  vcl_A, vcl_A_trans,
620  vcl_range_B, vcl_range_B_trans,
621  vcl_C);
622  if (ret != EXIT_SUCCESS)
623  return ret;
624 
625 
626  //
627  //
628  std::cout << "Now using A=matrix, B=range, C=range" << std::endl;
629  ret = test_prod<NumericT>(epsilon,
630  A, A_trans, B, B_trans, C,
631  vcl_A, vcl_A_trans,
632  vcl_range_B, vcl_range_B_trans,
633  vcl_range_C);
634  if (ret != EXIT_SUCCESS)
635  return ret;
636 
637  //
638  //
639  std::cout << "Now using A=matrix, B=range, C=slice" << std::endl;
640  ret = test_prod<NumericT>(epsilon,
641  A, A_trans, B, B_trans, C,
642  vcl_A, vcl_A_trans,
643  vcl_range_B, vcl_range_B_trans,
644  vcl_slice_C);
645  if (ret != EXIT_SUCCESS)
646  return ret;
647 
648 
649  //
650  //
651  std::cout << "Now using A=matrix, B=slice, C=matrix" << std::endl;
652  ret = test_prod<NumericT>(epsilon,
653  A, A_trans, B, B_trans, C,
654  vcl_A, vcl_A_trans,
655  vcl_slice_B, vcl_slice_B_trans,
656  vcl_C);
657  if (ret != EXIT_SUCCESS)
658  return ret;
659 
660 
661  //
662  //
663  std::cout << "Now using A=matrix, B=slice, C=range" << std::endl;
664  ret = test_prod<NumericT>(epsilon,
665  A, A_trans, B, B_trans, C,
666  vcl_A, vcl_A_trans,
667  vcl_slice_B, vcl_slice_B_trans,
668  vcl_range_C);
669  if (ret != EXIT_SUCCESS)
670  return ret;
671 
672  //
673  //
674  std::cout << "Now using A=matrix, B=slice, C=slice" << std::endl;
675  ret = test_prod<NumericT>(epsilon,
676  A, A_trans, B, B_trans, C,
677  vcl_A, vcl_A_trans,
678  vcl_slice_B, vcl_slice_B_trans,
679  vcl_slice_C);
680  if (ret != EXIT_SUCCESS)
681  return ret;
682 
683 
687 
688  //
689  //
690  std::cout << "Now using A=range, B=matrix, C=matrix" << std::endl;
691  ret = test_prod<NumericT>(epsilon,
692  A, A_trans, B, B_trans, C,
693  vcl_range_A, vcl_range_A_trans,
694  vcl_B, vcl_B_trans,
695  vcl_C);
696  if (ret != EXIT_SUCCESS)
697  return ret;
698 
699 
700  //
701  //
702  std::cout << "Now using A=range, B=matrix, C=range" << std::endl;
703  ret = test_prod<NumericT>(epsilon,
704  A, A_trans, B, B_trans, C,
705  vcl_range_A, vcl_range_A_trans,
706  vcl_B, vcl_B_trans,
707  vcl_range_C);
708  if (ret != EXIT_SUCCESS)
709  return ret;
710 
711  //
712  //
713  std::cout << "Now using A=range, B=matrix, C=slice" << std::endl;
714  ret = test_prod<NumericT>(epsilon,
715  A, A_trans, B, B_trans, C,
716  vcl_range_A, vcl_range_A_trans,
717  vcl_B, vcl_B_trans,
718  vcl_slice_C);
719  if (ret != EXIT_SUCCESS)
720  return ret;
721 
722 
723 
724  //
725  //
726  std::cout << "Now using A=range, B=range, C=matrix" << std::endl;
727  ret = test_prod<NumericT>(epsilon,
728  A, A_trans, B, B_trans, C,
729  vcl_range_A, vcl_range_A_trans,
730  vcl_range_B, vcl_range_B_trans,
731  vcl_C);
732  if (ret != EXIT_SUCCESS)
733  return ret;
734 
735 
736  //
737  //
738  std::cout << "Now using A=range, B=range, C=range" << std::endl;
739  ret = test_prod<NumericT>(epsilon,
740  A, A_trans, B, B_trans, C,
741  vcl_range_A, vcl_range_A_trans,
742  vcl_range_B, vcl_range_B_trans,
743  vcl_range_C);
744  if (ret != EXIT_SUCCESS)
745  return ret;
746 
747  //
748  //
749  std::cout << "Now using A=range, B=range, C=slice" << std::endl;
750  ret = test_prod<NumericT>(epsilon,
751  A, A_trans, B, B_trans, C,
752  vcl_range_A, vcl_range_A_trans,
753  vcl_range_B, vcl_range_B_trans,
754  vcl_slice_C);
755  if (ret != EXIT_SUCCESS)
756  return ret;
757 
758 
759  //
760  //
761  std::cout << "Now using A=range, B=slice, C=matrix" << std::endl;
762  ret = test_prod<NumericT>(epsilon,
763  A, A_trans, B, B_trans, C,
764  vcl_range_A, vcl_range_A_trans,
765  vcl_slice_B, vcl_slice_B_trans,
766  vcl_C);
767  if (ret != EXIT_SUCCESS)
768  return ret;
769 
770 
771  //
772  //
773  std::cout << "Now using A=range, B=slice, C=range" << std::endl;
774  ret = test_prod<NumericT>(epsilon,
775  A, A_trans, B, B_trans, C,
776  vcl_range_A, vcl_range_A_trans,
777  vcl_slice_B, vcl_slice_B_trans,
778  vcl_range_C);
779  if (ret != EXIT_SUCCESS)
780  return ret;
781 
782  //
783  //
784  std::cout << "Now using A=range, B=slice, C=slice" << std::endl;
785  ret = test_prod<NumericT>(epsilon,
786  A, A_trans, B, B_trans, C,
787  vcl_range_A, vcl_range_A_trans,
788  vcl_slice_B, vcl_slice_B_trans,
789  vcl_slice_C);
790  if (ret != EXIT_SUCCESS)
791  return ret;
792 
793 
794 
798 
799  //
800  //
801  std::cout << "Now using A=slice, B=matrix, C=matrix" << std::endl;
802  ret = test_prod<NumericT>(epsilon,
803  A, A_trans, B, B_trans, C,
804  vcl_slice_A, vcl_slice_A_trans,
805  vcl_B, vcl_B_trans,
806  vcl_C);
807  if (ret != EXIT_SUCCESS)
808  return ret;
809 
810 
811  //
812  //
813  std::cout << "Now using A=slice, B=matrix, C=range" << std::endl;
814  ret = test_prod<NumericT>(epsilon,
815  A, A_trans, B, B_trans, C,
816  vcl_slice_A, vcl_slice_A_trans,
817  vcl_B, vcl_B_trans,
818  vcl_range_C);
819  if (ret != EXIT_SUCCESS)
820  return ret;
821 
822  //
823  //
824  std::cout << "Now using A=slice, B=matrix, C=slice" << std::endl;
825  ret = test_prod<NumericT>(epsilon,
826  A, A_trans, B, B_trans, C,
827  vcl_slice_A, vcl_slice_A_trans,
828  vcl_B, vcl_B_trans,
829  vcl_slice_C);
830  if (ret != EXIT_SUCCESS)
831  return ret;
832 
833 
834 
835  //
836  //
837  std::cout << "Now using A=slice, B=range, C=matrix" << std::endl;
838  ret = test_prod<NumericT>(epsilon,
839  A, A_trans, B, B_trans, C,
840  vcl_slice_A, vcl_slice_A_trans,
841  vcl_range_B, vcl_range_B_trans,
842  vcl_C);
843  if (ret != EXIT_SUCCESS)
844  return ret;
845 
846 
847  //
848  //
849  std::cout << "Now using A=slice, B=range, C=range" << std::endl;
850  ret = test_prod<NumericT>(epsilon,
851  A, A_trans, B, B_trans, C,
852  vcl_slice_A, vcl_slice_A_trans,
853  vcl_range_B, vcl_range_B_trans,
854  vcl_range_C);
855  if (ret != EXIT_SUCCESS)
856  return ret;
857 
858  //
859  //
860  std::cout << "Now using A=slice, B=range, C=slice" << std::endl;
861  ret = test_prod<NumericT>(epsilon,
862  A, A_trans, B, B_trans, C,
863  vcl_slice_A, vcl_slice_A_trans,
864  vcl_range_B, vcl_range_B_trans,
865  vcl_slice_C);
866  if (ret != EXIT_SUCCESS)
867  return ret;
868 
869 
870  //
871  //
872  std::cout << "Now using A=slice, B=slice, C=matrix" << std::endl;
873  ret = test_prod<NumericT>(epsilon,
874  A, A_trans, B, B_trans, C,
875  vcl_slice_A, vcl_slice_A_trans,
876  vcl_slice_B, vcl_slice_B_trans,
877  vcl_C);
878  if (ret != EXIT_SUCCESS)
879  return ret;
880 
881 
882  //
883  //
884  std::cout << "Now using A=slice, B=slice, C=range" << std::endl;
885  ret = test_prod<NumericT>(epsilon,
886  A, A_trans, B, B_trans, C,
887  vcl_slice_A, vcl_slice_A_trans,
888  vcl_slice_B, vcl_slice_B_trans,
889  vcl_range_C);
890  if (ret != EXIT_SUCCESS)
891  return ret;
892 
893  //
894  //
895  std::cout << "Now using A=slice, B=slice, C=slice" << std::endl;
896  ret = test_prod<NumericT>(epsilon,
897  A, A_trans, B, B_trans, C,
898  vcl_slice_A, vcl_slice_A_trans,
899  vcl_slice_B, vcl_slice_B_trans,
900  vcl_slice_C);
901  if (ret != EXIT_SUCCESS)
902  return ret;
903 
904 
905  return ret;
906 
907 }
908 
909 
910 //
911 // Control functions
912 //
913 
914 
915 
916 template< typename NumericT, typename Epsilon >
917 int test(Epsilon const& epsilon)
918 {
919  int ret;
920 
921  std::cout << "///////////////////////////////////////" << std::endl;
922  std::cout << "/// Now testing A=row, B=row, C=row ///" << std::endl;
923  std::cout << "///////////////////////////////////////" << std::endl;
924  ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::row_major>(epsilon);
925  if (ret != EXIT_SUCCESS)
926  return ret;
927 
928  std::cout << "///////////////////////////////////////" << std::endl;
929  std::cout << "/// Now testing A=row, B=row, C=col ///" << std::endl;
930  std::cout << "///////////////////////////////////////" << std::endl;
931  ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::column_major>(epsilon);
932  if (ret != EXIT_SUCCESS)
933  return ret;
934 
935  std::cout << "///////////////////////////////////////" << std::endl;
936  std::cout << "/// Now testing A=row, B=col, C=row ///" << std::endl;
937  std::cout << "///////////////////////////////////////" << std::endl;
938  ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::row_major>(epsilon);
939  if (ret != EXIT_SUCCESS)
940  return ret;
941 
942  std::cout << "///////////////////////////////////////" << std::endl;
943  std::cout << "/// Now testing A=row, B=col, C=col ///" << std::endl;
944  std::cout << "///////////////////////////////////////" << std::endl;
945  ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::column_major>(epsilon);
946  if (ret != EXIT_SUCCESS)
947  return ret;
948 
949  std::cout << "///////////////////////////////////////" << std::endl;
950  std::cout << "/// Now testing A=col, B=row, C=row ///" << std::endl;
951  std::cout << "///////////////////////////////////////" << std::endl;
952  ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::row_major>(epsilon);
953  if (ret != EXIT_SUCCESS)
954  return ret;
955 
956  std::cout << "///////////////////////////////////////" << std::endl;
957  std::cout << "/// Now testing A=col, B=row, C=col ///" << std::endl;
958  std::cout << "///////////////////////////////////////" << std::endl;
959  ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::column_major>(epsilon);
960  if (ret != EXIT_SUCCESS)
961  return ret;
962 
963  std::cout << "///////////////////////////////////////" << std::endl;
964  std::cout << "/// Now testing A=col, B=col, C=row ///" << std::endl;
965  std::cout << "///////////////////////////////////////" << std::endl;
966  ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::row_major>(epsilon);
967  if (ret != EXIT_SUCCESS)
968  return ret;
969 
970  std::cout << "///////////////////////////////////////" << std::endl;
971  std::cout << "/// Now testing A=col, B=col, C=col ///" << std::endl;
972  std::cout << "///////////////////////////////////////" << std::endl;
973  ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::column_major>(epsilon);
974  if (ret != EXIT_SUCCESS)
975  return ret;
976 
977 
978 
979  return ret;
980 }
981 
982 //
983 // -------------------------------------------------------------
984 //
985 int main()
986 {
987  std::cout << std::endl;
988  std::cout << "----------------------------------------------" << std::endl;
989  std::cout << "----------------------------------------------" << std::endl;
990  std::cout << "## Test :: BLAS 3 routines" << std::endl;
991  std::cout << "----------------------------------------------" << std::endl;
992  std::cout << "----------------------------------------------" << std::endl;
993  std::cout << std::endl;
994 
995  int retval = EXIT_SUCCESS;
996 
997  std::cout << std::endl;
998  std::cout << "----------------------------------------------" << std::endl;
999  std::cout << std::endl;
1000  {
1001  typedef float NumericT;
1002  NumericT epsilon = NumericT(1.0E-3);
1003  std::cout << "# Testing setup:" << std::endl;
1004  std::cout << " eps: " << epsilon << std::endl;
1005  std::cout << " numeric: float" << std::endl;
1006  retval = test<NumericT>(epsilon);
1007  if ( retval == EXIT_SUCCESS )
1008  std::cout << "# Test passed" << std::endl;
1009  else
1010  return retval;
1011  }
1012  std::cout << std::endl;
1013  std::cout << "----------------------------------------------" << std::endl;
1014  std::cout << std::endl;
1015 #ifdef VIENNACL_WITH_OPENCL
1017 #endif
1018  {
1019  {
1020  typedef double NumericT;
1021  NumericT epsilon = 1.0E-11;
1022  std::cout << "# Testing setup:" << std::endl;
1023  std::cout << " eps: " << epsilon << std::endl;
1024  std::cout << " numeric: double" << std::endl;
1025  retval = test<NumericT>(epsilon);
1026  if ( retval == EXIT_SUCCESS )
1027  std::cout << "# Test passed" << std::endl;
1028  else
1029  return retval;
1030  }
1031  std::cout << std::endl;
1032  std::cout << "----------------------------------------------" << std::endl;
1033  std::cout << std::endl;
1034  }
1035 
1036  std::cout << std::endl;
1037  std::cout << "------- Test completed --------" << std::endl;
1038  std::cout << std::endl;
1039 
1040 
1041  return retval;
1042 }
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
Class for representing strided submatrices of a bigger matrix A.
Definition: forwards.h:443
std::vector< std::vector< NumericT > > trans(std::vector< std::vector< NumericT > > const &A)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
Some helper routines for reading/writing/printing scheduler expressions.
A tag class representing assignment.
Definition: forwards.h:81
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
A dense matrix class.
Definition: forwards.h:375
void execute(statement const &s)
Definition: execute.hpp:279
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
A tag class representing inplace addition.
Definition: forwards.h:83
float NumericT
Definition: bisect.cpp:40
viennacl::vector< float > v1
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
Random number generator for returning uniformly distributed values in the closed interval [0...
Definition: random.hpp:44
int test_prod(Epsilon const &epsilon, ReferenceMatrixTypeA const &A, ReferenceMatrixTypeA const &A_trans, ReferenceMatrixTypeB const &B, ReferenceMatrixTypeB const &B_trans, ReferenceMatrixTypeC &C, MatrixTypeA const &vcl_A, MatrixTypeA const &vcl_A_trans, MatrixTypeB const &vcl_B, MatrixTypeB const &vcl_B_trans, MatrixTypeC &vcl_C)
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
Implementations of dense direct solvers are found here.
Proxy classes for matrices.
A tag class representing inplace subtraction.
Definition: forwards.h:85
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
float ScalarType
Definition: fft_1d.cpp:42
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
int test(Epsilon const &epsilon)
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:440
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:429
Implementation of the ViennaCL scalar class.