ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
template_base.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_TEMPLATE_BASE_
2 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_TEMPLATE_BASE_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
27 #include <list>
28 #include <set>
29 
30 #include "viennacl/ocl/kernel.hpp"
31 #include "viennacl/ocl/device.hpp"
33 
36 
41 
42 namespace viennacl
43 {
44 namespace device_specific
45 {
46 
48 {
52 };
53 
55 {
56 public:
58  {
59  parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels) : simd_width(_simd_width), local_size_0(_local_size_1), local_size_1(_local_size_2), num_kernels(_num_kernels){ }
60 
61  unsigned int simd_width;
62  unsigned int local_size_0;
63  unsigned int local_size_1;
64  unsigned int num_kernels;
65  };
66 
67 private:
69  class map_functor : public tree_parsing::traversal_functor
70  {
71 
72  scheduler::statement_node_numeric_type numeric_type(scheduler::statement const * statement, vcl_size_t root_idx) const
73  {
74  scheduler::statement_node const * root_node = &statement->array()[root_idx];
76  root_node = &statement->array()[root_node->lhs.node_index];
77  return root_node->lhs.numeric_type;
78  }
79 
80  public:
81  typedef tools::shared_ptr<mapped_object> result_type;
82 
83  map_functor(symbolic_binder & binder, mapping_type & mapping) : binder_(binder), mapping_(mapping){ }
84 
86  template<class T>
87  result_type binary_leaf(scheduler::statement const * statement, vcl_size_t root_idx, mapping_type const * mapping) const
88  {
89  return result_type(new T(utils::numeric_type_to_string(numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx)));
90  }
91 
92  template<class NumericT>
93  result_type operator()(NumericT const & /*scalar*/) const
94  {
95  return result_type(new mapped_host_scalar(utils::type_to_string<NumericT>::value(), binder_.get(NULL)));
96  }
97 
99  template<class NumericT>
100  result_type operator()(scalar<NumericT> const & scal) const
101  {
102  return result_type(new mapped_scalar(utils::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(scal))));
103  }
104 
106  template<class NumericT>
107  result_type operator()(vector_base<NumericT> const & vec) const
108  {
109  return result_type(new mapped_vector(utils::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(vec))));
110  }
111 
113  template<class NumericT>
114  result_type operator()(implicit_vector_base<NumericT> const & /*vec*/) const
115  {
116  return result_type(new mapped_implicit_vector(utils::type_to_string<NumericT>::value(), binder_.get(NULL)));
117  }
118 
120  template<class NumericT>
121  result_type operator()(matrix_base<NumericT> const & mat) const
122  {
123  return result_type(new mapped_matrix(utils::type_to_string<NumericT>::value(), binder_.get(&viennacl::traits::handle(mat)),
125  }
126 
128  template<class NumericT>
129  result_type operator()(implicit_matrix_base<NumericT> const & /*mat*/) const
130  {
131  return result_type(new mapped_implicit_matrix(utils::type_to_string<NumericT>::value(), binder_.get(NULL)));
132  }
133 
135  void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf_t) const {
136  mapping_type::key_type key(root_idx, leaf_t);
137  scheduler::statement_node const & root_node = statement.array()[root_idx];
138 
139  if (leaf_t == LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
140  mapping_.insert(mapping_type::value_type(key, utils::call_on_element(root_node.lhs, *this)));
141  else if (leaf_t == RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
142  mapping_.insert(mapping_type::value_type(key, utils::call_on_element(root_node.rhs, *this)));
143  else if ( leaf_t== PARENT_NODE_TYPE)
144  {
145  if (root_node.op.type==scheduler::OPERATION_BINARY_VECTOR_DIAG_TYPE)
146  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_vector_diag>(&statement, root_idx, &mapping_)));
147  else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_DIAG_TYPE)
148  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_diag>(&statement, root_idx, &mapping_)));
149  else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_ROW_TYPE)
150  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_row>(&statement, root_idx, &mapping_)));
151  else if (root_node.op.type==scheduler::OPERATION_BINARY_MATRIX_COLUMN_TYPE)
152  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&statement, root_idx, &mapping_)));
153  else if (is_scalar_reduction(root_node))
154  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_reduction>(&statement, root_idx, &mapping_)));
155  else if (is_vector_reduction(root_node))
156  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_row_wise_reduction>(&statement, root_idx, &mapping_)));
157  else if (root_node.op.type == scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE)
158  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&statement, root_idx, &mapping_)));
159  else if (root_node.op.type == scheduler::OPERATION_UNARY_TRANS_TYPE)
160  mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_trans>(&statement, root_idx, &mapping_)));
161  }
162  }
163 
164  private:
165  symbolic_binder & binder_;
166  mapping_type & mapping_;
167  };
168 
170  class prototype_generation_traversal : public tree_parsing::traversal_functor
171  {
172  private:
173  std::set<std::string> & already_generated_;
174  std::string & str_;
175  mapping_type const & mapping_;
176  std::map<std::string, unsigned int> const & widths_;
177  public:
178  prototype_generation_traversal(std::set<std::string> & already_generated, std::string & str, mapping_type const & mapping, std::map<std::string, unsigned int> const & widths) :
179  already_generated_(already_generated), str_(str), mapping_(mapping), widths_(widths){ }
180 
181  void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf) const
182  {
183  scheduler::statement_node const & root_node = statement.array()[root_idx];
184  if ( (leaf==LHS_NODE_TYPE && root_node.lhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY)
185  ||(leaf==RHS_NODE_TYPE && root_node.rhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) )
186  {
187  mapped_object * obj = at(mapping_, std::make_pair(root_idx,leaf)).get();
188  if(widths_.find(obj->name())!=widths_.end())
189  obj->append_kernel_arguments(already_generated_, str_, at(widths_, obj->name()));
190  else
191  obj->append_kernel_arguments(already_generated_, str_, 1);
192  }
193  }
194  };
195 
196 
197 
199  class set_arguments_functor : public tree_parsing::traversal_functor
200  {
201  public:
202  typedef void result_type;
203 
204  set_arguments_functor(symbolic_binder & binder, unsigned int & current_arg, viennacl::ocl::kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ }
205 
206  template<class NumericT>
207  result_type operator()(NumericT const & scal) const {
208  typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
209  kernel_.arg(current_arg_++, cl_scalartype(scal));
210  }
211 
213  template<class NumericT>
214  result_type operator()(scalar<NumericT> const & scal) const {
215  if (binder_.bind(&viennacl::traits::handle(scal)))
216  kernel_.arg(current_arg_++, scal.handle().opencl_handle());
217  }
218 
220  template<class NumericT>
221  result_type operator()(vector_base<NumericT> const & vec) const {
222  if (binder_.bind(&viennacl::traits::handle(vec)))
223  {
224  kernel_.arg(current_arg_++, vec.handle().opencl_handle());
225  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start(vec)));
226  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride(vec)));
227  }
228  }
229 
231  template<class NumericT>
232  result_type operator()(implicit_vector_base<NumericT> const & vec) const
233  {
234  typedef typename viennacl::result_of::cl_type<NumericT>::type cl_scalartype;
235  kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
236  if (vec.has_index())
237  kernel_.arg(current_arg_++, cl_uint(vec.index()));
238  }
239 
241  template<class NumericT>
242  result_type operator()(matrix_base<NumericT> const & mat) const
243  {
244  if (binder_.bind(&viennacl::traits::handle(mat)))
245  {
246  kernel_.arg(current_arg_++, mat.handle().opencl_handle());
247  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::ld(mat)));
248  if (mat.row_major())
249  {
250  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
251  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
252  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
253  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
254  }
255  else
256  {
257  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
258  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
259  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
260  kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
261  }
262  }
263  }
264 
266  template<class NumericT>
267  result_type operator()(implicit_matrix_base<NumericT> const & mat) const
268  {
269  kernel_.arg(current_arg_++, typename viennacl::result_of::cl_type<NumericT>::type(mat.value()));
270  }
271 
273  void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf_t) const
274  {
275  scheduler::statement_node const & root_node = statement.array()[root_idx];
276  if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
277  utils::call_on_element(root_node.lhs, *this);
278  else if (leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
279  utils::call_on_element(root_node.rhs, *this);
280  }
281 
282  private:
283  symbolic_binder & binder_;
284  unsigned int & current_arg_;
285  viennacl::ocl::kernel & kernel_;
286  };
287 
288 protected:
289 
290  static void generate_prototype(utils::kernel_generation_stream & stream, std::string const & name, std::string const & first_arguments, std::vector<mapping_type> const & mappings, statements_container const &statements,
291  std::map<std::string, unsigned int> const & widths)
292  {
293  statements_container::data_type::const_iterator sit;
294  std::vector<mapping_type>::const_iterator mit;
295  std::set<std::string> already_generated;
296 
297  std::string arguments = first_arguments;
298  for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++sit, ++mit)
299  tree_parsing::traverse(*sit, sit->root(), prototype_generation_traversal(already_generated, arguments, *mit, widths), true);
300  arguments.erase(arguments.size()-1); //Last comma pruned
301  stream << "__kernel " << "void " << name << "(" << arguments << ")" << std::endl;
302  }
303 
304  static void generate_prototype(utils::kernel_generation_stream & stream, std::string const & name, std::string const & first_arguments, std::vector<mapping_type> const & mappings, statements_container const & statements)
305  {
306  generate_prototype(stream, name, first_arguments, mappings, statements, std::map<std::string, unsigned int>());
307  }
308 
309  void set_arguments(statements_container const & statements, viennacl::ocl::kernel & kernel, unsigned int & current_arg)
310  {
311  tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy_);
312  for (statements_container::data_type::const_iterator itt = statements.data().begin(); itt != statements.data().end(); ++itt)
313  tree_parsing::traverse(*itt, itt->root(), set_arguments_functor(*binder,current_arg,kernel), true);
314  }
315 
316  class invalid_template_exception : public std::exception
317  {
318  public:
319  invalid_template_exception() : message_() {}
320  invalid_template_exception(std::string message) :
321  message_("ViennaCL: Internal error: The generator cannot apply the given template to the given statement: " + message + "\n"
322  "If you are using a builtin template, please report on viennacl-support@lists.sourceforge.net! We will provide a fix as soon as possible\n"
323  "If you are using your own template, please try using other parameters") {}
324  virtual const char* what() const throw() { return message_.c_str(); }
325  virtual ~invalid_template_exception() throw() {}
326  private:
327  std::string message_;
328  };
329 
330  static void fetching_loop_info(fetching_policy_type policy, std::string const & bound, utils::kernel_generation_stream & stream, std::string & init, std::string & upper_bound, std::string & inc, std::string const & domain_id, std::string const & domain_size)
331  {
332  if (policy==FETCH_FROM_GLOBAL_STRIDED)
333  {
334  init = domain_id;
335  upper_bound = bound;
336  inc = domain_size;
337  }
338  else if (policy==FETCH_FROM_GLOBAL_CONTIGUOUS)
339  {
340  std::string chunk_size = "chunk_size";
341  std::string chunk_start = "chunk_start";
342  std::string chunk_end = "chunk_end";
343 
344  stream << "unsigned int " << chunk_size << " = (" << bound << "+" << domain_size << "-1)/" << domain_size << ";" << std::endl;
345  stream << "unsigned int " << chunk_start << " =" << domain_id << "*" << chunk_size << ";" << std::endl;
346  stream << "unsigned int " << chunk_end << " = min(" << chunk_start << "+" << chunk_size << ", " << bound << ");" << std::endl;
347  init = chunk_start;
348  upper_bound = chunk_end;
349  inc = "1";
350  }
351  }
352 
353  static bool is_node_trans(scheduler::statement::container_type const & array, vcl_size_t root_idx, leaf_t leaf_type)
354  {
355  bool res = false;
357  if (leaf_type==LHS_NODE_TYPE)
359  else
361  scheduler::statement_node const * node = &array[root_idx];
362  while ((node->*ptr).type_family==scheduler::COMPOSITE_OPERATION_FAMILY)
363  {
364  if (array[(node->*ptr).node_index].op.type==scheduler::OPERATION_UNARY_TRANS_TYPE)
365  res = !res;
366  node = &array[(node->*ptr).node_index];
367  }
368  return res;
369  }
370 
371 protected:
372 
373  static std::string append_simd_suffix(std::string const & str, unsigned int i)
374  {
375  assert(i < 16);
376  static char suffixes[] = {'0','1','2','3','4','5','6','7','8','9',
377  'a','b','c','d','e','f'};
378  return str + tools::to_string(suffixes[i]);
379  }
380 
382  {
386  }
387 
388  static bool has_strided_access(statements_container const & statements)
389  {
390  for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it)
391  {
392  //checks for vectors
393  std::vector<scheduler::lhs_rhs_element> vectors;
395  for (std::vector<scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
396  if (utils::call_on_vector(*itt, utils::stride_fun())>1)
397  return true;
398 
399  //checks for matrix
400  std::vector<scheduler::lhs_rhs_element> matrices;
402  for (std::vector<scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
403  if (utils::call_on_matrix(*itt, utils::stride1_fun())>1 || utils::call_on_matrix(*itt, utils::stride2_fun())>2)
404  return true;
405 
406  std::vector<vcl_size_t> striding_operators;
407  tree_parsing::traverse(*it, it->root(), tree_parsing::filter(&is_striding_operator, striding_operators), false);
408  if(striding_operators.size() > 0)
409  return true;
410  }
411  return false;
412  }
413 
414  static vcl_size_t vector_size(scheduler::statement_node const & node, bool up_to_internal_size)
415  {
416  using namespace scheduler;
417  using namespace utils;
419  {
420  vcl_size_t size1 = up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun());
421  vcl_size_t size2 = up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun());
422  return std::min<vcl_size_t>(size1, size2);
423  }
425  return up_to_internal_size?call_on_matrix(node.lhs, internal_size2_fun()):call_on_matrix(node.lhs, size2_fun());
427  return up_to_internal_size?call_on_matrix(node.lhs, internal_size1_fun()):call_on_matrix(node.lhs, size1_fun());
428  else
429  return up_to_internal_size?call_on_vector(node.lhs, internal_size_fun()):call_on_vector(node.lhs, size_fun());
430  }
431 
432  //NB : templates are not used here because declaring a functor out of the generate() functions would be harder to read
434  {
435  virtual void operator()(utils::kernel_generation_stream & stream, unsigned int simd_width) const = 0;
436  virtual ~loop_body_base() {}
437  };
438 
439  static void element_wise_loop_1D(utils::kernel_generation_stream & stream, loop_body_base const & loop_body,
440  fetching_policy_type fetch, unsigned int simd_width, std::string const & i, std::string const & bound, std::string const & domain_id, std::string const & domain_size)
441  {
442  std::string strwidth = tools::to_string(simd_width);
443  std::string boundround = bound + "/" + strwidth;
444 
445  std::string init, upper_bound, inc;
446  fetching_loop_info(fetch, boundround, stream, init, upper_bound, inc, domain_id, domain_size);
447  stream << "for(unsigned int " << i << " = " << init << "; " << i << " < " << upper_bound << "; " << i << " += " << inc << ")" << std::endl;
448  stream << "{" << std::endl;
449  stream.inc_tab();
450  loop_body(stream, simd_width);
451  stream.dec_tab();
452  stream << "}" << std::endl;
453 
454  if (simd_width>1)
455  {
456  stream << "for(unsigned int " << i << " = " << boundround << "*" << strwidth << " + " << domain_id << "; " << i << " < " << bound << "; " << i << " += " + domain_size + ")" << std::endl;
457  stream << "{" << std::endl;
458  stream.inc_tab();
459  loop_body(stream, 1);
460  stream.dec_tab();
461  stream << "}" << std::endl;
462  }
463  }
464 
465  static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr)
466  {
467  if (simd_width==1)
468  return "(" + ptr + ")[" + offset + "] = " + value;
469  else
470  return utils::append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
471  }
472 
473  static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr)
474  {
475  if (simd_width==1)
476  return "(" + ptr + ")[" + offset + "]";
477  else
478  return utils::append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
479  }
480 
481 private:
483  virtual std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mapping) const = 0;
484 
485 public:
486  template_base(binding_policy_t binding_policy) : binding_policy_(binding_policy) {}
487 
488  virtual ~template_base(){ }
489 
490  std::vector<std::string> generate(std::string const & kernel_prefix, statements_container const & statements, viennacl::ocl::device const & device)
491  {
492  statements_container::data_type::const_iterator sit;
493  std::vector<mapping_type>::iterator mit;
494 
495  if(int err = check_invalid(statements, device))
496  throw generator_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));
497 
498  //Create mapping
499  std::vector<mapping_type> mappings(statements.data().size());
500  tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy_);
501  for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++sit, ++mit)
502  tree_parsing::traverse(*sit, sit->root(), map_functor(*binder,*mit), true);
503 
504  return generate_impl(kernel_prefix, statements, mappings);
505  }
506 
508  virtual int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const = 0;
509 
510  virtual void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements) = 0;
511 
512  virtual tools::shared_ptr<template_base> clone() const = 0;
513 private:
514  binding_policy_t binding_policy_;
515 };
516 
517 
518 template<class TemplateType, class ParametersType>
520 {
521 private:
522  virtual int check_invalid_impl(viennacl::ocl::device const & /*dev*/) const { return TEMPLATE_VALID; }
523 
524  virtual unsigned int n_lmem_elements() const { return 0; }
525 
526 public:
527  typedef ParametersType parameters_type;
528 
530  template_base_impl(parameters_type const & parameters, binding_policy_t binding_policy) : template_base(binding_policy), p_(parameters){ }
531 
532  parameters_type const & parameters() const
533  {
534  return p_;
535  }
536 
538  {
539  return tools::shared_ptr<template_base>(new TemplateType(*dynamic_cast<TemplateType const *>(this)));
540  }
541 
543  int check_invalid(statements_container const & statements, viennacl::ocl::device const & device) const
544  {
545  using namespace viennacl::tools;
546 
547  scheduler::statement const & statement = statements.data().front();
548  unsigned int scalartype_size = utils::size_of(lhs_most(statement.array(), statement.root()).lhs.numeric_type);
549 
550  //Query device informations
551  vcl_size_t lmem_available = static_cast<vcl_size_t>(device.local_mem_size());
552  vcl_size_t lmem_usage = scalartype_size*n_lmem_elements();
553  if (lmem_usage>lmem_available)
554  return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
555 
556  //Invalid work group size
557  vcl_size_t max_workgroup_size = device.max_work_group_size();
558  std::vector<vcl_size_t> max_work_item_sizes = device.max_work_item_sizes();
559  if (p_.local_size_0*p_.local_size_1 > max_workgroup_size)
560  return TEMPLATE_WORK_GROUP_SIZE_OVERFLOW;
561  if (p_.local_size_0 > max_work_item_sizes[0])
562  return TEMPLATE_LOCAL_SIZE_0_OVERFLOW;
563 
564  if (p_.local_size_1 > max_work_item_sizes[1])
565  return TEMPLATE_LOCAL_SIZE_1_OVERFLOW;
566 
567  //Advice from the Intel guide
568  unsigned int warp_size = 8;
569  if (device.type()==CL_DEVICE_TYPE_GPU)
570  {
571  //Advice from the nvidia guide
572  warp_size = 32;
573  //Advice from the AMD guide
574  if (device.vendor_id()==4098)
575  warp_size = 64;
576  }
577  if (((p_.local_size_0*p_.local_size_1)%warp_size)>0)
578  return TEMPLATE_LOCAL_SIZE_NOT_WARP_MULTIPLE;
579 
580  //Invalid SIMD Width
581  if (p_.simd_width!=1 && p_.simd_width!=2 &&
582  p_.simd_width!=4 && p_.simd_width!=8 &&
583  p_.simd_width!=16)
584  return TEMPLATE_INVALID_SIMD_WIDTH;
585 
586  return check_invalid_impl(device);
587  }
588 
589 protected:
591 };
592 
593 }
594 }
595 
596 #endif
tools::shared_ptr< template_base > clone() const
virtual int check_invalid(statements_container const &statements, viennacl::ocl::device const &device) const =0
returns whether or not the profile has undefined behavior on particular device
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
Exception for the case the generator is unable to deal with the operation.
Definition: forwards.h:163
void set_arguments(statements_container const &statements, viennacl::ocl::kernel &kernel, unsigned int &current_arg)
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
static std::string vstore(unsigned int simd_width, std::string const &value, std::string const &offset, std::string const &ptr)
int check_invalid(statements_container const &statements, viennacl::ocl::device const &device) const
returns whether or not the profile has undefined behavior on particular device
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
Some helper routines for reading/writing/printing scheduler expressions.
void traverse(scheduler::statement const &statement, vcl_size_t root_idx, Fun const &fun, bool inspect)
Recursively execute a functor on a statement.
static bool is_node_trans(scheduler::statement::container_type const &array, vcl_size_t root_idx, leaf_t leaf_type)
parameters_type const & parameters() const
static bool is_striding_operator(scheduler::statement_node const &node)
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
cl_ulong local_mem_size() const
Size of local memory arena in bytes. The minimum value is 32 KB.
Definition: device.hpp:360
cl_device_type type() const
The OpenCL device type.
Definition: device.hpp:893
A class representing the 'data' for the LHS or RHS operand of the respective node.
Definition: forwards.h:337
container_type const & array() const
Definition: forwards.h:528
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
Definition: device.hpp:917
statement_node_numeric_type
Encodes the type of a node in the statement tree.
Definition: forwards.h:286
std::list< scheduler::statement > const & data() const
Definition: forwards.h:282
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
static std::string append_simd_suffix(std::string const &str, unsigned int i)
float NumericT
Definition: bisect.cpp:40
virtual tools::shared_ptr< template_base > clone() const =0
scheduler::statement_node const & lhs_most(scheduler::statement::container_type const &array, vcl_size_t root)
Definition: forwards.h:87
bool is_scalar_reduction(scheduler::statement_node const &node)
Definition: forwards.h:75
std::vector< value_type > container_type
Definition: forwards.h:507
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
vcl_size_t ld(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:398
Map ViennaCL objects to generator wrappers.
static bool has_strided_access(statements_container const &statements)
static void fetching_loop_info(fetching_policy_type policy, std::string const &bound, utils::kernel_generation_stream &stream, std::string &init, std::string &upper_bound, std::string &inc, std::string const &domain_id, std::string const &domain_size)
Helper for compiling a program lazily.
statement_node_numeric_type numeric_type
Definition: forwards.h:341
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
Various utility implementations for dispatching with respect to the different devices available on th...
A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependen...
Definition: shared_ptr.hpp:83
std::size_t vcl_size_t
Definition: forwards.h:75
base functor class for traversing a statement
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
static vcl_size_t vector_size(scheduler::statement_node const &node, bool up_to_internal_size)
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements, std::map< std::string, unsigned int > const &widths)
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements)
std::map< mapping_key, tools::shared_ptr< mapped_object > > mapping_type
Definition: forwards.h:191
virtual void enqueue(std::string const &kernel_prefix, std::vector< lazy_program_compiler > &programs, statements_container const &statements)=0
Code for parsing the expression trees.
std::vector< std::string > generate(std::string const &kernel_prefix, statements_container const &statements, viennacl::ocl::device const &device)
Representation of an OpenCL kernel in ViennaCL.
Internal utils.
template_base_impl(parameters_type const &parameters, binding_policy_t binding_policy)
The constructor.
bool row_major(T const &)
Definition: row_major.hpp:38
operation_node_type type
Definition: forwards.h:474
static void element_wise_loop_1D(utils::kernel_generation_stream &stream, loop_body_base const &loop_body, fetching_policy_type fetch, unsigned int simd_width, std::string const &i, std::string const &bound, std::string const &domain_id, std::string const &domain_size)
static std::string vload(unsigned int simd_width, std::string const &offset, std::string const &ptr)
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
bool is_vector_reduction(scheduler::statement_node const &node)
Definition: forwards.h:80
ValueT const & at(std::map< KeyT, ValueT > const &map, KeyT const &key)
Emulation of C++11's .at() member for std::map<>, const-version.
Definition: forwards.h:142
template_base(binding_policy_t binding_policy)
std::string to_string(T const t)
Definition: tools.hpp:304
Main datastructure for an node in the statement tree.
Definition: forwards.h:478
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
Definition: device.hpp:483
unsigned int size_of(scheduler::statement_node_numeric_type type)
Definition: utils.hpp:534
tools::shared_ptr< symbolic_binder > make_binder(binding_policy_t policy)
Definition: forwards.h:251
parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels)
std::vector< size_t > max_work_item_sizes() const
Maximum number of work-items that can be specified in each dimension of the work-group.
Definition: device.hpp:510
std::string append_width(std::string const &str, unsigned int width)
Definition: utils.hpp:558