ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
device.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_OCL_DEVICE_HPP_
2 #define VIENNACL_OCL_DEVICE_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #ifdef __APPLE__
26 #include <OpenCL/cl.h>
27 #else
28 #include <CL/cl.h>
29 #endif
30 
31 #include<stdio.h>
32 
33 #include <vector>
34 #include <string>
35 #include <sstream>
36 #include <assert.h>
38 #include "viennacl/ocl/handle.hpp"
39 #include "viennacl/ocl/error.hpp"
40 
41 namespace viennacl
42 {
43 namespace ocl
44 {
45 
49 class device
50 {
51 public:
52  explicit device() : device_(0) { flush_cache(); }
53 
54  explicit device(cl_device_id dev) : device_(dev)
55  {
56 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
57  std::cout << "ViennaCL: Creating device object (CTOR with cl_device_id)" << std::endl;
58 #endif
59  flush_cache();
60  }
61 
62  device(const device & other) : device_(0)
63  {
64 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
65  std::cout << "ViennaCL: Creating device object (Copy CTOR)" << std::endl;
66 #endif
67  if (device_ != other.device_)
68  {
69  device_ = other.device_;
70  flush_cache();
71  }
72  }
73 
75  cl_uint address_bits() const
76  {
77  if (!address_bits_valid_)
78  {
79  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), static_cast<void *>(&address_bits_), NULL);
80  VIENNACL_ERR_CHECK(err);
81  address_bits_valid_ = true;
82  }
83  return address_bits_;
84  }
85 
87  cl_bool available() const
88  {
89  if (!available_valid_)
90  {
91  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_AVAILABLE, sizeof(cl_bool), static_cast<void *>(&available_), NULL);
92  VIENNACL_ERR_CHECK(err);
93  available_valid_ = true;
94  }
95  return available_;
96  }
97 
99  cl_bool compiler_available() const
100  {
101  if (!compiler_available_valid_)
102  {
103  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_COMPILER_AVAILABLE , sizeof(cl_bool), static_cast<void *>(&compiler_available_), NULL);
104  VIENNACL_ERR_CHECK(err);
105  compiler_available_valid_ = true;
106  }
107  return compiler_available_;
108  }
109 
#ifdef CL_DEVICE_DOUBLE_FP_CONFIG

/** @brief Returns the device's CL_DEVICE_DOUBLE_FP_CONFIG bitfield.
 *
 * The query is only issued when double_support() reports a double precision
 * extension; the result is cached afterwards.
 *
 * @return The cached bitfield, or an empty bitfield (0) if the device does not
 *         support double precision. Previously that case returned the cache
 *         member although nothing had ever been written to it.
 */
cl_device_fp_config double_fp_config() const
{
  if (double_fp_config_valid_)
    return double_fp_config_;

  if (!double_support())
    return 0; // no double precision: report "no capabilities" instead of an unset member

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config),
                                  static_cast<void *>(&double_fp_config_), NULL);
  VIENNACL_ERR_CHECK(status);
  double_fp_config_valid_ = true;
  return double_fp_config_;
}
#endif
135 
137  cl_bool endian_little() const
138  {
139  if (!endian_little_valid_)
140  {
141  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), static_cast<void *>(&endian_little_), NULL);
142  VIENNACL_ERR_CHECK(err);
143  endian_little_valid_ = true;
144  }
145  return endian_little_;
146  }
147 
149  cl_bool error_correction_support() const
150  {
151  if (!error_correction_support_valid_)
152  {
153  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ERROR_CORRECTION_SUPPORT , sizeof(cl_bool), static_cast<void *>(&error_correction_support_), NULL);
154  VIENNACL_ERR_CHECK(err);
155  error_correction_support_valid_ = true;
156  }
157  return error_correction_support_;
158  }
159 
167  cl_device_exec_capabilities execution_capabilities() const
168  {
169  if (!execution_capabilities_valid_)
170  {
171  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXECUTION_CAPABILITIES , sizeof(cl_device_exec_capabilities), static_cast<void *>(&execution_capabilities_), NULL);
172  VIENNACL_ERR_CHECK(err);
173  execution_capabilities_valid_ = true;
174  }
175  return execution_capabilities_;
176  }
177 
189  std::string extensions() const
190  {
191  if (!extensions_valid_)
192  {
193  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXTENSIONS, sizeof(char) * 2048, static_cast<void *>(&extensions_), NULL);
194  VIENNACL_ERR_CHECK(err);
195  extensions_valid_ = true;
196  }
197  return extensions_;
198  }
199 
201  cl_ulong global_mem_cache_size() const
202  {
203  if (!global_mem_cache_size_valid_)
204  {
205  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), static_cast<void *>(&global_mem_cache_size_), NULL);
206  VIENNACL_ERR_CHECK(err);
207  global_mem_cache_size_valid_ = true;
208  }
209  return global_mem_cache_size_;
210  }
211 
213  cl_device_mem_cache_type global_mem_cache_type() const
214  {
215  if (!global_mem_cache_type_valid_)
216  {
217  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(cl_device_mem_cache_type), static_cast<void *>(&global_mem_cache_type_), NULL);
218  VIENNACL_ERR_CHECK(err);
219  global_mem_cache_type_valid_ = true;
220  }
221  return global_mem_cache_type_;
222  }
223 
226  {
227  if (!global_mem_cacheline_size_valid_)
228  {
229  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint), static_cast<void *>(&global_mem_cacheline_size_), NULL);
230  VIENNACL_ERR_CHECK(err);
231  global_mem_cacheline_size_valid_ = true;
232  }
233  return global_mem_cacheline_size_;
234  }
235 
237  cl_ulong global_mem_size() const
238  {
239  if (!global_mem_size_valid_)
240  {
241  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), static_cast<void *>(&global_mem_size_), NULL);
242  VIENNACL_ERR_CHECK(err);
243  global_mem_size_valid_ = true;
244  }
245  return global_mem_size_;
246  }
247 
#ifdef CL_DEVICE_HALF_FP_CONFIG

/** @brief Returns the device's CL_DEVICE_HALF_FP_CONFIG bitfield.
 *
 * Only compiled when the OpenCL headers define CL_DEVICE_HALF_FP_CONFIG.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_device_fp_config half_fp_config() const
{
  if (half_fp_config_valid_)
    return half_fp_config_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_HALF_FP_CONFIG, sizeof(cl_device_fp_config),
                                  static_cast<void *>(&half_fp_config_), NULL);
  VIENNACL_ERR_CHECK(status);
  half_fp_config_valid_ = true;
  return half_fp_config_;
}
#endif
272 
#ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
/** @brief Returns the device's CL_DEVICE_HOST_UNIFIED_MEMORY flag.
 *
 * Only compiled when the OpenCL headers define CL_DEVICE_HOST_UNIFIED_MEMORY.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_bool host_unified_memory() const
{
  if (host_unified_memory_valid_)
    return host_unified_memory_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool),
                                  static_cast<void *>(&host_unified_memory_), NULL);
  VIENNACL_ERR_CHECK(status);
  host_unified_memory_valid_ = true;
  return host_unified_memory_;
}
#endif
286 
288  cl_bool image_support() const
289  {
290  if (!image_support_valid_)
291  {
292  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), static_cast<void *>(&image_support_), NULL);
293  VIENNACL_ERR_CHECK(err);
294  image_support_valid_ = true;
295  }
296  return image_support_;
297  }
298 
300  size_t image2d_max_height() const
301  {
302  if (!image2d_max_height_valid_)
303  {
304  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), static_cast<void *>(&image2d_max_height_), NULL);
305  VIENNACL_ERR_CHECK(err);
306  image2d_max_height_valid_ = true;
307  }
308  return image2d_max_height_;
309  }
310 
312  size_t image2d_max_width() const
313  {
314  if (!image2d_max_width_valid_)
315  {
316  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), static_cast<void *>(&image2d_max_width_), NULL);
317  VIENNACL_ERR_CHECK(err);
318  image2d_max_width_valid_ = true;
319  }
320  return image2d_max_width_;
321  }
322 
324  size_t image3d_max_depth() const
325  {
326  if (!image3d_max_depth_valid_)
327  {
328  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), static_cast<void *>(&image3d_max_depth_), NULL);
329  VIENNACL_ERR_CHECK(err);
330  image3d_max_depth_valid_ = true;
331  }
332  return image3d_max_depth_;
333  }
334 
336  size_t image3d_max_height() const
337  {
338  if (!image3d_max_height_valid_)
339  {
340  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), static_cast<void *>(&image3d_max_height_), NULL);
341  VIENNACL_ERR_CHECK(err);
342  image3d_max_height_valid_ = true;
343  }
344  return image3d_max_height_;
345  }
346 
348  size_t image3d_max_width() const
349  {
350  if (!image3d_max_width_valid_)
351  {
352  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), static_cast<void *>(&image3d_max_width_), NULL);
353  VIENNACL_ERR_CHECK(err);
354  image3d_max_width_valid_ = true;
355  }
356  return image3d_max_width_;
357  }
358 
360  cl_ulong local_mem_size() const
361  {
362  if (!local_mem_size_valid_)
363  {
364  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), static_cast<void *>(&local_mem_size_), NULL);
365  VIENNACL_ERR_CHECK(err);
366  local_mem_size_valid_ = true;
367  }
368  return local_mem_size_;
369  }
370 
372  cl_device_local_mem_type local_mem_type() const
373  {
374  if (!local_mem_type_valid_)
375  {
376  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type), static_cast<void *>(&local_mem_type_), NULL);
377  VIENNACL_ERR_CHECK(err);
378  local_mem_type_valid_ = true;
379  }
380  return local_mem_type_;
381  }
382 
384  cl_uint max_clock_frequency() const
385  {
386  if (!max_clock_frequency_valid_)
387  {
388  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), static_cast<void *>(&max_clock_frequency_), NULL);
389  VIENNACL_ERR_CHECK(err);
390  max_clock_frequency_valid_ = true;
391  }
392  return max_clock_frequency_;
393  }
394 
396  cl_uint max_compute_units() const
397  {
398  if (!max_compute_units_valid_)
399  {
400  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), static_cast<void *>(&max_compute_units_), NULL);
401  VIENNACL_ERR_CHECK(err);
402  max_compute_units_valid_ = true;
403  }
404  return max_compute_units_;
405  }
406 
408  cl_uint max_constant_args() const
409  {
410  if (!max_constant_args_valid_)
411  {
412  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(cl_uint), static_cast<void *>(&max_constant_args_), NULL);
413  VIENNACL_ERR_CHECK(err);
414  max_constant_args_valid_ = true;
415  }
416  return max_constant_args_;
417  }
418 
420  cl_ulong max_constant_buffer_size() const
421  {
422  if (!max_constant_buffer_size_valid_)
423  {
424  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), static_cast<void *>(&max_constant_buffer_size_), NULL);
425  VIENNACL_ERR_CHECK(err);
426  max_constant_buffer_size_valid_ = true;
427  }
428  return max_constant_buffer_size_;
429  }
430 
432  cl_ulong max_mem_alloc_size() const
433  {
434  if (!max_mem_alloc_size_valid_)
435  {
436  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), static_cast<void *>(&max_mem_alloc_size_), NULL);
437  VIENNACL_ERR_CHECK(err);
438  max_mem_alloc_size_valid_ = true;
439  }
440  return max_mem_alloc_size_;
441  }
442 
447  size_t max_parameter_size() const
448  {
449  if (!max_parameter_size_valid_)
450  {
451  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), static_cast<void *>(&max_parameter_size_), NULL);
452  VIENNACL_ERR_CHECK(err);
453  max_parameter_size_valid_ = true;
454  }
455  return max_parameter_size_;
456  }
457 
459  cl_uint max_read_image_args() const
460  {
461  if (!max_read_image_args_valid_)
462  {
463  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(cl_uint), static_cast<void *>(&max_read_image_args_), NULL);
464  VIENNACL_ERR_CHECK(err);
465  max_read_image_args_valid_ = true;
466  }
467  return max_read_image_args_;
468  }
469 
471  cl_uint max_samplers() const
472  {
473  if (!max_samplers_valid_)
474  {
475  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_SAMPLERS, sizeof(cl_uint), static_cast<void *>(&max_samplers_), NULL);
476  VIENNACL_ERR_CHECK(err);
477  max_samplers_valid_ = true;
478  }
479  return max_samplers_;
480  }
481 
483  size_t max_work_group_size() const
484  {
485  if (!max_work_group_size_valid_)
486  {
487  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), static_cast<void *>(&max_work_group_size_), NULL);
488  VIENNACL_ERR_CHECK(err);
489  max_work_group_size_valid_ = true;
490  }
491  return max_work_group_size_;
492  }
493 
495  cl_uint max_work_item_dimensions() const
496  {
497  if (!max_work_item_dimensions_valid_)
498  {
499  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), static_cast<void *>(&max_work_item_dimensions_), NULL);
500  VIENNACL_ERR_CHECK(err);
501  max_work_item_dimensions_valid_ = true;
502  }
503  return max_work_item_dimensions_;
504  }
505 
510  std::vector<size_t> max_work_item_sizes() const
511  {
512  std::vector<size_t> result(max_work_item_dimensions());
513 
514  assert(result.size() < 16 && bool("Supported work item dimensions exceed available capacity!"));
515 
516  if (!max_work_item_sizes_valid_)
517  {
518  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 16, static_cast<void *>(&max_work_item_sizes_), NULL);
519  VIENNACL_ERR_CHECK(err);
520  max_work_item_sizes_valid_ = true;
521  }
522 
523  for (vcl_size_t i=0; i<result.size(); ++i)
524  result[i] = max_work_item_sizes_[i];
525 
526  return result;
527  }
528 
530  cl_uint max_write_image_args() const
531  {
532  if (!max_write_image_args_valid_)
533  {
534  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(cl_uint), static_cast<void *>(&max_write_image_args_), NULL);
535  VIENNACL_ERR_CHECK(err);
536  max_write_image_args_valid_ = true;
537  }
538  return max_write_image_args_;
539  }
540 
542  cl_uint mem_base_addr_align() const
543  {
544  if (!mem_base_addr_align_valid_)
545  {
546  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), static_cast<void *>(&mem_base_addr_align_), NULL);
547  VIENNACL_ERR_CHECK(err);
548  mem_base_addr_align_valid_ = true;
549  }
550  return mem_base_addr_align_;
551  }
552 
554  cl_uint min_data_type_align_size() const
555  {
556  if (!min_data_type_align_size_valid_)
557  {
558  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, sizeof(cl_uint), static_cast<void *>(&min_data_type_align_size_), NULL);
559  VIENNACL_ERR_CHECK(err);
560  min_data_type_align_size_valid_ = true;
561  }
562  return min_data_type_align_size_;
563  }
564 
566  std::string name() const
567  {
568  if (!name_valid_)
569  {
570  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NAME, sizeof(char) * 256, static_cast<void *>(name_), NULL);
571  VIENNACL_ERR_CHECK(err);
572  name_valid_ = true;
573  }
574  return name_;
575  }
576 
579  {
580  if ( !architecture_family_valid_)
581  {
582  architecture_family_ = get_architecture_family(vendor_id(), name());
583  architecture_family_valid_ = true;
584  }
585  return architecture_family_;
586  }
587 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_char() const
{
  if (native_vector_width_char_valid_)
    return native_vector_width_char_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_char_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_char_valid_ = true;
  return native_vector_width_char_;
}
#endif
601 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_short() const
{
  if (native_vector_width_short_valid_)
    return native_vector_width_short_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_short_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_short_valid_ = true;
  return native_vector_width_short_;
}
#endif
615 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_INT

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_INT value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_int() const
{
  if (native_vector_width_int_valid_)
    return native_vector_width_int_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_int_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_int_valid_ = true;
  return native_vector_width_int_;
}
#endif
629 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_long() const
{
  if (native_vector_width_long_valid_)
    return native_vector_width_long_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_long_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_long_valid_ = true;
  return native_vector_width_long_;
}
#endif
643 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_float() const
{
  if (native_vector_width_float_valid_)
    return native_vector_width_float_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_float_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_float_valid_ = true;
  return native_vector_width_float_;
}
#endif
657 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_double() const
{
  if (native_vector_width_double_valid_)
    return native_vector_width_double_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_double_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_double_valid_ = true;
  return native_vector_width_double_;
}
#endif
674 
#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF

/** @brief Returns the device's CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint native_vector_width_half() const
{
  if (native_vector_width_half_valid_)
    return native_vector_width_half_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, sizeof(cl_uint),
                                  static_cast<void *>(&native_vector_width_half_), NULL);
  VIENNACL_ERR_CHECK(status);
  native_vector_width_half_valid_ = true;
  return native_vector_width_half_;
}
#endif
691 
#ifdef CL_DEVICE_OPENCL_C_VERSION

/** @brief Returns the device's CL_DEVICE_OPENCL_C_VERSION string.
 *
 * Guarded with #ifdef (not #if) so that the definition is compiled under exactly the
 * same condition as its use in full_info() and as every other optional query here.
 * The query passes a buffer size of 128 characters to clGetDeviceInfo(); the result is
 * cached after the first call and converted to std::string on return.
 */
std::string opencl_c_version() const
{
  if (opencl_c_version_valid_)
    return opencl_c_version_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_OPENCL_C_VERSION, sizeof(char) * 128,
                                  static_cast<void *>(opencl_c_version_), NULL);
  VIENNACL_ERR_CHECK(status);
  opencl_c_version_valid_ = true;
  return opencl_c_version_;
}
#endif
712 
714  cl_platform_id platform() const
715  {
716  if (!platform_valid_)
717  {
718  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), static_cast<void *>(&platform_), NULL);
719  VIENNACL_ERR_CHECK(err);
720  platform_valid_ = true;
721  }
722  return platform_;
723  }
724 
727  {
728  if (!preferred_vector_width_char_valid_)
729  {
730  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_char_), NULL);
731  VIENNACL_ERR_CHECK(err);
732  preferred_vector_width_char_valid_ = true;
733  }
734  return preferred_vector_width_char_;
735  }
736 
739  {
740  if (!preferred_vector_width_short_valid_)
741  {
742  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_short_), NULL);
743  VIENNACL_ERR_CHECK(err);
744  preferred_vector_width_short_valid_ = true;
745  }
746  return preferred_vector_width_short_;
747  }
748 
751  {
752  if (!preferred_vector_width_int_valid_)
753  {
754  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_int_), NULL);
755  VIENNACL_ERR_CHECK(err);
756  preferred_vector_width_int_valid_ = true;
757  }
758  return preferred_vector_width_int_;
759  }
760 
763  {
764  if (!preferred_vector_width_long_valid_)
765  {
766  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_long_), NULL);
767  VIENNACL_ERR_CHECK(err);
768  preferred_vector_width_long_valid_ = true;
769  }
770  return preferred_vector_width_long_;
771  }
772 
775  {
776  if (!preferred_vector_width_float_valid_)
777  {
778  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_float_), NULL);
779  VIENNACL_ERR_CHECK(err);
780  preferred_vector_width_float_valid_ = true;
781  }
782  return preferred_vector_width_float_;
783  }
784 
790  {
791  if (!preferred_vector_width_double_valid_)
792  {
793  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_double_), NULL);
794  VIENNACL_ERR_CHECK(err);
795  preferred_vector_width_double_valid_ = true;
796  }
797  return preferred_vector_width_double_;
798  }
799 
#ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
/** @brief Returns the device's CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF value.
 *
 * Only compiled when the OpenCL headers define that constant.
 * Fetched via clGetDeviceInfo() on the first call, then served from the cached member.
 */
cl_uint preferred_vector_width_half() const
{
  if (preferred_vector_width_half_valid_)
    return preferred_vector_width_half_;

  cl_int status = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, sizeof(cl_uint),
                                  static_cast<void *>(&preferred_vector_width_half_), NULL);
  VIENNACL_ERR_CHECK(status);
  preferred_vector_width_half_valid_ = true;
  return preferred_vector_width_half_;
}
#endif
816 
823  std::string profile() const
824  {
825  if (!profile_valid_)
826  {
827  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILE, sizeof(char) * 32, static_cast<void *>(profile_), NULL);
828  VIENNACL_ERR_CHECK(err);
829  profile_valid_ = true;
830  }
831  return profile_;
832  }
833 
836  {
837  if (!profiling_timer_resolution_valid_)
838  {
839  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t), static_cast<void *>(&profiling_timer_resolution_), NULL);
840  VIENNACL_ERR_CHECK(err);
841  profiling_timer_resolution_valid_ = true;
842  }
843  return profiling_timer_resolution_;
844  }
845 
854  cl_command_queue_properties queue_properties() const
855  {
856  if (!queue_properties_valid_)
857  {
858  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), static_cast<void *>(&queue_properties_), NULL);
859  VIENNACL_ERR_CHECK(err);
860  queue_properties_valid_ = true;
861  }
862  return queue_properties_;
863  }
864 
878  cl_device_fp_config single_fp_config() const
879  {
880  if (!single_fp_config_valid_)
881  {
882  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), static_cast<void *>(&single_fp_config_), NULL);
883  VIENNACL_ERR_CHECK(err);
884  single_fp_config_valid_ = true;
885  }
886  return single_fp_config_;
887  }
888 
893  cl_device_type type() const
894  {
895  if (!type_valid_)
896  {
897  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_TYPE, sizeof(cl_device_type), static_cast<void *>(&type_), NULL);
898  VIENNACL_ERR_CHECK(err);
899  type_valid_ = true;
900  }
901  return type_;
902  }
903 
905  std::string vendor() const
906  {
907  if (!vendor_valid_)
908  {
909  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR, sizeof(char) * 256, static_cast<void *>(vendor_), NULL);
910  VIENNACL_ERR_CHECK(err);
911  vendor_valid_ = true;
912  }
913  return vendor_;
914  }
915 
917  cl_uint vendor_id() const
918  {
919  if (!vendor_id_valid_)
920  {
921  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), static_cast<void *>(&vendor_id_), NULL);
922  VIENNACL_ERR_CHECK(err);
923  vendor_id_valid_ = true;
924  }
925  return vendor_id_;
926  }
927 
929  std::string version() const
930  {
931  if (!version_valid_)
932  {
933  cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VERSION, sizeof(char) * 256, static_cast<void *>(version_), NULL);
934  VIENNACL_ERR_CHECK(err);
935  version_valid_ = true;
936  }
937  return version_;
938  }
939 
941  std::string driver_version() const
942  {
943  if (!driver_version_valid_)
944  {
945  cl_int err = clGetDeviceInfo(device_, CL_DRIVER_VERSION, sizeof(char) * 256, static_cast<void *>(driver_version_), NULL);
946  VIENNACL_ERR_CHECK(err);
947  driver_version_valid_ = true;
948  }
949  return driver_version_;
950  }
951 
953 
954 
956  bool double_support() const
957  {
958  std::string ext = extensions();
959 
960  if (ext.find("cl_khr_fp64") != std::string::npos || ext.find("cl_amd_fp64") != std::string::npos)
961  return true;
962 
963  return false;
964  }
965 
967  std::string double_support_extension() const
968  {
969  std::string ext = extensions();
970 
971  if (ext.find("cl_amd_fp64") != std::string::npos) //AMD extension
972  return "cl_amd_fp64";
973 
974  if (ext.find("cl_khr_fp64") != std::string::npos) //Khronos-certified standard extension for double precision
975  return "cl_khr_fp64";
976 
977  return "";
978  }
979 
981  cl_device_id id() const
982  {
983  assert(device_ != 0 && bool("Device ID invalid!"));
984  return device_;
985  }
986 
995  std::string info(vcl_size_t indent = 0, char indent_char = ' ') const
996  {
997  std::string line_indent(indent, indent_char);
998  std::ostringstream oss;
999  oss << line_indent << "Name: " << name() << std::endl;
1000  oss << line_indent << "Vendor: " << vendor() << std::endl;
1001  oss << line_indent << "Type: " << device_type_to_string(type()) << std::endl;
1002  oss << line_indent << "Available: " << available() << std::endl;
1003  oss << line_indent << "Max Compute Units: " << max_compute_units() << std::endl;
1004  oss << line_indent << "Max Work Group Size: " << max_work_group_size() << std::endl;
1005  oss << line_indent << "Global Mem Size: " << global_mem_size() << std::endl;
1006  oss << line_indent << "Local Mem Size: " << local_mem_size() << std::endl;
1007  oss << line_indent << "Local Mem Type: " << local_mem_type() << std::endl;
1008 #ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
1009  oss << line_indent << "Host Unified Memory: " << host_unified_memory() << std::endl;
1010 #endif
1011 
1012  return oss.str();
1013  }
1014 
1020  std::string full_info(vcl_size_t indent = 0, char indent_char = ' ') const
1021  {
1022  std::string line_indent(indent, indent_char);
1023  std::ostringstream oss;
1024  oss << line_indent << "Address Bits: " << address_bits() << std::endl;
1025  oss << line_indent << "Available: " << available() << std::endl;
1026  oss << line_indent << "Compiler Available: " << compiler_available() << std::endl;
1027 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1028  oss << line_indent << "Double FP Config: " << fp_config_to_string(double_fp_config()) << std::endl;
1029 #endif
1030  oss << line_indent << "Endian Little: " << endian_little() << std::endl;
1031  oss << line_indent << "Error Correction Support: " << error_correction_support() << std::endl;
1032  oss << line_indent << "Execution Capabilities: " << exec_capabilities_to_string(execution_capabilities()) << std::endl;
1033  oss << line_indent << "Extensions: " << extensions() << std::endl;
1034  oss << line_indent << "Global Mem Cache Size: " << global_mem_cache_size() << " Bytes" << std::endl;
1035  oss << line_indent << "Global Mem Cache Type: " << mem_cache_type_to_string(global_mem_cache_type()) << std::endl;
1036  oss << line_indent << "Global Mem Cacheline Size: " << global_mem_cacheline_size() << " Bytes" << std::endl;
1037  oss << line_indent << "Global Mem Size: " << global_mem_size() << " Bytes" << std::endl;
1038 #ifdef CL_DEVICE_HALF_FP_CONFIG
1039  oss << line_indent << "Half PF Config: " << fp_config_to_string(half_fp_config()) << std::endl;
1040 #endif
1041 #ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
1042  oss << line_indent << "Host Unified Memory: " << host_unified_memory() << std::endl;
1043 #endif
1044  oss << line_indent << "Image Support: " << image_support() << std::endl;
1045  oss << line_indent << "Image2D Max Height: " << image2d_max_height() << std::endl;
1046  oss << line_indent << "Image2D Max Width: " << image2d_max_width() << std::endl;
1047  oss << line_indent << "Image3D Max Depth: " << image3d_max_depth() << std::endl;
1048  oss << line_indent << "Image3D Max Height: " << image3d_max_height() << std::endl;
1049  oss << line_indent << "Image3D Max Width: " << image3d_max_width() << std::endl;
1050  oss << line_indent << "Local Mem Size: " << local_mem_size() << " Bytes" << std::endl;
1051  oss << line_indent << "Local Mem Type: " << local_mem_type_to_string(local_mem_type()) << std::endl;
1052  oss << line_indent << "Max Clock Frequency: " << max_clock_frequency() << " MHz" << std::endl;
1053  oss << line_indent << "Max Compute Units: " << max_compute_units() << std::endl;
1054  oss << line_indent << "Max Constant Args: " << max_constant_args() << std::endl;
1055  oss << line_indent << "Max Constant Buffer Size: " << max_constant_buffer_size() << " Bytes" << std::endl;
1056  oss << line_indent << "Max Mem Alloc Size: " << max_mem_alloc_size() << " Bytes" << std::endl;
1057  oss << line_indent << "Max Parameter Size: " << max_parameter_size() << " Bytes" << std::endl;
1058  oss << line_indent << "Max Read Image Args: " << max_read_image_args() << std::endl;
1059  oss << line_indent << "Max Samplers: " << max_samplers() << std::endl;
1060  oss << line_indent << "Max Work Group Size: " << max_work_group_size() << std::endl;
1061  oss << line_indent << "Max Work Item Dimensions: " << max_work_item_dimensions() << std::endl;
1062  oss << line_indent << "Max Work Item Sizes: " << convert_to_string(max_work_item_sizes()) << std::endl;
1063  oss << line_indent << "Max Write Image Args: " << max_write_image_args() << std::endl;
1064  oss << line_indent << "Mem Base Addr Align: " << mem_base_addr_align() << std::endl;
1065  oss << line_indent << "Min Data Type Align Size: " << min_data_type_align_size() << " Bytes" << std::endl;
1066  oss << line_indent << "Name: " << name() << std::endl;
1067 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR
1068  oss << line_indent << "Native Vector Width char: " << native_vector_width_char() << std::endl;
1069 #endif
1070 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT
1071  oss << line_indent << "Native Vector Width short: " << native_vector_width_short() << std::endl;
1072 #endif
1073 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_INT
1074  oss << line_indent << "Native Vector Width int: " << native_vector_width_int() << std::endl;
1075 #endif
1076 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG
1077  oss << line_indent << "Native Vector Width long: " << native_vector_width_long() << std::endl;
1078 #endif
1079 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT
1080  oss << line_indent << "Native Vector Width float: " << native_vector_width_float() << std::endl;
1081 #endif
1082 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
1083  oss << line_indent << "Native Vector Width double: " << native_vector_width_double() << std::endl;
1084 #endif
1085 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF
1086  oss << line_indent << "Native Vector Width half: " << native_vector_width_half() << std::endl;
1087 #endif
1088 #ifdef CL_DEVICE_OPENCL_C_VERSION
1089  oss << line_indent << "OpenCL C Version: " << opencl_c_version() << std::endl;
1090 #endif
1091  oss << line_indent << "Platform: " << platform() << std::endl;
1092  oss << line_indent << "Preferred Vector Width char: " << preferred_vector_width_char() << std::endl;
1093  oss << line_indent << "Preferred Vector Width short: " << preferred_vector_width_short() << std::endl;
1094  oss << line_indent << "Preferred Vector Width int: " << preferred_vector_width_int() << std::endl;
1095  oss << line_indent << "Preferred Vector Width long: " << preferred_vector_width_long() << std::endl;
1096  oss << line_indent << "Preferred Vector Width float: " << preferred_vector_width_float() << std::endl;
1097  oss << line_indent << "Preferred Vector Width double: " << preferred_vector_width_double() << std::endl;
1098 #ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
1099  oss << line_indent << "Preferred Vector Width half: " << preferred_vector_width_half() << std::endl;
1100 #endif
1101  oss << line_indent << "Profile: " << profile() << std::endl;
1102  oss << line_indent << "Profiling Timer Resolution: " << profiling_timer_resolution() << " ns" << std::endl;
1103  oss << line_indent << "Queue Properties: " << queue_properties_to_string(queue_properties()) << std::endl;
1104  oss << line_indent << "Single FP Config: " << fp_config_to_string(single_fp_config()) << std::endl;
1105  oss << line_indent << "Type: " << device_type_to_string(type()) << std::endl;
1106  oss << line_indent << "Vendor: " << vendor() << std::endl;
1107  oss << line_indent << "Vendor ID: " << vendor_id() << std::endl;
1108  oss << line_indent << "Version: " << version() << std::endl;
1109  oss << line_indent << "Driver Version: " << driver_version() << std::endl;
1110 
1111  return oss.str();
1112  }
1113 
1114  bool operator==(device const & other) const
1115  {
1116  return device_ == other.device_;
1117  }
1118 
1119  bool operator==(cl_device_id other) const
1120  {
1121  return device_ == other;
1122  }
1123 
1125  std::string fp_config_to_string(cl_device_fp_config conf) const
1126  {
1127  std::ostringstream oss;
1128  if (conf & CL_FP_DENORM)
1129  oss << "CL_FP_DENORM ";
1130  if (conf & CL_FP_INF_NAN)
1131  oss << "CL_FP_INF_NAN ";
1132  if (conf & CL_FP_ROUND_TO_NEAREST)
1133  oss << "CL_FP_ROUND_TO_NEAREST ";
1134  if (conf & CL_FP_ROUND_TO_ZERO)
1135  oss << "CL_FP_ROUND_TO_ZERO ";
1136  if (conf & CL_FP_ROUND_TO_INF)
1137  oss << "CL_FP_ROUND_TO_INF ";
1138  if (conf & CL_FP_FMA)
1139  oss << "CL_FP_FMA ";
1140 #ifdef CL_FP_SOFT_FLOAT
1141  if (conf & CL_FP_SOFT_FLOAT)
1142  oss << "CL_FP_SOFT_FLOAT ";
1143 #endif
1144 
1145  return oss.str();
1146  }
1147 
1148  std::string exec_capabilities_to_string(cl_device_exec_capabilities cap) const
1149  {
1150  std::ostringstream oss;
1151  if (cap & CL_EXEC_KERNEL)
1152  oss << "CL_EXEC_KERNEL ";
1153  if (cap & CL_EXEC_NATIVE_KERNEL)
1154  oss << "CL_EXEC_NATIVE_KERNEL ";
1155 
1156  return oss.str();
1157  }
1158 
1159  std::string mem_cache_type_to_string(cl_device_mem_cache_type cachetype) const
1160  {
1161  std::ostringstream oss;
1162  if (cachetype == CL_NONE)
1163  oss << "CL_NONE ";
1164  else if (cachetype == CL_READ_ONLY_CACHE)
1165  oss << "CL_READ_ONLY_CACHE ";
1166  else if (cachetype == CL_READ_WRITE_CACHE)
1167  oss << "CL_READ_WRITE_CACHE ";
1168 
1169  return oss.str();
1170  }
1171 
1172  std::string local_mem_type_to_string(cl_device_local_mem_type loc_mem_type) const
1173  {
1174  std::ostringstream oss;
1175  if (loc_mem_type & CL_LOCAL)
1176  oss << "CL_LOCAL ";
1177  if (loc_mem_type & CL_GLOBAL)
1178  oss << "CL_GLOBAL ";
1179 
1180  return oss.str();
1181  }
1182 
1183  std::string convert_to_string(std::vector<size_t> const & vec) const
1184  {
1185  std::ostringstream oss;
1186  for (vcl_size_t i=0; i<vec.size(); ++i)
1187  oss << vec[i] << " ";
1188 
1189  return oss.str();
1190  }
1191 
1192  std::string queue_properties_to_string(cl_command_queue_properties queue_prop) const
1193  {
1194  std::ostringstream oss;
1195  if (queue_prop & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
1196  oss << "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ";
1197  if (queue_prop & CL_QUEUE_PROFILING_ENABLE)
1198  oss << "CL_QUEUE_PROFILING_ENABLE ";
1199 
1200  return oss.str();
1201  }
1202 
1203  std::string device_type_to_string(cl_device_type dev_type) const
1204  {
1205  std::ostringstream oss;
1206  if (dev_type & CL_DEVICE_TYPE_GPU)
1207  oss << "GPU ";
1208  if (dev_type & CL_DEVICE_TYPE_CPU)
1209  oss << "CPU ";
1210  if (dev_type & CL_DEVICE_TYPE_ACCELERATOR)
1211  oss << "Accelerator ";
1212  if (dev_type & CL_DEVICE_TYPE_DEFAULT)
1213  oss << "(default)";
1214 
1215  return oss.str();
1216  }
1217 
1218 private:
1219 
1220  void flush_cache()
1221  {
1222  address_bits_valid_ = false;
1223  architecture_family_valid_ = false;
1224  available_valid_ = false;
1225  compiler_available_valid_ = false;
1226 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1227  double_fp_config_valid_ = false;
1228 #endif
1229  endian_little_valid_ = false;
1230  error_correction_support_valid_ = false;
1231  execution_capabilities_valid_ = false;
1232  extensions_valid_ = false;
1233  global_mem_cache_size_valid_ = false;
1234  global_mem_cache_type_valid_ = false;
1235  global_mem_cacheline_size_valid_ = false;
1236  global_mem_size_valid_ = false;
1237 #ifdef CL_DEVICE_HALF_FP_CONFIG
1238  half_fp_config_valid_ = false;
1239 #endif
1240  host_unified_memory_valid_ = false;
1241  image_support_valid_ = false;
1242  image2d_max_height_valid_ = false;
1243  image2d_max_width_valid_ = false;
1244  image3d_max_depth_valid_ = false;
1245  image3d_max_height_valid_ = false;
1246  image3d_max_width_valid_ = false;
1247  local_mem_size_valid_ = false;
1248  local_mem_type_valid_ = false;
1249  max_clock_frequency_valid_ = false;
1250  max_compute_units_valid_ = false;
1251  max_constant_args_valid_ = false;
1252  max_constant_buffer_size_valid_ = false;
1253  max_mem_alloc_size_valid_ = false;
1254  max_parameter_size_valid_ = false;
1255  max_read_image_args_valid_ = false;
1256  max_samplers_valid_ = false;
1257  max_work_group_size_valid_ = false;
1258  max_work_item_dimensions_valid_ = false;
1259  max_work_item_sizes_valid_ = false;
1260  max_write_image_args_valid_ = false;
1261  mem_base_addr_align_valid_ = false;
1262  min_data_type_align_size_valid_ = false;
1263  name_valid_ = false;
1264  native_vector_width_char_valid_ = false;
1265  native_vector_width_short_valid_ = false;
1266  native_vector_width_int_valid_ = false;
1267  native_vector_width_long_valid_ = false;
1268  native_vector_width_float_valid_ = false;
1269  native_vector_width_double_valid_ = false;
1270  native_vector_width_half_valid_ = false;
1271  opencl_c_version_valid_ = false;
1272  platform_valid_ = false;
1273  preferred_vector_width_char_valid_ = false;
1274  preferred_vector_width_short_valid_ = false;
1275  preferred_vector_width_int_valid_ = false;
1276  preferred_vector_width_long_valid_ = false;
1277  preferred_vector_width_float_valid_ = false;
1278  preferred_vector_width_double_valid_ = false;
1279  preferred_vector_width_half_valid_ = false;
1280  profile_valid_ = false;
1281  profiling_timer_resolution_valid_ = false;
1282  queue_properties_valid_ = false;
1283  single_fp_config_valid_ = false;
1284  type_valid_ = false;
1285  vendor_valid_ = false;
1286  vendor_id_valid_ = false;
1287  version_valid_ = false;
1288  driver_version_valid_ = false;
1289  }
1290 
1291  cl_device_id device_;
1292 
1293  //
1294  // Device information supported by OpenCL 1.0 to follow
1295  // cf. http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clGetDeviceInfo.html
1296  // Note that all members are declared 'mutable', as they represent a caching mechanism in order to circumvent repeated potentially expensive calls to the OpenCL SDK
1297  //
1298 
1299  mutable bool address_bits_valid_;
1300  mutable cl_uint address_bits_;
1301 
1302  mutable bool available_valid_;
1303  mutable cl_bool available_;
1304 
1305  mutable bool compiler_available_valid_;
1306  mutable cl_bool compiler_available_;
1307 
1308 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1309  mutable bool double_fp_config_valid_;
1310  mutable cl_device_fp_config double_fp_config_;
1311 #endif
1312 
1313  mutable bool endian_little_valid_;
1314  mutable cl_bool endian_little_;
1315 
1316  mutable bool error_correction_support_valid_;
1317  mutable cl_bool error_correction_support_;
1318 
1319  mutable bool execution_capabilities_valid_;
1320  mutable cl_device_exec_capabilities execution_capabilities_;
1321 
1322  mutable bool extensions_valid_;
1323  mutable char extensions_[2048]; // don't forget to adjust member function accordingly when changing array size
1324 
1325  mutable bool global_mem_cache_size_valid_;
1326  mutable cl_ulong global_mem_cache_size_;
1327 
1328  mutable bool global_mem_cache_type_valid_;
1329  mutable cl_device_mem_cache_type global_mem_cache_type_;
1330 
1331  mutable bool global_mem_cacheline_size_valid_;
1332  mutable cl_uint global_mem_cacheline_size_;
1333 
1334  mutable bool global_mem_size_valid_;
1335  mutable cl_ulong global_mem_size_;
1336 
1337 #ifdef CL_DEVICE_HALF_FP_CONFIG
1338  mutable bool half_fp_config_valid_;
1339  mutable cl_device_fp_config half_fp_config_;
1340 #endif
1341 
1342  mutable bool host_unified_memory_valid_;
1343  mutable cl_bool host_unified_memory_;
1344 
1345  mutable bool image_support_valid_;
1346  mutable cl_bool image_support_;
1347 
1348  mutable bool image2d_max_height_valid_;
1349  mutable size_t image2d_max_height_;
1350 
1351  mutable bool image2d_max_width_valid_;
1352  mutable size_t image2d_max_width_;
1353 
1354  mutable bool image3d_max_depth_valid_;
1355  mutable size_t image3d_max_depth_;
1356 
1357  mutable bool image3d_max_height_valid_;
1358  mutable size_t image3d_max_height_;
1359 
1360  mutable bool image3d_max_width_valid_;
1361  mutable size_t image3d_max_width_;
1362 
1363  mutable bool local_mem_size_valid_;
1364  mutable cl_ulong local_mem_size_;
1365 
1366  mutable bool local_mem_type_valid_;
1367  mutable cl_device_local_mem_type local_mem_type_;
1368 
1369  mutable bool max_clock_frequency_valid_;
1370  mutable cl_uint max_clock_frequency_;
1371 
1372  mutable bool max_compute_units_valid_;
1373  mutable cl_uint max_compute_units_;
1374 
1375  mutable bool max_constant_args_valid_;
1376  mutable cl_uint max_constant_args_;
1377 
1378  mutable bool max_constant_buffer_size_valid_;
1379  mutable cl_ulong max_constant_buffer_size_;
1380 
1381  mutable bool max_mem_alloc_size_valid_;
1382  mutable cl_ulong max_mem_alloc_size_;
1383 
1384  mutable bool max_parameter_size_valid_;
1385  mutable size_t max_parameter_size_;
1386 
1387  mutable bool max_read_image_args_valid_;
1388  mutable cl_uint max_read_image_args_;
1389 
1390  mutable bool max_samplers_valid_;
1391  mutable cl_uint max_samplers_;
1392 
1393  mutable bool max_work_group_size_valid_;
1394  mutable size_t max_work_group_size_;
1395 
1396  mutable bool max_work_item_dimensions_valid_;
1397  mutable cl_uint max_work_item_dimensions_;
1398 
1399  mutable bool max_work_item_sizes_valid_;
1400  mutable size_t max_work_item_sizes_[16]; //we do not support execution models with more than 16 dimensions. This should totally suffice in practice, though.
1401 
1402  mutable bool max_write_image_args_valid_;
1403  mutable cl_uint max_write_image_args_;
1404 
1405  mutable bool mem_base_addr_align_valid_;
1406  mutable cl_uint mem_base_addr_align_;
1407 
1408  mutable bool min_data_type_align_size_valid_;
1409  mutable cl_uint min_data_type_align_size_;
1410 
1411  mutable bool name_valid_;
1412  mutable char name_[256]; // don't forget to adjust member function accordingly when changing array size
1413 
1414  mutable bool native_vector_width_char_valid_;
1415  mutable cl_uint native_vector_width_char_;
1416 
1417  mutable bool native_vector_width_short_valid_;
1418  mutable cl_uint native_vector_width_short_;
1419 
1420  mutable bool native_vector_width_int_valid_;
1421  mutable cl_uint native_vector_width_int_;
1422 
1423  mutable bool native_vector_width_long_valid_;
1424  mutable cl_uint native_vector_width_long_;
1425 
1426  mutable bool native_vector_width_float_valid_;
1427  mutable cl_uint native_vector_width_float_;
1428 
1429  mutable bool native_vector_width_double_valid_;
1430  mutable cl_uint native_vector_width_double_;
1431 
1432  mutable bool native_vector_width_half_valid_;
1433  mutable cl_uint native_vector_width_half_;
1434 
1435  mutable bool opencl_c_version_valid_;
1436  mutable char opencl_c_version_[128]; // don't forget to adjust member function accordingly when changing array size
1437 
1438  mutable bool platform_valid_;
1439  mutable cl_platform_id platform_;
1440 
1441  mutable bool preferred_vector_width_char_valid_;
1442  mutable cl_uint preferred_vector_width_char_;
1443 
1444  mutable bool preferred_vector_width_short_valid_;
1445  mutable cl_uint preferred_vector_width_short_;
1446 
1447  mutable bool preferred_vector_width_int_valid_;
1448  mutable cl_uint preferred_vector_width_int_;
1449 
1450  mutable bool preferred_vector_width_long_valid_;
1451  mutable cl_uint preferred_vector_width_long_;
1452 
1453  mutable bool preferred_vector_width_float_valid_;
1454  mutable cl_uint preferred_vector_width_float_;
1455 
1456  mutable bool preferred_vector_width_double_valid_;
1457  mutable cl_uint preferred_vector_width_double_;
1458 
1459  mutable bool preferred_vector_width_half_valid_;
1460  mutable cl_uint preferred_vector_width_half_;
1461 
1462  mutable bool profile_valid_;
1463  mutable char profile_[32]; // don't forget to adjust member function accordingly when changing array size
1464 
1465  mutable bool profiling_timer_resolution_valid_;
1466  mutable size_t profiling_timer_resolution_;
1467 
1468  mutable bool queue_properties_valid_;
1469  mutable cl_command_queue_properties queue_properties_;
1470 
1471  mutable bool single_fp_config_valid_;
1472  mutable cl_device_fp_config single_fp_config_;
1473 
1474  mutable bool type_valid_;
1475  mutable cl_device_type type_;
1476 
1477  mutable bool vendor_valid_;
1478  mutable char vendor_[256]; // don't forget to adjust member function accordingly when changing array size
1479 
1480  mutable bool vendor_id_valid_;
1481  mutable cl_uint vendor_id_;
1482 
1483  mutable bool version_valid_;
1484  mutable char version_[256]; // don't forget to adjust member function accordingly when changing array size
1485 
1486  mutable bool driver_version_valid_;
1487  mutable char driver_version_[256]; // don't forget to adjust member function accordingly when changing array size
1488 
1489  mutable bool architecture_family_valid_;
1490  mutable device_architecture_family architecture_family_;
1491 };
1492 
1493 } //namespace ocl
1494 } //namespace viennacl
1495 
1496 #endif
size_t image3d_max_depth() const
Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:324
cl_device_id id() const
Returns the OpenCL device id.
Definition: device.hpp:981
cl_uint min_data_type_align_size() const
The smallest alignment in bytes which can be used for any data type.
Definition: device.hpp:554
cl_device_exec_capabilities execution_capabilities() const
Describes the execution capabilities of the device.
Definition: device.hpp:167
std::string driver_version() const
OpenCL software driver version string.
Definition: device.hpp:941
cl_bool image_support() const
Is CL_TRUE if images are supported by the OpenCL device and CL_FALSE otherwise.
Definition: device.hpp:288
cl_uint max_read_image_args() const
Max number of simultaneous image objects that can be read by a kernel. The minimum value is 128 if CL...
Definition: device.hpp:459
cl_device_mem_cache_type global_mem_cache_type() const
Type of global memory cache supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and CL_READ_WRITE_CACHE.
Definition: device.hpp:213
cl_uint preferred_vector_width_char() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:726
cl_bool compiler_available() const
Is CL_FALSE if the implementation does not have a compiler available to compile the program source...
Definition: device.hpp:99
cl_ulong max_constant_buffer_size() const
Max size in bytes of a constant buffer allocation. The minimum value is 64 KB.
Definition: device.hpp:420
bool operator==(device const &other) const
Definition: device.hpp:1114
cl_uint max_write_image_args() const
Max number of simultaneous image objects that can be written to by a kernel. The minimum value is 8 i...
Definition: device.hpp:530
cl_ulong max_mem_alloc_size() const
Max size of memory object allocation in bytes. The minimum value is max(1/4th of CL_DEVICE_GLOBAL_MEM...
Definition: device.hpp:432
cl_uint preferred_vector_width_int() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:750
cl_uint preferred_vector_width_short() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:738
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
cl_ulong local_mem_size() const
Size of local memory arena in bytes. The minimum value is 32 KB.
Definition: device.hpp:360
std::string queue_properties_to_string(cl_command_queue_properties queue_prop) const
Definition: device.hpp:1192
cl_device_type type() const
The OpenCL device type.
Definition: device.hpp:893
bool operator==(cl_device_id other) const
Definition: device.hpp:1119
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
Definition: device.hpp:917
cl_uint preferred_vector_width_float() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:774
cl_bool error_correction_support() const
Is CL_TRUE if the device implements error correction for all accesses to compute device memory (globa...
Definition: device.hpp:149
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
#define VIENNACL_ERR_CHECK(err)
Definition: error.hpp:681
size_t image3d_max_width() const
Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:348
std::string fp_config_to_string(cl_device_fp_config conf) const
Helper function converting a floating point configuration to a string.
Definition: device.hpp:1125
cl_device_local_mem_type local_mem_type() const
Type of local memory supported. This can be set to CL_LOCAL implying dedicated local memory storage s...
Definition: device.hpp:372
size_t image2d_max_height() const
Max height of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:300
size_t image3d_max_height() const
Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:336
cl_platform_id platform() const
The platform associated with this device.
Definition: device.hpp:714
device(cl_device_id dev)
Definition: device.hpp:54
Implementation of a smart-pointer-like class for handling OpenCL handles.
size_t image2d_max_width() const
Max width of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:312
cl_uint global_mem_cacheline_size() const
Size of global memory cache line in bytes.
Definition: device.hpp:225
Various utility implementations for dispatching with respect to the different devices available on th...
cl_ulong global_mem_cache_size() const
Size of global memory cache in bytes.
Definition: device.hpp:201
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
cl_uint max_constant_args() const
Max number of arguments declared with the __constant qualifier in a kernel. The minimum value is 8...
Definition: device.hpp:408
device(const device &other)
Definition: device.hpp:62
cl_device_fp_config single_fp_config() const
Describes single precision floating-point capability of the OpenCL device.
Definition: device.hpp:878
std::size_t vcl_size_t
Definition: forwards.h:75
device_architecture_family get_architecture_family(cl_uint vendor_id, std::string const &name)
std::string mem_cache_type_to_string(cl_device_mem_cache_type cachetype) const
Definition: device.hpp:1159
std::string name() const
Device name string.
Definition: device.hpp:566
device_architecture_family architecture_family() const
Device architecture family.
Definition: device.hpp:578
cl_ulong global_mem_size() const
Size of global memory in bytes.
Definition: device.hpp:237
std::string double_support_extension() const
ViennaCL convenience function: Returns the device extension which enables double precision (usually c...
Definition: device.hpp:967
Error handling for the OpenCL layer of ViennaCL.
cl_bool available() const
Is CL_TRUE if the device is available and CL_FALSE if the device is not available.
Definition: device.hpp:87
std::string extensions() const
Returns a space-separated list of extension names (the extension names themselves do not contain any ...
Definition: device.hpp:189
std::string version() const
OpenCL version string. Returns the OpenCL version supported by the device.
Definition: device.hpp:929
cl_uint max_work_item_dimensions() const
Maximum dimensions that specify the global and local work-item IDs used by the data parallel executio...
Definition: device.hpp:495
std::string exec_capabilities_to_string(cl_device_exec_capabilities cap) const
Definition: device.hpp:1148
cl_uint preferred_vector_width_double() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:789
cl_bool endian_little() const
Is CL_TRUE if the OpenCL device is a little endian device and CL_FALSE otherwise. ...
Definition: device.hpp:137
cl_uint address_bits() const
The default compute device address space size specified as an unsigned integer value in bits...
Definition: device.hpp:75
std::string vendor() const
Vendor name string.
Definition: device.hpp:905
cl_uint max_clock_frequency() const
Maximum configured clock frequency of the device in MHz.
Definition: device.hpp:384
std::string device_type_to_string(cl_device_type dev_type) const
Definition: device.hpp:1203
std::string profile() const
OpenCL profile string. Returns the profile name supported by the device (FULL_PROFILE or EMBEDDED_PROFILE).
Definition: device.hpp:823
std::string convert_to_string(std::vector< size_t > const &vec) const
Definition: device.hpp:1183
std::string local_mem_type_to_string(cl_device_local_mem_type loc_mem_type) const
Definition: device.hpp:1172
cl_uint max_samplers() const
Maximum number of samplers that can be used in a kernel. The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
Definition: device.hpp:471
cl_uint max_compute_units() const
The number of parallel compute cores on the OpenCL device. The minimum value is 1.
Definition: device.hpp:396
size_t profiling_timer_resolution() const
Describes the resolution of device timer. This is measured in nanoseconds.
Definition: device.hpp:835
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
Definition: device.hpp:483
std::string full_info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with all device properties defined in the OpenCL 1.1 standard, listed in alphabetical order. Use info() for a short overview.
Definition: device.hpp:1020
cl_uint mem_base_addr_align() const
Describes the alignment in bits of the base address of any allocated memory object.
Definition: device.hpp:542
size_t max_parameter_size() const
Max size in bytes of the arguments that can be passed to a kernel. The minimum value is 1024...
Definition: device.hpp:447
cl_uint preferred_vector_width_long() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:762
std::vector< size_t > max_work_item_sizes() const
Maximum number of work-items that can be specified in each dimension of the work-group.
Definition: device.hpp:510
cl_command_queue_properties queue_properties() const
Describes the command-queue properties supported by the device.
Definition: device.hpp:854