ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bisect_kernel_calls.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_BISECT_KERNEL_CALLS_HPP_
2 #define VIENNACL_LINALG_OPENCL_BISECT_KERNEL_CALLS_HPP_
3 
4 
5 /* =========================================================================
6  Copyright (c) 2010-2016, Institute for Microelectronics,
7  Institute for Analysis and Scientific Computing,
8  TU Wien.
9  Portions of this software are copyright by UChicago Argonne, LLC.
10 
11  -----------------
12  ViennaCL - The Vienna Computing Library
13  -----------------
14 
15  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
16 
17  (A list of authors and contributors can be found in the manual)
18 
19  License: MIT (X11), see file LICENSE in the base directory
20 ============================================================================= */
21 
22 
31 // includes, project
36 
37 namespace viennacl
38 {
39 namespace linalg
40 {
41 namespace opencl
42 {
43 const std::string BISECT_KERNEL_SMALL = "bisectKernelSmall";
44 const std::string BISECT_KERNEL_LARGE = "bisectKernelLarge";
45 const std::string BISECT_KERNEL_LARGE_ONE_INTERVALS = "bisectKernelLarge_OneIntervals";
46 const std::string BISECT_KERNEL_LARGE_MULT_INTERVALS = "bisectKernelLarge_MultIntervals";
47 
48 template<typename NumericT>
51  const unsigned int mat_size,
52  const NumericT lg, const NumericT ug,
53  const NumericT precision)
54  {
55  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
57 
61 
62  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
63  viennacl::traits::opencl_handle(input.g_b),
64  static_cast<cl_uint>(mat_size),
65  viennacl::traits::opencl_handle(result.vcl_g_left),
66  viennacl::traits::opencl_handle(result.vcl_g_right),
67  viennacl::traits::opencl_handle(result.vcl_g_left_count),
68  viennacl::traits::opencl_handle(result.vcl_g_right_count),
69  static_cast<NumericT>(lg),
70  static_cast<NumericT>(ug),
71  static_cast<cl_uint>(0),
72  static_cast<cl_uint>(mat_size),
73  static_cast<NumericT>(precision)
74  ));
75 
76  }
77 
78 template<typename NumericT>
81  const unsigned int mat_size,
82  const NumericT lg, const NumericT ug,
83  const NumericT precision)
84  {
85  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
87 
89  kernel.global_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); // Use only 128 threads for 256 < n <= 512, this
90  kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); // is reasoned
91 
92  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
93  viennacl::traits::opencl_handle(input.g_b),
94  static_cast<cl_uint>(mat_size),
95  static_cast<NumericT>(lg),
96  static_cast<NumericT>(ug),
97  static_cast<cl_uint>(0),
98  static_cast<cl_uint>(mat_size),
99  static_cast<NumericT>(precision),
100  viennacl::traits::opencl_handle(result.g_num_one),
101  viennacl::traits::opencl_handle(result.g_num_blocks_mult),
102  viennacl::traits::opencl_handle(result.g_left_one),
103  viennacl::traits::opencl_handle(result.g_right_one),
104  viennacl::traits::opencl_handle(result.g_pos_one),
105  viennacl::traits::opencl_handle(result.g_left_mult),
106  viennacl::traits::opencl_handle(result.g_right_mult),
107  viennacl::traits::opencl_handle(result.g_left_count_mult),
108  viennacl::traits::opencl_handle(result.g_right_count_mult),
109  viennacl::traits::opencl_handle(result.g_blocks_mult),
110  viennacl::traits::opencl_handle(result.g_blocks_mult_sum)
111  ));
112 
113  }
114 
115 template<typename NumericT>
118  const unsigned int mat_size,
119  const NumericT precision)
120  {
121  unsigned int num_one_intervals = result.g_num_one;
122  unsigned int num_blocks = viennacl::linalg::detail::getNumBlocksLinear(num_one_intervals,
124 
125  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
127 
129  kernel.global_work_size(0, num_blocks * (mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2));
131 
132  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
133  viennacl::traits::opencl_handle(input.g_b),
134  static_cast<cl_uint>(mat_size),
135  static_cast<cl_uint>(num_one_intervals),
136  viennacl::traits::opencl_handle(result.g_left_one),
137  viennacl::traits::opencl_handle(result.g_right_one),
138  viennacl::traits::opencl_handle(result.g_pos_one),
139  static_cast<NumericT>(precision)
140  ));
141  }
142 
143 
144 template<typename NumericT>
147  const unsigned int mat_size,
148  const NumericT precision)
149  {
150  unsigned int num_blocks_mult = result.g_num_blocks_mult;
151 
152  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
154 
156  kernel.global_work_size(0, num_blocks_mult * (mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2));
158 
159  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
160  viennacl::traits::opencl_handle(input.g_b),
161  static_cast<cl_uint>(mat_size),
162  viennacl::traits::opencl_handle(result.g_blocks_mult),
163  viennacl::traits::opencl_handle(result.g_blocks_mult_sum),
164  viennacl::traits::opencl_handle(result.g_left_mult),
165  viennacl::traits::opencl_handle(result.g_right_mult),
166  viennacl::traits::opencl_handle(result.g_left_count_mult),
167  viennacl::traits::opencl_handle(result.g_right_count_mult),
168  viennacl::traits::opencl_handle(result.g_lambda_mult),
169  viennacl::traits::opencl_handle(result.g_pos_mult),
170  static_cast<NumericT>(precision)
171  ));
172  }
173 } // namespace opencl
174 } // namespace linalg
175 } // namespace viennacl
176 
177 #endif
static void init(viennacl::ocl::context &ctx)
Definition: bisect.hpp:2573
viennacl::vector< NumericT > g_b
device side representation of superdiagonal
Definition: structs.hpp:62
void bisectLargeMultIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
viennacl::vector< NumericT > g_left_mult
left interval limits of intervals containing multiple eigenvalues after the first iteration step ...
Definition: structs.hpp:146
void bisectSmall(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
#define VIENNACL_BISECT_MAX_THREADS_BLOCK
Definition: config.hpp:32
Helper structures to simplify variable handling.
viennacl::vector< NumericT > g_left_one
left interval limits of intervals containing one eigenvalue after the first iteration step ...
Definition: structs.hpp:137
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
viennacl::scalar< unsigned int > g_num_blocks_mult
number of (thread) blocks of intervals containing multiple eigenvalues after the first steo ...
Definition: structs.hpp:134
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
Utility functions.
viennacl::vector< unsigned int > g_blocks_mult
start addresses in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after ...
Definition: structs.hpp:156
viennacl::vector< unsigned int > g_left_count_mult
number of eigenvalues less than the left limit of the eigenvalue intervals containing multiple eigenv...
Definition: structs.hpp:151
Global configuration parameters.
const std::string BISECT_KERNEL_SMALL
float NumericT
Definition: bisect.cpp:40
const std::string BISECT_KERNEL_LARGE_ONE_INTERVALS
viennacl::vector< unsigned int > g_right_count_mult
number of eigenvalues less than the right limit of the eigenvalue intervals containing multiple eigen...
Definition: structs.hpp:154
const std::string BISECT_KERNEL_LARGE
In this class the input matrix is stored.
Definition: structs.hpp:53
void bisectLargeOneIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
unsigned int getNumBlocksLinear(const unsigned int num_threads, const unsigned int num_threads_block)
Definition: util.hpp:96
viennacl::vector< NumericT > vcl_g_right
right interval limits at the end of the computation
Definition: structs.hpp:103
viennacl::vector< NumericT > g_lambda_mult
eigenvalues that have been generated in the second step from intervals that still contained multiple ...
Definition: structs.hpp:162
viennacl::vector< NumericT > vcl_g_left
left interval limits at the end of the computation
Definition: structs.hpp:101
viennacl::vector< unsigned int > vcl_g_left_count
number of eigenvalues smaller than the left interval limit
Definition: structs.hpp:105
viennacl::vector< unsigned int > g_pos_one
interval indices (position in sorted listed of eigenvalues) of intervals containing one eigenvalue af...
Definition: structs.hpp:143
OpenCL kernels for the bisection algorithm for eigenvalues.
#define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
Definition: config.hpp:38
const std::string BISECT_KERNEL_LARGE_MULT_INTERVALS
viennacl::vector< NumericT > g_right_one
right interval limits of intervals containing one eigenvalue after the first iteration step ...
Definition: structs.hpp:140
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
viennacl::vector< unsigned int > g_pos_mult
eigenvalue index of intervals that have been generated in the second processing step ...
Definition: structs.hpp:165
viennacl::vector< unsigned int > vcl_g_right_count
number of eigenvalues bigger than the right interval limit
Definition: structs.hpp:107
In this class the data of the result for large matrices is stored.
Definition: structs.hpp:125
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
viennacl::vector< unsigned int > g_blocks_mult_sum
accumulated number of intervals in g_left_mult etc. of blocks of intervals containing more than one e...
Definition: structs.hpp:159
viennacl::scalar< unsigned int > g_num_one
number of intervals containing one eigenvalue after the first step
Definition: structs.hpp:131
In this class the data of the result for small matrices is stored.
Definition: structs.hpp:96
Main kernel class for the generation of the bisection kernels and utilities.
Definition: bisect.hpp:2566
void bisectLarge(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
viennacl::vector< NumericT > g_a
device side representation of diagonal
Definition: structs.hpp:60
viennacl::vector< NumericT > g_right_mult
right interval limits of intervals containing multiple eigenvalues after the first iteration step ...
Definition: structs.hpp:148