ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bisect_kernel_large_onei.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_LARGE_ONEI_HPP_
2 #define VIENNACL_LINALG_CUDA_BISECT_KERNEL_LARGE_ONEI_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
30 // includes, project
33 // additional kernel
35 
36 namespace viennacl
37 {
38 namespace linalg
39 {
40 namespace cuda
41 {
56 template<typename NumericT>
57 __global__
58 void
59 bisectKernelLarge_OneIntervals(const NumericT *g_d, const NumericT *g_s, const unsigned int n,
60  unsigned int num_intervals,
61  NumericT *g_left, NumericT *g_right,
62  unsigned int *g_pos,
63  NumericT precision)
64 {
65 
66  const unsigned int gtid = (blockDim.x * blockIdx.x) + threadIdx.x;
67 
68  __shared__ NumericT s_left_scratch[VIENNACL_BISECT_MAX_THREADS_BLOCK];
69  __shared__ NumericT s_right_scratch[VIENNACL_BISECT_MAX_THREADS_BLOCK];
70 
71  // active interval of thread
72  // left and right limit of current interval
73  NumericT left, right;
74  // number of threads smaller than the right limit (also corresponds to the
75  // global index of the eigenvalues contained in the active interval)
76  unsigned int right_count;
77  // flag if current thread converged
78  unsigned int converged = 0;
79  // midpoint when current interval is subdivided
80  NumericT mid = 0.0f;
81  // number of eigenvalues less than mid
82  unsigned int mid_count = 0;
83 
84  // read data from global memory
85  if (gtid < num_intervals)
86  {
87  left = g_left[gtid];
88  right = g_right[gtid];
89  right_count = g_pos[gtid];
90  }
91 
92 
93  // flag to determine if all threads converged to eigenvalue
94  __shared__ unsigned int converged_all_threads;
95 
96  // initialized shared flag
97  if (0 == threadIdx.x)
98  {
99  converged_all_threads = 0;
100  }
101 
102  __syncthreads();
103 
104  // process until all threads converged to an eigenvalue
105  while (true)
106  {
107 
108  converged_all_threads = 1;
109 
110  // update midpoint for all active threads
111  if ((gtid < num_intervals) && (0 == converged))
112  {
113  mid = computeMidpoint(left, right);
114  }
115 
116  // find number of eigenvalues that are smaller than midpoint
117  mid_count = computeNumSmallerEigenvalsLarge(g_d, g_s, n,
118  mid, gtid, num_intervals,
119  s_left_scratch,
120  s_right_scratch,
121  converged);
122 
123  __syncthreads();
124 
125  // for all active threads
126  if ((gtid < num_intervals) && (0 == converged))
127  {
128 
129  // update intervals -- always one child interval survives
130  if (right_count == mid_count)
131  {
132  right = mid;
133  }
134  else
135  {
136  left = mid;
137  }
138 
139  // check for convergence
140  NumericT t0 = right - left;
141  NumericT t1 = max(abs(right), abs(left)) * precision;
142 
143  if (t0 < min(precision, t1))
144  {
145  NumericT lambda = computeMidpoint(left, right);
146  left = lambda;
147  right = lambda;
148 
149  converged = 1;
150  }
151  else
152  {
153  converged_all_threads = 0;
154  }
155  }
156 
157  __syncthreads();
158 
159  if (1 == converged_all_threads)
160  {
161  break;
162  }
163 
164  __syncthreads();
165  }
166 
167  // write data back to global memory
168  __syncthreads();
169 
170  if (gtid < num_intervals)
171  {
172  // intervals converged so left and right interval limit are both identical
173  // and identical to the eigenvalue
174  g_left[gtid] = left;
175  }
176 }
177 } // namespace cuda
178 } // namespace linalg
179 } // namespace viennacl
180 #endif // #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_LARGE_ONEI_HPP_
#define VIENNACL_BISECT_MAX_THREADS_BLOCK
Definition: config.hpp:32
Utility functions.
Global configuration parameters.
float NumericT
Definition: bisect.cpp:40
__device__ NumericT computeMidpoint(const NumericT left, const NumericT right)
Definition: bisect_util.hpp:89
NumericT max(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:47
__global__ void bisectKernelLarge_OneIntervals(const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision)
__device__ unsigned int computeNumSmallerEigenvalsLarge(const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
NumericT min(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:91