1 #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_SMALL_HPP_
2 #define VIENNACL_LINALG_CUDA_BISECT_KERNEL_SMALL_HPP_
58 template<
typename NumericT>
63 unsigned int *g_left_count,
unsigned int *g_right_count,
65 const unsigned int lg_eig_count,
const unsigned int ug_eig_count,
80 __shared__
unsigned int
86 __shared__
unsigned int compact_second_chunk;
87 __shared__
unsigned int all_threads_converged;
90 __shared__
unsigned int num_threads_active;
93 __shared__
unsigned int num_threads_compaction;
96 unsigned int *s_compaction_list_exc = s_compaction_list + 1;
103 unsigned int left_count = 0;
104 unsigned int right_count = 0;
108 unsigned int mid_count = 0;
110 unsigned int is_active_second = 0;
112 s_compaction_list[threadIdx.x] = 0;
113 s_left[threadIdx.x] = 0;
114 s_right[threadIdx.x] = 0;
115 s_left_count[threadIdx.x] = 0;
116 s_right_count[threadIdx.x] = 0;
121 if (0 == threadIdx.x)
125 s_left_count[0] = lg_eig_count;
126 s_right_count[0] = ug_eig_count;
128 compact_second_chunk = 0;
129 num_threads_active = 1;
131 num_threads_compaction = 1;
139 all_threads_converged = 1;
142 is_active_second = 0;
144 s_left, s_right, s_left_count, s_right_count,
146 left, right, left_count, right_count,
147 mid, all_threads_converged);
152 if (1 == all_threads_converged)
165 threadIdx.x, num_threads_active,
181 if (threadIdx.x < num_threads_active)
189 s_left, s_right, s_left_count, s_right_count,
191 left_count, mid_count, right_count,
192 epsilon, compact_second_chunk,
193 s_compaction_list_exc,
201 left_count, mid_count, right_count,
202 s_compaction_list_exc, compact_second_chunk,
214 if (compact_second_chunk > 0)
220 mid, right, mid_count, right_count,
221 s_compaction_list, num_threads_active,
227 if (0 == threadIdx.x)
231 num_threads_active += s_compaction_list[num_threads_active];
233 num_threads_compaction =
ceilPow2(num_threads_active);
235 compact_second_chunk = 0;
253 g_left[threadIdx.x] = s_left[threadIdx.x];
255 g_left_count[threadIdx.x] = s_left_count[threadIdx.x];
261 #endif // #ifndef _BISECT_KERNEL_SMALL_H_
__global__ void bisectKernelSmall(const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon)
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.
__device__ void createIndicesCompaction(T *s_compaction_list_exc, unsigned int num_threads_compaction)
__device__ unsigned int computeNumSmallerEigenvals(const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
__device__ void storeNonEmptyIntervals(unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second)
Store all non-empty intervals resulting from the subdivision of the interval currently processed by t...
__device__ int ceilPow2(int n)
Global configuration parameters.
__device__ void compactIntervals(NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second)
Perform stream compaction for second child intervals.
#define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
__device__ void subdivideActiveIntervalMulti(const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
Subdivide interval if active and not already converged.
__device__ void storeIntervalConverged(NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second)