1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_AMG_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_AMG_HPP
23 template<
typename StringT>
27 source.append(
"__kernel void amg_influence_trivial( \n");
28 source.append(
" __global const unsigned int * A_row_indices, \n");
29 source.append(
" __global const unsigned int * A_col_indices, \n");
30 source.append(
" unsigned int A_size1, \n");
31 source.append(
" unsigned int A_nnz, \n");
32 source.append(
" __global unsigned int * influences_row, \n");
33 source.append(
" __global unsigned int * influences_id, \n");
34 source.append(
" __global unsigned int * influences_values) { \n");
36 source.append(
" for (unsigned int i = get_global_id(0); i < A_size1; i += get_global_size(0)) \n");
37 source.append(
" { \n");
38 source.append(
" unsigned int tmp = A_row_indices[i]; \n");
39 source.append(
" influences_row[i] = tmp; \n");
40 source.append(
" influences_values[i] = A_row_indices[i+1] - tmp; \n");
41 source.append(
" } \n");
43 source.append(
" for (unsigned int i = get_global_id(0); i < A_nnz; i += get_global_size(0)) \n");
44 source.append(
" influences_id[i] = A_col_indices[i]; \n");
46 source.append(
" if (get_global_id(0) == 0) \n");
47 source.append(
" influences_row[A_size1] = A_row_indices[A_size1]; \n");
48 source.append(
"} \n");
53 template<
typename StringT>
57 source.append(
"__kernel void amg_pmis2_init_workdata( \n");
58 source.append(
" __global unsigned int *work_state, \n");
59 source.append(
" __global unsigned int *work_random, \n");
60 source.append(
" __global unsigned int *work_index, \n");
61 source.append(
" __global unsigned int const *point_types, \n");
62 source.append(
" __global unsigned int const *random_weights, \n");
63 source.append(
" unsigned int size) { \n");
65 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
66 source.append(
" switch (point_types[i]) { \n");
67 source.append(
" case 0: work_state[i] = 1; break; \n");
68 source.append(
" case 1: work_state[i] = 2; break; \n");
69 source.append(
" case 2: work_state[i] = 0; break; \n");
71 source.append(
" default: break; // do nothing \n");
72 source.append(
" } \n");
74 source.append(
" work_random[i] = random_weights[i]; \n");
75 source.append(
" work_index[i] = i; \n");
76 source.append(
" } \n");
77 source.append(
"} \n");
82 template<
typename StringT>
86 source.append(
"__kernel void amg_pmis2_max_neighborhood( \n");
87 source.append(
" __global unsigned int *work_state, \n");
88 source.append(
" __global unsigned int *work_random, \n");
89 source.append(
" __global unsigned int *work_index, \n");
90 source.append(
" __global unsigned int *work_state2, \n");
91 source.append(
" __global unsigned int *work_random2, \n");
92 source.append(
" __global unsigned int *work_index2, \n");
93 source.append(
" __global unsigned int const *influences_row, \n");
94 source.append(
" __global unsigned int const *influences_id, \n");
95 source.append(
" unsigned int size) { \n");
97 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
100 source.append(
" unsigned int state = work_state[i]; \n");
101 source.append(
" unsigned int random = work_random[i]; \n");
102 source.append(
" unsigned int index = work_index[i]; \n");
105 source.append(
" unsigned int j_stop = influences_row[i + 1]; \n");
106 source.append(
" for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
107 source.append(
" unsigned int influenced_point_id = influences_id[j]; \n");
110 source.append(
" if (state < work_state[influenced_point_id]) { \n");
111 source.append(
" state = work_state[influenced_point_id]; \n");
112 source.append(
" random = work_random[influenced_point_id]; \n");
113 source.append(
" index = work_index[influenced_point_id]; \n");
114 source.append(
" } else if (state == work_state[influenced_point_id]) { \n");
115 source.append(
" if (random < work_random[influenced_point_id]) { \n");
116 source.append(
" state = work_state[influenced_point_id]; \n");
117 source.append(
" random = work_random[influenced_point_id]; \n");
118 source.append(
" index = work_index[influenced_point_id]; \n");
119 source.append(
" } else if (random == work_random[influenced_point_id]) { \n");
120 source.append(
" if (index < work_index[influenced_point_id]) { \n");
121 source.append(
" state = work_state[influenced_point_id]; \n");
122 source.append(
" random = work_random[influenced_point_id]; \n");
123 source.append(
" index = work_index[influenced_point_id]; \n");
124 source.append(
" } \n");
125 source.append(
" } \n");
126 source.append(
" } \n");
128 source.append(
" }\n");
131 source.append(
" work_state2[i] = state; \n");
132 source.append(
" work_random2[i] = random; \n");
133 source.append(
" work_index2[i] = index; \n");
134 source.append(
" } \n");
135 source.append(
"} \n");
140 template<
typename StringT>
144 source.append(
"__kernel void amg_pmis2_mark_mis_nodes( \n");
145 source.append(
" __global unsigned int const *work_state, \n");
146 source.append(
" __global unsigned int const *work_index, \n");
147 source.append(
" __global unsigned int *point_types, \n");
148 source.append(
" __global unsigned int *undecided_buffer, \n");
149 source.append(
" unsigned int size) { \n");
151 source.append(
" unsigned int num_undecided = 0; \n");
152 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
153 source.append(
" unsigned int max_state = work_state[i]; \n");
154 source.append(
" unsigned int max_index = work_index[i]; \n");
156 source.append(
" if (point_types[i] == 0) { \n");
157 source.append(
" if (i == max_index) point_types[i] = 1; \n");
158 source.append(
" else if (max_state == 2) point_types[i] = 2; \n");
159 source.append(
" else num_undecided += 1; \n");
160 source.append(
" } \n");
161 source.append(
" } \n");
164 source.append(
" __local unsigned int shared_buffer[256]; \n");
165 source.append(
" shared_buffer[get_local_id(0)] = num_undecided; \n");
166 source.append(
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) { \n");
167 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
168 source.append(
" if (get_local_id(0) < stride) shared_buffer[get_local_id(0)] += shared_buffer[get_local_id(0)+stride]; \n");
169 source.append(
" } \n");
171 source.append(
" if (get_local_id(0) == 0) \n");
172 source.append(
" undecided_buffer[get_group_id(0)] = shared_buffer[0]; \n");
174 source.append(
"} \n");
178 template<
typename StringT>
182 source.append(
"__kernel void amg_pmis2_reset_state( \n");
183 source.append(
" __global unsigned int *point_types, \n");
184 source.append(
" unsigned int size) { \n");
186 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
187 source.append(
" if (point_types[i] != 1) point_types[i] = 0;\n");
188 source.append(
" } \n");
190 source.append(
"} \n");
199 template<
typename StringT>
203 source.append(
" __kernel void amg_agg_propagate_coarse_indices( \n");
204 source.append(
" __global unsigned int *point_types, \n");
205 source.append(
" __global unsigned int *coarse_ids, \n");
206 source.append(
" __global unsigned int const *influences_row, \n");
207 source.append(
" __global unsigned int const *influences_id, \n");
208 source.append(
" unsigned int size) { \n");
210 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
211 source.append(
" { \n");
212 source.append(
" if (point_types[i] == 1) { \n");
213 source.append(
" unsigned int coarse_index = coarse_ids[i]; \n");
215 source.append(
" unsigned int j_stop = influences_row[i + 1]; \n");
216 source.append(
" for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
217 source.append(
" unsigned int influenced_point_id = influences_id[j]; \n");
218 source.append(
" coarse_ids[influenced_point_id] = coarse_index; \n");
219 source.append(
" if (influenced_point_id != i) point_types[influenced_point_id] = 2; \n");
220 source.append(
" } \n");
221 source.append(
" } \n");
222 source.append(
" } \n");
223 source.append(
"} \n");
229 template<
typename StringT>
233 source.append(
" __kernel void amg_agg_merge_undecided( \n");
234 source.append(
" __global unsigned int *point_types, \n");
235 source.append(
" __global unsigned int *coarse_ids, \n");
236 source.append(
" __global unsigned int const *influences_row, \n");
237 source.append(
" __global unsigned int const *influences_id, \n");
238 source.append(
" unsigned int size) { \n");
240 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
241 source.append(
" { \n");
242 source.append(
" if (point_types[i] == 0) { \n");
244 source.append(
" unsigned int j_stop = influences_row[i + 1]; \n");
245 source.append(
" for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
246 source.append(
" unsigned int influenced_point_id = influences_id[j]; \n");
247 source.append(
" if (point_types[influenced_point_id] != 0) { \n");
248 source.append(
" coarse_ids[i] = coarse_ids[influenced_point_id]; \n");
249 source.append(
" break; \n");
250 source.append(
" } \n");
251 source.append(
" } \n");
253 source.append(
" } \n");
254 source.append(
" } \n");
255 source.append(
"} \n");
260 template<
typename StringT>
264 source.append(
" __kernel void amg_agg_merge_undecided_2( \n");
265 source.append(
" __global unsigned int *point_types, \n");
266 source.append(
" unsigned int size) { \n");
268 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
269 source.append(
" if (point_types[i] == 0) point_types[i] = 2; \n");
271 source.append(
"} \n");
276 template<
typename StringT>
280 source.append(
" __kernel void amg_interpol_ag( \n");
281 source.append(
" __global unsigned int * P_row_indices, \n");
282 source.append(
" __global unsigned int * P_column_indices, \n");
283 source.append(
" __global "); source.append(numeric_string); source.append(
" * P_elements, \n");
284 source.append(
" __global const unsigned int * coarse_agg_ids, \n");
285 source.append(
" unsigned int size) { \n");
287 source.append(
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
288 source.append(
" { \n");
289 source.append(
" P_row_indices[i] = i; \n");
290 source.append(
" P_column_indices[i] = coarse_agg_ids[i]; \n");
291 source.append(
" P_elements[i] = 1; \n");
292 source.append(
" } \n");
293 source.append(
" if (get_global_id(0) == 0) P_row_indices[size] = size; \n");
294 source.append(
" } \n");
298 template<
typename StringT>
302 source.append(
"__kernel void amg_interpol_sa( \n");
303 source.append(
" __global unsigned int const *A_row_indices, \n");
304 source.append(
" __global unsigned int const *A_col_indices, \n");
305 source.append(
" __global "); source.append(numeric_string); source.append(
" const *A_elements, \n");
306 source.append(
" unsigned int A_size1, \n");
307 source.append(
" unsigned int A_nnz, \n");
308 source.append(
" __global unsigned int *Jacobi_row_indices, \n");
309 source.append(
" __global unsigned int *Jacobi_col_indices, \n");
310 source.append(
" __global "); source.append(numeric_string); source.append(
" *Jacobi_elements, \n");
311 source.append(
" "); source.append(numeric_string); source.append(
" omega) { \n");
313 source.append(
" for (unsigned int row = get_global_id(0); row < A_size1; row += get_global_size(0)) \n");
314 source.append(
" { \n");
315 source.append(
" unsigned int row_begin = A_row_indices[row]; \n");
316 source.append(
" unsigned int row_end = A_row_indices[row+1]; \n");
318 source.append(
" Jacobi_row_indices[row] = row_begin; \n");
321 source.append(
" "); source.append(numeric_string); source.append(
" diag = 0; \n");
322 source.append(
" for (unsigned int j = row_begin; j < row_end; ++j) { \n");
323 source.append(
" if (A_col_indices[j] == row) { \n");
324 source.append(
" diag = A_elements[j]; \n");
325 source.append(
" break; \n");
326 source.append(
" } \n");
327 source.append(
" } \n");
330 source.append(
" for (unsigned int j = row_begin; j < row_end; ++j) { \n");
331 source.append(
" unsigned int col_index = A_col_indices[j]; \n");
332 source.append(
" Jacobi_col_indices[j] = col_index; \n");
333 source.append(
" Jacobi_elements[j] = (col_index == row) ? (1 - omega) : (-omega * A_elements[j] / diag); \n");
334 source.append(
" } \n");
336 source.append(
" } \n");
337 source.append(
" if (get_global_id(0) == 0) Jacobi_row_indices[A_size1] = A_nnz; \n");
338 source.append(
"} \n");
345 template<
typename NumericT>
355 static std::map<cl_context, bool> init_done;
362 source.reserve(2048);
364 viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
379 #ifdef VIENNACL_BUILD_INFO
380 std::cout <<
"Creating program " << prog_name << std::endl;
382 ctx.add_program(source, prog_name);
383 init_done[ctx.handle().get()] =
true;
Main kernel class for generating OpenCL kernels for compressed_matrix.
void generate_amg_pmis2_reset_state(StringT &source)
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void generate_amg_pmis2_init_workdata(StringT &source)
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Common implementations shared by OpenCL-based operations.
static void apply(viennacl::ocl::context const &)
const OCL_TYPE & get() const
void generate_amg_pmis2_mark_mis_nodes(StringT &source)
void generate_amg_agg_merge_undecided_2(StringT &source)
static std::string program_name()
void generate_amg_influence_trivial(StringT &source)
static void init(viennacl::ocl::context &ctx)
Representation of an OpenCL kernel in ViennaCL.
void generate_amg_pmis2_max_neighborhood(StringT &source)
void generate_amg_agg_merge_undecided(StringT &source)
Helper class for converting a type to its string representation.
void generate_amg_interpol_ag(StringT &source, std::string const &numeric_string)
void generate_amg_interpol_sa(StringT &source, std::string const &numeric_string)
void generate_amg_agg_propagate_coarse_indices(StringT &source)