ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fft.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
24 #include "viennacl/ocl/utils.hpp"
25 
28 namespace viennacl
29 {
30 namespace linalg
31 {
32 namespace opencl
33 {
34 namespace kernels
35 {
36 
38 
39 
40 // Postprocessing phase of Bluestein algorithm
41 template<typename StringT>
42 void generate_fft_bluestein_post(StringT & source, std::string const & numeric_string)
43 {
44  source.append("__kernel void bluestein_post(__global "); source.append(numeric_string); source.append("2 *Z, \n");
45  source.append(" __global "); source.append(numeric_string); source.append("2 *out, \n");
46  source.append(" unsigned int size) \n");
47  source.append("{ \n");
48  source.append(" unsigned int glb_id = get_global_id(0); \n");
49  source.append(" unsigned int glb_sz = get_global_size(0); \n");
50 
51  source.append(" unsigned int double_size = size << 1; \n");
52  source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
53  source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");
54 
55  source.append(" for (unsigned int i = glb_id; i < size; i += glb_sz) { \n");
56  source.append(" unsigned int rm = i * i % (double_size); \n");
57  source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * (-NUM_PI); \n");
58 
59  source.append(" sn_a = sincos(angle, &cs_a); \n");
60 
61  source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
62  source.append(" out[i] = ("); source.append(numeric_string); source.append("2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x); \n");
63  source.append(" } \n");
64  source.append("} \n");
65 }
66 
67 // Preprocessing phase of Bluestein algorithm
68 template<typename StringT>
69 void generate_fft_bluestein_pre(StringT & source, std::string const & numeric_string)
70 {
71  source.append("__kernel void bluestein_pre(__global "); source.append(numeric_string); source.append("2 *input, \n");
72  source.append(" __global "); source.append(numeric_string); source.append("2 *A, \n");
73  source.append(" __global "); source.append(numeric_string); source.append("2 *B, \n");
74  source.append(" unsigned int size, \n");
75  source.append(" unsigned int ext_size \n");
76  source.append(" ) { \n");
77  source.append(" unsigned int glb_id = get_global_id(0); \n");
78  source.append(" unsigned int glb_sz = get_global_size(0); \n");
79 
80  source.append(" unsigned int double_size = size << 1; \n");
81 
82  source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
83  source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");
84 
85  source.append(" for (unsigned int i = glb_id; i < size; i += glb_sz) { \n");
86  source.append(" unsigned int rm = i * i % (double_size); \n");
87  source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * NUM_PI; \n");
88 
89  source.append(" sn_a = sincos(-angle, &cs_a); \n");
90 
91  source.append(" "); source.append(numeric_string); source.append("2 a_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
92  source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, -sn_a); \n");
93 
94  source.append(" A[i] = ("); source.append(numeric_string); source.append("2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x); \n");
95  source.append(" B[i] = b_i; \n");
96 
97  // very bad instruction, to be fixed
98  source.append(" if (i) \n");
99  source.append(" B[ext_size - i] = b_i; \n");
100  source.append(" } \n");
101  source.append("} \n");
102 }
103 
105 template<typename StringT>
106 void generate_fft_complex_to_real(StringT & source, std::string const & numeric_string)
107 {
108  source.append("__kernel void complex_to_real(__global "); source.append(numeric_string); source.append("2 *in, \n");
109  source.append(" __global "); source.append(numeric_string); source.append(" *out, \n");
110  source.append(" unsigned int size) { \n");
111  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
112  source.append(" out[i] = in[i].x; \n");
113  source.append("} \n");
114 }
115 
117 template<typename StringT>
118 void generate_fft_div_vec_scalar(StringT & source, std::string const & numeric_string)
119 {
120  source.append("__kernel void fft_div_vec_scalar(__global "); source.append(numeric_string); source.append("2 *input1, \n");
121  source.append(" unsigned int size, \n");
122  source.append(" "); source.append(numeric_string); source.append(" factor) { \n");
123  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
124  source.append(" input1[i] /= factor; \n");
125  source.append("} \n");
126 }
127 
129 template<typename StringT>
130 void generate_fft_mult_vec(StringT & source, std::string const & numeric_string)
131 {
132  source.append("__kernel void fft_mult_vec(__global const "); source.append(numeric_string); source.append("2 *input1, \n");
133  source.append(" __global const "); source.append(numeric_string); source.append("2 *input2, \n");
134  source.append(" __global "); source.append(numeric_string); source.append("2 *output, \n");
135  source.append(" unsigned int size) { \n");
136  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
137  source.append(" "); source.append(numeric_string); source.append("2 in1 = input1[i]; \n");
138  source.append(" "); source.append(numeric_string); source.append("2 in2 = input2[i]; \n");
139 
140  source.append(" output[i] = ("); source.append(numeric_string); source.append("2)(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x); \n");
141  source.append(" } \n");
142  source.append("} \n");
143 }
144 
146 template<typename StringT>
147 void generate_fft_real_to_complex(StringT & source, std::string const & numeric_string)
148 {
149  source.append("__kernel void real_to_complex(__global "); source.append(numeric_string); source.append(" *in, \n");
150  source.append(" __global "); source.append(numeric_string); source.append("2 *out, \n");
151  source.append(" unsigned int size) { \n");
152  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
153  source.append(" "); source.append(numeric_string); source.append("2 val = 0; \n");
154  source.append(" val.x = in[i]; \n");
155  source.append(" out[i] = val; \n");
156  source.append(" } \n");
157  source.append("} \n");
158 }
159 
161 template<typename StringT>
162 void generate_fft_reverse_inplace(StringT & source, std::string const & numeric_string)
163 {
164  source.append("__kernel void reverse_inplace(__global "); source.append(numeric_string); source.append(" *vec, uint size) { \n");
165  source.append(" for (uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) { \n");
166  source.append(" "); source.append(numeric_string); source.append(" val1 = vec[i]; \n");
167  source.append(" "); source.append(numeric_string); source.append(" val2 = vec[size - i - 1]; \n");
168 
169  source.append(" vec[i] = val2; \n");
170  source.append(" vec[size - i - 1] = val1; \n");
171  source.append(" } \n");
172  source.append("} \n");
173 }
174 
176 template<typename StringT>
177 void generate_fft_transpose(StringT & source, std::string const & numeric_string)
178 {
179  source.append("__kernel void transpose(__global "); source.append(numeric_string); source.append("2 *input, \n");
180  source.append(" __global "); source.append(numeric_string); source.append("2 *output, \n");
181  source.append(" unsigned int row_num, \n");
182  source.append(" unsigned int col_num) { \n");
183  source.append(" unsigned int size = row_num * col_num; \n");
184  source.append(" for (unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n");
185  source.append(" unsigned int row = i / col_num; \n");
186  source.append(" unsigned int col = i - row*col_num; \n");
187 
188  source.append(" unsigned int new_pos = col * row_num + row; \n");
189 
190  source.append(" output[new_pos] = input[i]; \n");
191  source.append(" } \n");
192  source.append("} \n");
193 }
194 
196 template<typename StringT>
197 void generate_fft_transpose_inplace(StringT & source, std::string const & numeric_string)
198 {
199  source.append("__kernel void transpose_inplace(__global "); source.append(numeric_string); source.append("2* input, \n");
200  source.append(" unsigned int row_num, \n");
201  source.append(" unsigned int col_num) { \n");
202  source.append(" unsigned int size = row_num * col_num; \n");
203  source.append(" for (unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n");
204  source.append(" unsigned int row = i / col_num; \n");
205  source.append(" unsigned int col = i - row*col_num; \n");
206 
207  source.append(" unsigned int new_pos = col * row_num + row; \n");
208 
209  source.append(" if (i < new_pos) { \n");
210  source.append(" "); source.append(numeric_string); source.append("2 val = input[i]; \n");
211  source.append(" input[i] = input[new_pos]; \n");
212  source.append(" input[new_pos] = val; \n");
213  source.append(" } \n");
214  source.append(" } \n");
215  source.append("} \n");
216 }
217 
219 template<typename StringT>
220 void generate_fft_vandermonde_prod(StringT & source, std::string const & numeric_string)
221 {
222  source.append("__kernel void vandermonde_prod(__global "); source.append(numeric_string); source.append(" *vander, \n");
223  source.append(" __global "); source.append(numeric_string); source.append(" *vector, \n");
224  source.append(" __global "); source.append(numeric_string); source.append(" *result, \n");
225  source.append(" uint size) { \n");
226  source.append(" for (uint i = get_global_id(0); i < size; i+= get_global_size(0)) { \n");
227  source.append(" "); source.append(numeric_string); source.append(" mul = vander[i]; \n");
228  source.append(" "); source.append(numeric_string); source.append(" pwr = 1; \n");
229  source.append(" "); source.append(numeric_string); source.append(" val = 0; \n");
230 
231  source.append(" for (uint j = 0; j < size; j++) { \n");
232  source.append(" val = val + pwr * vector[j]; \n");
233  source.append(" pwr *= mul; \n");
234  source.append(" } \n");
235 
236  source.append(" result[i] = val; \n");
237  source.append(" } \n");
238  source.append("} \n");
239 }
240 
242 template<typename StringT>
243 void generate_fft_zero2(StringT & source, std::string const & numeric_string)
244 {
245  source.append("__kernel void zero2(__global "); source.append(numeric_string); source.append("2 *input1, \n");
246  source.append(" __global "); source.append(numeric_string); source.append("2 *input2, \n");
247  source.append(" unsigned int size) { \n");
248  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
249  source.append(" input1[i] = 0; \n");
250  source.append(" input2[i] = 0; \n");
251  source.append(" } \n");
252  source.append("} \n");
253 }
254 
256 
257 // main kernel class
259 template<typename NumericT>
260 struct fft
261 {
262  static std::string program_name()
263  {
265  }
266 
267  static void init(viennacl::ocl::context & ctx)
268  {
269  static std::map<cl_context, bool> init_done;
270  if (!init_done[ctx.handle().get()])
271  {
273  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
274 
275  std::string source;
276  source.reserve(8192);
277 
278  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
279 
280  // unary operations
281  if (numeric_string == "float" || numeric_string == "double")
282  {
283  generate_fft_bluestein_post(source, numeric_string);
284  generate_fft_bluestein_pre(source, numeric_string);
285  generate_fft_complex_to_real(source, numeric_string);
286  generate_fft_div_vec_scalar(source, numeric_string);
287  generate_fft_mult_vec(source, numeric_string);
288  generate_fft_real_to_complex(source, numeric_string);
289  generate_fft_reverse_inplace(source, numeric_string);
290  generate_fft_transpose(source, numeric_string);
291  generate_fft_transpose_inplace(source, numeric_string);
292  generate_fft_vandermonde_prod(source, numeric_string);
293  generate_fft_zero2(source, numeric_string);
294  }
295 
296  std::string prog_name = program_name();
297  #ifdef VIENNACL_BUILD_INFO
298  std::cout << "Creating program " << prog_name << std::endl;
299  #endif
300  ctx.add_program(source, prog_name);
301  init_done[ctx.handle().get()] = true;
302  } //if
303  } //init
304 };
305 
306 } // namespace kernels
307 } // namespace opencl
308 } // namespace linalg
309 } // namespace viennacl
310 #endif
311 
Implements a OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
void generate_fft_div_vec_scalar(StringT &source, std::string const &numeric_string)
OpenCL kernel generation code for dividing a complex number by a real number.
Definition: fft.hpp:118
Main kernel class for generating OpenCL kernels for the fast Fourier transform.
Definition: fft.hpp:260
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
static std::string program_name()
Definition: fft.hpp:262
void generate_fft_bluestein_post(StringT &source, std::string const &numeric_string)
Definition: fft.hpp:42
Provides OpenCL-related utilities.
void generate_fft_reverse_inplace(StringT &source, std::string const &numeric_string)
Reverses the entries in a vector.
Definition: fft.hpp:162
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
void generate_fft_complex_to_real(StringT &source, std::string const &numeric_string)
Extract real part of a complex number array.
Definition: fft.hpp:106
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:191
void generate_fft_zero2(StringT &source, std::string const &numeric_string)
Zero two complex vectors (to avoid kernel launch overhead)
Definition: fft.hpp:243
void generate_fft_mult_vec(StringT &source, std::string const &numeric_string)
Elementwise product of two complex vectors.
Definition: fft.hpp:130
void generate_fft_transpose_inplace(StringT &source, std::string const &numeric_string)
Simplistic inplace matrix transpose function.
Definition: fft.hpp:197
void generate_fft_bluestein_pre(StringT &source, std::string const &numeric_string)
Definition: fft.hpp:69
Representation of an OpenCL kernel in ViennaCL.
void generate_fft_vandermonde_prod(StringT &source, std::string const &numeric_string)
Computes the matrix vector product with a Vandermonde matrix.
Definition: fft.hpp:220
void generate_fft_real_to_complex(StringT &source, std::string const &numeric_string)
Embedds a real-valued vector into a complex one.
Definition: fft.hpp:147
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
static void init(viennacl::ocl::context &ctx)
Definition: fft.hpp:267
void generate_fft_transpose(StringT &source, std::string const &numeric_string)
Simplistic matrix transpose function.
Definition: fft.hpp:177