1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_MATRIX_ELEMENT_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_MATRIX_ELEMENT_HPP
/** @brief Appends the OpenCL source of one unary element-wise matrix kernel to a string.
*
* The emitted kernel is named "<funcname>_<op_name>" and executes, for every (row, col)
* with row < A_size1 and col < A_size2, the statement
*   A[index_A(row, col)] <op> <funcname>(B[index_B(row, col)]);
* where start/inc/internal_size kernel arguments describe the addressed sub-matrix.
*
* @param source          String-like object (must provide append()) receiving the kernel text
* @param numeric_string  Element type of A and B as written in the kernel (e.g. "float")
* @param funcname        Function applied to each entry of B; also the first part of the kernel name
* @param op              Assignment operator text placed between A[...] and the function call (e.g. "=")
* @param op_name         Suffix of the kernel name identifying the operator (e.g. "assign")
* @param is_row_major    Selects row-major (true) or column-major (false) index arithmetic
*/
template <typename StringType>
void generate_matrix_unary_element_ops(StringType & source, std::string const & numeric_string,
                                       std::string const & funcname, std::string const & op,
                                       std::string const & op_name, bool is_row_major)
{
  // Kernel header: name is "<funcname>_<op_name>".
  source.append("__kernel void ");
  source.append(funcname);
  source.append("_");
  source.append(op_name);
  source.append("(\n");

  // Destination matrix A and its layout description.
  source.append(" __global ");
  source.append(numeric_string);
  source.append(" * A, \n");
  source.append(" unsigned int A_start1, unsigned int A_start2, \n");
  source.append(" unsigned int A_inc1, unsigned int A_inc2, \n");
  source.append(" unsigned int A_size1, unsigned int A_size2, \n");
  source.append(" unsigned int A_internal_size1, unsigned int A_internal_size2, \n");

  // Read-only operand B and its layout description (sizes are taken from A).
  source.append(" __global const ");
  source.append(numeric_string);
  source.append(" * B, \n");
  source.append(" unsigned int B_start1, unsigned int B_start2, \n");
  source.append(" unsigned int B_inc1, unsigned int B_inc2, \n");
  source.append(" unsigned int B_internal_size1, unsigned int B_internal_size2) { \n");

  // Exactly one of the two bodies may be emitted: each declares row_gid/col_gid,
  // so emitting both would produce an invalid kernel.
  if (is_row_major)
  {
    // Row-major: linear index = row_index * internal_size2 + col_index.
    // Rows advance by work-group count, columns by work-group size.
    source.append(" unsigned int row_gid = get_global_id(0) / get_local_size(0); \n");
    source.append(" unsigned int col_gid = get_global_id(0) % get_local_size(0); \n");
    source.append(" for (unsigned int row = row_gid; row < A_size1; row += get_num_groups(0)) \n");
    source.append(" for (unsigned int col = col_gid; col < A_size2; col += get_local_size(0)) \n");
    source.append(" A[(row * A_inc1 + A_start1) * A_internal_size2 + col * A_inc2 + A_start2] \n");
    source.append(" ");
    source.append(op);
    source.append(" ");
    source.append(funcname);
    source.append("(B[(row * B_inc1 + B_start1) * B_internal_size2 + col * B_inc2 + B_start2]); \n");
  }
  else
  {
    // Column-major: linear index = row_index + col_index * internal_size1.
    // Columns advance by work-group count, rows by work-group size.
    source.append(" unsigned int row_gid = get_global_id(0) % get_local_size(0); \n");
    source.append(" unsigned int col_gid = get_global_id(0) / get_local_size(0); \n");
    source.append(" for (unsigned int col = col_gid; col < A_size2; col += get_num_groups(0)) \n");
    source.append(" for (unsigned int row = row_gid; row < A_size1; row += get_local_size(0)) \n");
    source.append(" A[(row * A_inc1 + A_start1) + (col * A_inc2 + A_start2) * A_internal_size1] \n");
    source.append(" ");
    source.append(op);
    source.append(" ");
    source.append(funcname);
    source.append("(B[(row * B_inc1 + B_start1) + (col * B_inc2 + B_start2) * B_internal_size1]); \n");
  }

  // Close the kernel body.
  source.append("} \n");
}
64 template <
typename StringType>
76 template <
typename NumericT,
typename F>
89 static std::map<cl_context, bool> init_done;
96 viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
99 if (numeric_string ==
"float" || numeric_string ==
"double")
124 #ifdef VIENNACL_BUILD_INFO
125 std::cout <<
"Creating program " << prog_name << std::endl;
127 ctx.add_program(source, prog_name);
128 init_done[ctx.handle().get()] =
true;
static std::string program_name()
static void init(viennacl::ocl::context &ctx)
Helper class for checking whether a matrix has a row-major layout.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Main kernel class for generating OpenCL kernels for element-wise operations such as element_sin() on/w...
static void apply(viennacl::ocl::context const &)
const OCL_TYPE & get() const
void generate_matrix_unary_element_ops(StringType &source, std::string const &numeric_string, std::string const &funcname, std::string const &op, std::string const &op_name, bool is_row_major)
Representation of an OpenCL kernel in ViennaCL.
std::string type_to_string(viennacl::row_major)
Helper class for converting a type to its string representation.
Runtime generation of OpenCL kernels for matrix operations.