ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sliced_ell_matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SLICED_ELL_MATRIX_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_SLICED_ELL_MATRIX_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
24 #include "viennacl/ocl/utils.hpp"
25 
27 
30 namespace viennacl
31 {
32 namespace linalg
33 {
34 namespace opencl
35 {
36 namespace kernels
37 {
38 
40 
/**
 * @brief Appends the OpenCL source of the sliced-ELL matrix-vector product kernel to @p source.
 *
 * Two kernel variants can be emitted:
 *  - `vec_mul`:            result = A * x
 *  - `vec_mul_alpha_beta`: result = alpha * (A * x) + beta * result
 *    (the old value of `result` is only read when beta != 0)
 *
 * In the generated kernel each group of `block_size` consecutive work items handles one
 * block of rows: work item `id_in_block` accumulates row `block_idx * block_size + id_in_block`.
 *
 * @tparam StringT          String-like type providing `append(const char *)` and `append(std::string const &)`.
 * @param source            Buffer the kernel text is appended to; existing content is preserved.
 * @param numeric_string    OpenCL spelling of the scalar type (inserted verbatim into the kernel text).
 * @param with_alpha_beta   If true, emit the `vec_mul_alpha_beta` variant with the extra `alpha` and `beta` arguments.
 */
template<typename StringT>
void generate_sliced_ell_vec_mul(StringT & source, std::string const & numeric_string, bool with_alpha_beta)
{
  // --- kernel signature ---
  if (with_alpha_beta)
  {
    source.append("__kernel void vec_mul_alpha_beta( \n");
  }
  else
  {
    source.append("__kernel void vec_mul( \n");
  }
  source.append(" __global const unsigned int * columns_per_block, \n");
  source.append(" __global const unsigned int * column_indices, \n");
  source.append(" __global const unsigned int * block_start, \n");
  source.append(" __global const "); source.append(numeric_string); source.append(" * elements, \n");
  source.append(" __global const "); source.append(numeric_string); source.append(" * x, \n");
  source.append(" uint4 layout_x, \n");
  if (with_alpha_beta)
  {
    source.append(" "); source.append(numeric_string); source.append(" alpha, \n");
  }
  source.append(" __global "); source.append(numeric_string); source.append(" * result, \n");
  source.append(" uint4 layout_result, \n");
  if (with_alpha_beta)
  {
    source.append(" "); source.append(numeric_string); source.append(" beta, \n");
  }
  source.append(" unsigned int block_size) \n");
  source.append("{ \n");

  // --- work distribution: one group of block_size work items per row block ---
  source.append(" uint blocks_per_workgroup = get_local_size(0) / block_size; \n");
  source.append(" uint id_in_block = get_local_id(0) % block_size; \n");
  // NOTE(review): with layout_result.z == 0 the unsigned subtraction below wraps around;
  // confirm that callers never launch this kernel for an empty result vector.
  source.append(" uint num_blocks = (layout_result.z - 1) / block_size + 1; \n");
  source.append(" uint global_warp_count = blocks_per_workgroup * get_num_groups(0); \n");
  source.append(" uint global_warp_id = blocks_per_workgroup * get_group_id(0) + get_local_id(0) / block_size; \n");

  // --- loop over the row blocks assigned to this group of work items ---
  source.append(" for (uint block_idx = global_warp_id; block_idx < num_blocks; block_idx += global_warp_count) { \n");
  source.append(" "); source.append(numeric_string); source.append(" sum = 0; \n");

  source.append(" uint row = block_idx * block_size + id_in_block; \n");
  source.append(" uint offset = block_start[block_idx]; \n");
  source.append(" uint num_columns = columns_per_block[block_idx]; \n");
  source.append(" for (uint item_id = 0; item_id < num_columns; item_id++) { \n");
  source.append(" uint index = offset + item_id * block_size + id_in_block; \n");
  source.append(" "); source.append(numeric_string); source.append(" val = elements[index]; \n");
  // Entries equal to zero are skipped, so x is not read for them.
  source.append(" sum += (val != 0) ? (x[column_indices[index] * layout_x.y + layout_x.x] * val) : 0; \n");
  source.append(" } \n");

  // --- write-back, guarded because the last block may extend past the final row ---
  source.append(" if (row < layout_result.z) \n");
  if (with_alpha_beta)
  {
    source.append(" result[row * layout_result.y + layout_result.x] = alpha * sum + ((beta != 0) ? beta * result[row * layout_result.y + layout_result.x] : 0); \n");
  }
  else
  {
    source.append(" result[row * layout_result.y + layout_result.x] = sum; \n");
  }
  source.append(" } \n");
  source.append("} \n");
}
86 
87 
89 
90 // main kernel class
// Kernel-program generator for the sliced ELL sparse matrix format: assembles the OpenCL
// source for both vec_mul variants and registers it with a viennacl::ocl::context.
// NOTE(review): the embedded listing numbers skip 93 here, so the declaration that belongs
// to the primary template header below appears to have been lost from this listing.
92 template<typename NumericT, typename IndexT>
94 
// Partial specialization for unsigned int indices.
95 template<typename NumericT>
96 struct sliced_ell_matrix<NumericT, unsigned int>
97 {
// Name under which init() registers the program with the context.
// NOTE(review): the embedded listing numbers skip 100, so the body (the return statement)
// is missing from this listing; the returned string cannot be determined from here.
98  static std::string program_name()
99  {
101  }
102 
// Builds the OpenCL program for this NumericT and adds it to ctx.
// The work is done at most once per OpenCL context handle; later calls for the same
// context return without doing anything.
103  static void init(viennacl::ocl::context & ctx)
104  {
// Function-local static: one flag per raw cl_context handle. operator[] default-inserts
// false for a context that has not been seen yet.
// NOTE(review): no locking is visible around this map - confirm init() is not called concurrently.
105  static std::map<cl_context, bool> init_done;
106  if (!init_done[ctx.handle().get()])
107  {
// Spelling of NumericT inserted into the generated kernel text.
109  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
110 
111  std::string source;
// Pre-allocate to reduce reallocations while the kernel text is appended.
112  source.reserve(1024);
113 
// Presumably emits the extension pragma needed for double precision when NumericT
// requires it - verify against viennacl/ocl/utils.hpp.
114  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
115 
116  // fully parametrized kernels:
// Emits vec_mul_alpha_beta first, then vec_mul.
117  generate_sliced_ell_vec_mul(source, numeric_string, true);
118  generate_sliced_ell_vec_mul(source, numeric_string, false);
119 
120  std::string prog_name = program_name();
121  #ifdef VIENNACL_BUILD_INFO
122  std::cout << "Creating program " << prog_name << std::endl;
123  #endif
// Register the source with the context, then mark this context as initialized.
124  ctx.add_program(source, prog_name);
125  init_done[ctx.handle().get()] = true;
126  } //if
127  } //init
128 };
129 
130 } // namespace kernels
131 } // namespace opencl
132 } // namespace linalg
133 } // namespace viennacl
134 #endif
135 
Implements an OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
Common implementations shared by OpenCL-based operations.
float NumericT
Definition: bisect.cpp:40
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:191
Main kernel class for generating OpenCL kernels for sliced_ell_matrix.
void generate_sliced_ell_vec_mul(StringT &source, std::string const &numeric_string, bool with_alpha_beta)
Representation of an OpenCL kernel in ViennaCL.
Helper class for converting a type to its string representation.
Definition: utils.hpp:57