ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_MATRIX_SOLVE_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_MATRIX_SOLVE_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
24 #include "viennacl/ocl/utils.hpp"
25 
27 
30 namespace viennacl
31 {
32 namespace linalg
33 {
34 namespace opencl
35 {
36 namespace kernels
37 {
38 
39 template<typename StringT>
40 void generate_matrix_solve_blas3(StringT & source, std::string const & numeric_string,
41  bool row_major_A, bool row_major_B,
42  bool upper_solve, bool unit_diagonal)
43 {
44  //start OpenCL code:
45  source.append("__kernel void ");
46  if (unit_diagonal)
47  source.append("unit_");
48  if (upper_solve)
49  source.append("upper_");
50  else
51  source.append("lower_");
52  source.append("solve");
53 
54  source.append("( \n");
55  source.append(" __global const "); source.append(numeric_string); source.append(" * A, \n");
56  source.append(" unsigned int A_start1, unsigned int A_start2, \n");
57  source.append(" unsigned int A_inc1, unsigned int A_inc2, \n");
58  source.append(" unsigned int A_size1, unsigned int A_size2, \n");
59  source.append(" unsigned int A_internal_size1, unsigned int A_internal_size2, \n");
60  source.append(" __global "); source.append(numeric_string); source.append(" * B, \n");
61  source.append(" unsigned int B_start1, unsigned int B_start2, \n");
62  source.append(" unsigned int B_inc1, unsigned int B_inc2, \n");
63  source.append(" unsigned int B_size1, unsigned int B_size2, \n");
64  source.append(" unsigned int B_internal_size1, unsigned int B_internal_size2) { \n");
65  source.append(" "); source.append(numeric_string); source.append(" temp; \n");
66  if (upper_solve)
67  {
68  //Note: A is square, thus A_rows == A_cols and no dispatch for transposedness needed
69  source.append(" for (unsigned int row_cnt = 0; row_cnt < A_size1; ++row_cnt) \n");
70  source.append(" { \n");
71  source.append(" unsigned int row = A_size1 - 1 - row_cnt; \n");
72  }
73  else //lower triangular solve
74  {
75  source.append(" for (unsigned int row = 0; row < A_size1; ++row) \n");
76  source.append(" { \n");
77  }
78 
79  if (!unit_diagonal)
80  {
81  source.append(" barrier(CLK_GLOBAL_MEM_FENCE); \n");
82  source.append(" if (get_local_id(0) == 0) \n");
83  //Note: A is square, thus A_internal_rows == A_internal_cols and no dispatch for transposedness needed
84  if (row_major_B)
85  source.append(" B[(row * B_inc1 + B_start1) * B_internal_size2 + (get_group_id(0) * B_inc2 + B_start2)] /= ");
86  else
87  source.append(" B[(row * B_inc1 + B_start1) + (get_group_id(0) * B_inc2 + B_start2) * B_internal_size1] /= ");
88 
89  if (row_major_A)
90  source.append("A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]; \n");
91  else
92  source.append("A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1]; \n");
93  }
94 
95  source.append(" barrier(CLK_GLOBAL_MEM_FENCE); \n");
96 
97  if (row_major_B)
98  source.append(" temp = B[(row * B_inc1 + B_start1) * B_internal_size2 + (get_group_id(0) * B_inc2 + B_start2)]; \n");
99  else
100  source.append(" temp = B[(row * B_inc1 + B_start1) + (get_group_id(0) * B_inc2 + B_start2) * B_internal_size1]; \n");
101 
102  source.append(" //eliminate column of op(A) with index 'row' in parallel: \n");
103  if (upper_solve)
104  source.append(" for (unsigned int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n");
105  else
106  source.append(" for (unsigned int elim = row + get_local_id(0) + 1; elim < A_size1; elim += get_local_size(0)) \n");
107 
108  if (row_major_B)
109  source.append(" B[(elim * B_inc1 + B_start1) * B_internal_size2 + (get_group_id(0) * B_inc2 + B_start2)] -= temp * ");
110  else
111  source.append(" B[(elim * B_inc1 + B_start1) + (get_group_id(0) * B_inc2 + B_start2) * B_internal_size1] -= temp * ");
112 
113  if (row_major_A)
114  source.append("A[(elim * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]; \n");
115  else
116  source.append("A[(elim * A_inc1 + A_start1) + (row * A_inc2 + A_start2) * A_internal_size1]; \n");
117 
118  source.append(" } \n");
119  source.append("} \n");
120 }
121 
122 
123 // main kernel class
129 template<typename NumericT, typename LayoutT1, typename LayoutT2>
131 {
132  static std::string program_name()
133  {
134  return viennacl::ocl::type_to_string<NumericT>::apply() + "_matrix_solve_" + detail::type_to_string(LayoutT1()) + detail::type_to_string(LayoutT2());
135  }
136 
137  static void init(viennacl::ocl::context & ctx)
138  {
139  static std::map<cl_context, bool> init_done;
140  if (!init_done[ctx.handle().get()])
141  {
143  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
144  bool matrix_row_major = viennacl::is_row_major<LayoutT1>::value;
145  bool rhs_row_major = viennacl::is_row_major<LayoutT2>::value;
146 
147  std::string source;
148  source.reserve(8192);
149 
150  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
151 
152  // only generate for floating points (forces error for integers)
153  if (numeric_string == "float" || numeric_string == "double")
154  {
155  generate_matrix_solve_blas3(source, numeric_string, matrix_row_major, rhs_row_major,
156  false, false);
157  generate_matrix_solve_blas3(source, numeric_string, matrix_row_major, rhs_row_major,
158  false, true);
159  generate_matrix_solve_blas3(source, numeric_string, matrix_row_major, rhs_row_major,
160  true, false);
161  generate_matrix_solve_blas3(source, numeric_string, matrix_row_major, rhs_row_major,
162  true, true);
163  }
164 
165  std::string prog_name = program_name();
166  #ifdef VIENNACL_BUILD_INFO
167  std::cout << "Creating program " << prog_name << std::endl;
168  #endif
169  ctx.add_program(source, prog_name);
170  init_done[ctx.handle().get()] = true;
171  } //if
172  } //init
173 };
174 
175 } // namespace kernels
176 } // namespace opencl
177 } // namespace linalg
178 } // namespace viennacl
179 #endif
180 
Implements a OpenCL platform within ViennaCL.
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:484
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:191
void generate_matrix_solve_blas3(StringT &source, std::string const &numeric_string, bool row_major_A, bool row_major_B, bool upper_solve, bool unit_diagonal)
Main kernel class for the generation of matrix solve kernels.
Representation of an OpenCL kernel in ViennaCL.
std::string type_to_string(viennacl::row_major)
Definition: matrix.hpp:917
static void init(viennacl::ocl::context &ctx)
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
Runtime generation of OpenCL kernels for matrix operations.