ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
nmf_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2016, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
26 
28 
29 namespace viennacl
30 {
31 namespace linalg
32 {
33 namespace cuda
34 {
35 
37 template<typename NumericT>
38 __global__ void el_wise_mul_div(NumericT * matrix1,
39  NumericT const * matrix2,
40  NumericT const * matrix3,
41  unsigned int size)
42 {
43  for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i +=gridDim.x * blockDim.x)
44  {
45  NumericT val = matrix1[i] * matrix2[i];
46  NumericT divisor = matrix3[i];
47  matrix1[i] = (divisor > (NumericT) 0.00001) ? (val / divisor) : NumericT(0);
48  }
49 }
50 
58 template<typename NumericT>
62  viennacl::linalg::nmf_config const & conf)
63 {
64  vcl_size_t k = W.size2();
65  conf.iters_ = 0;
66 
69 
72 
76 
80 
82 
84 
85  NumericT last_diff = 0;
86  NumericT diff_init = 0;
87  bool stagnation_flag = false;
88 
89  for (vcl_size_t i = 0; i < conf.max_iterations(); i++)
90  {
91  conf.iters_ = i + 1;
92 
93  hn = viennacl::linalg::prod(trans(W), V);
94  htmp = viennacl::linalg::prod(trans(W), W);
95  hd = viennacl::linalg::prod(htmp, H);
96 
97  el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(H),
98  viennacl::cuda_arg<NumericT>(hn),
99  viennacl::cuda_arg<NumericT>(hd),
100  static_cast<unsigned int>(H.internal_size1() * H.internal_size2()));
101  VIENNACL_CUDA_LAST_ERROR_CHECK("el_wise_mul_div");
102 
103  wn = viennacl::linalg::prod(V, trans(H));
104  wtmp = viennacl::linalg::prod(W, H);
105  wd = viennacl::linalg::prod(wtmp, trans(H));
106 
107  el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(W),
108  viennacl::cuda_arg<NumericT>(wn),
109  viennacl::cuda_arg<NumericT>(wd),
110  static_cast<unsigned int>( W.internal_size1() * W.internal_size2()));
111  VIENNACL_CUDA_LAST_ERROR_CHECK("el_wise_mul_div");
112 
113  if (i % conf.check_after_steps() == 0) //check for convergence
114  {
115  appr = viennacl::linalg::prod(W, H);
116 
117  appr -= V;
119 
120  if (i == 0)
121  diff_init = diff_val;
122 
123  if (conf.print_relative_error())
124  std::cout << diff_val / diff_init << std::endl;
125 
126  // Approximation check
127  if (diff_val / diff_init < conf.tolerance())
128  break;
129 
130  // Stagnation check
131  if (std::fabs(diff_val - last_diff) / (diff_val * conf.check_after_steps()) < conf.stagnation_tolerance()) //avoid situations where convergence stagnates
132  {
133  if (stagnation_flag) // iteration stagnates (two iterates with no notable progress)
134  break;
135  else
136  // record stagnation in this iteration
137  stagnation_flag = true;
138  } else
139  // good progress in this iteration, so unset stagnation flag
140  stagnation_flag = false;
141 
142  // prepare for next iterate:
143  last_diff = diff_val;
144  }
145  }
146 }
147 
148 } //namespace cuda
149 } //namespace linalg
150 } //namespace viennacl
151 
152 #endif /* VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_ */
void nmf(viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf)
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung...
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
vcl_size_t check_after_steps() const
Number of steps after which the convergence of NMF should be checked (again)
Configuration class for the nonnegative-matrix-factorization algorithm. Specify tolerances, maximum iteration counts, etc., here.
float NumericT
Definition: bisect.cpp:40
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:239
__global__ void el_wise_mul_div(NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size)
Main CUDA kernel for nonnegative matrix factorization of a dense matrices.
vcl_size_t max_iterations() const
Returns the maximum number of iterations for the NMF algorithm.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
double tolerance() const
Returns the relative tolerance for convergence.
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
double stagnation_tolerance() const
Relative tolerance for the stagnation check.
Common routines for CUDA execution.
bool row_major() const
Definition: matrix_def.hpp:248
scalar_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_norm_frobenius > norm_frobenius(const matrix_base< NumericT > &A)
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
Definition: common.hpp:30
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
Definition: blas3_solve.cpp:50
bool print_relative_error() const
Returns the flag specifying whether the relative tolerance should be printed in each iteration...