1 #ifndef VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
37 template<
typename NumericT>
43 for (
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i +=gridDim.x * blockDim.x)
45 NumericT val = matrix1[i] * matrix2[i];
58 template<
typename NumericT>
87 bool stagnation_flag =
false;
97 el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(H),
98 viennacl::cuda_arg<NumericT>(hn),
99 viennacl::cuda_arg<NumericT>(hd),
107 el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(W),
108 viennacl::cuda_arg<NumericT>(wn),
109 viennacl::cuda_arg<NumericT>(wd),
121 diff_init = diff_val;
124 std::cout << diff_val / diff_init << std::endl;
127 if (diff_val / diff_init < conf.
tolerance())
137 stagnation_flag =
true;
140 stagnation_flag =
false;
143 last_diff = diff_val;
void nmf(viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf)
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung...
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
vcl_size_t check_after_steps() const
Number of steps after which the convergence of NMF should be checked (again)
Configuration class for the nonnegative-matrix-factorization algorithm. Specify tolerances, maximum iteration counts, etc., here.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
__global__ void el_wise_mul_div(NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size)
Main CUDA kernel for nonnegative matrix factorization of a dense matrices.
vcl_size_t max_iterations() const
Returns the maximum number of iterations for the NMF algorithm.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
double tolerance() const
Returns the relative tolerance for convergence.
size_type size2() const
Returns the number of columns.
size_type size1() const
Returns the number of rows.
double stagnation_tolerance() const
Relative tolerance for the stagnation check.
Common routines for CUDA execution.
scalar_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_norm_frobenius > norm_frobenius(const matrix_base< NumericT > &A)
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
bool print_relative_error() const
Returns the flag specifying whether the relative tolerance should be printed in each iteration...