Implementations of vector operations using CUDA. More...
#include <cmath>
#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"
#include "viennacl/linalg/cuda/common.hpp"
Go to the source code of this file.
Namespaces | |
viennacl | |
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. | |
viennacl::linalg | |
Provides all linear algebra operations which are not covered by operator overloads. | |
viennacl::linalg::cuda | |
Holds all CUDA compute kernels used by ViennaCL. | |
viennacl::linalg::cuda::detail | |
Helper functions for the CUDA linear algebra backend. | |
Macros | |
#define | VIENNACL_MDOT_WORKGROUP_SIZE 128 |
#define | VIENNACL_MDOT_WORKGROUP_NUM 128 |
Functions | |
template<typename DestNumericT , typename SrcNumericT > | |
__global__ void | viennacl::linalg::cuda::convert_kernel (DestNumericT *dest, unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, SrcNumericT const *src, unsigned int start_src, unsigned int inc_src) |
template<typename DestNumericT , typename SrcNumericT > | |
void | viennacl::linalg::cuda::convert (vector_base< DestNumericT > &dest, vector_base< SrcNumericT > const &src) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT , typename ScalarType1 > | |
void | viennacl::linalg::cuda::av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | viennacl::linalg::cuda::avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | viennacl::linalg::cuda::avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha) |
template<typename NumericT , typename ScalarT1 > | |
void | viennacl::linalg::cuda::vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false) |
Assign a constant value to a vector (-range/-slice) More... | |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2) |
Swaps the contents of two vectors, data is copied. More... | |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename NumericT , typename OpT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy) |
Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More... | |
template<typename OpT > | |
void | viennacl::linalg::cuda::element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy) |
template<typename OpT > | |
void | viennacl::linalg::cuda::element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT , typename ScalarT > | |
void | viennacl::linalg::cuda::inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the l^1-norm of a vector. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the l^1-norm of a vector. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the l^2-norm of a vector - implementation. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the l^2-norm of a vector - implementation. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the supremum-norm of a vector. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the supremum-norm of a vector. More... | |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the maximum of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::max_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the minimum of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::min_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::sum_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the sum of all entries of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::sum_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the sum of all entries of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
__device__ NumericT | viennacl::linalg::cuda::cuda_abs (NumericT val) |
__device__ unsigned long | viennacl::linalg::cuda::cuda_abs (unsigned long val) |
__device__ unsigned int | viennacl::linalg::cuda::cuda_abs (unsigned int val) |
__device__ unsigned short | viennacl::linalg::cuda::cuda_abs (unsigned short val) |
__device__ unsigned char | viennacl::linalg::cuda::cuda_abs (unsigned char val) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result) |
template<typename NumericT > | |
vcl_size_t | viennacl::linalg::cuda::index_norm_inf (vector_base< NumericT > const &vec1) |
Computes the index of the first entry that is equal to the supremum-norm in modulus. More... | |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta) |
Computes a plane rotation of two vectors. More... | |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::scan_kernel_1 (NumericT const *X, unsigned int startX, unsigned int incX, unsigned int sizeX, NumericT *Y, unsigned int startY, unsigned int incY, unsigned int scan_offset, NumericT *carries) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::scan_kernel_2 (NumericT *carries) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::scan_kernel_3 (NumericT *Y, unsigned int startY, unsigned int incY, unsigned int sizeY, NumericT const *carries) |
template<typename NumericT > | |
void | viennacl::linalg::cuda::detail::scan_impl (vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive) |
Worker routine for scan routines. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::inclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output) |
This function implements an inclusive scan using CUDA. More... | |
template<typename NumericT > | |
void | viennacl::linalg::cuda::exclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output) |
This function implements an exclusive scan using CUDA. More... | |
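Implementations of vector operations using CUDA.
Implementations of vector operations (conversion, assignment, swap, element-wise operations, inner products, norms, min/max/sum reductions, plane rotation, and inclusive/exclusive scans) using CUDA kernels and their host-side wrapper functions.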
Definition in file vector_operations.hpp.
#define VIENNACL_MDOT_WORKGROUP_NUM 128 |
Definition at line 1804 of file vector_operations.hpp.
#define VIENNACL_MDOT_WORKGROUP_SIZE 128 |
Definition at line 1803 of file vector_operations.hpp.