1 #ifndef VIENNACL_LINALG_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_VECTOR_OPERATIONS_HPP_
38 #ifdef VIENNACL_WITH_OPENCL
42 #ifdef VIENNACL_WITH_CUDA
50 template<
typename DestNumericT,
typename SrcNumericT>
60 #ifdef VIENNACL_WITH_OPENCL
65 #ifdef VIENNACL_WITH_CUDA
77 template<
typename T,
typename ScalarType1>
79 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha)
88 #ifdef VIENNACL_WITH_OPENCL
93 #ifdef VIENNACL_WITH_CUDA
106 template<
typename T,
typename ScalarType1,
typename ScalarType2>
108 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
109 vector_base<T> const & vec3, ScalarType2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
118 vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
119 vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
121 #ifdef VIENNACL_WITH_OPENCL
124 vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
125 vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
128 #ifdef VIENNACL_WITH_CUDA
131 vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
132 vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
143 template<
typename T,
typename ScalarType1,
typename ScalarType2>
145 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
146 vector_base<T> const & vec3, ScalarType2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
155 vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
156 vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
158 #ifdef VIENNACL_WITH_OPENCL
161 vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
162 vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
165 #ifdef VIENNACL_WITH_CUDA
168 vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
169 vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
194 #ifdef VIENNACL_WITH_OPENCL
199 #ifdef VIENNACL_WITH_CUDA
227 #ifdef VIENNACL_WITH_OPENCL
232 #ifdef VIENNACL_WITH_CUDA
254 template<
typename T,
typename OP>
265 #ifdef VIENNACL_WITH_OPENCL
270 #ifdef VIENNACL_WITH_CUDA
285 #define VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(OPNAME) \
286 template<typename T> \
287 viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_##OPNAME> > \
288 element_##OPNAME(vector_base<T> const & v1, vector_base<T> const & v2) \
290 return viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_##OPNAME> >(v1, v2); \
293 template<typename V1, typename V2, typename OP, typename T> \
294 viennacl::vector_expression<const vector_expression<const V1, const V2, OP>, const vector_base<T>, op_element_binary<op_##OPNAME> > \
295 element_##OPNAME(vector_expression<const V1, const V2, OP> const & proxy, vector_base<T> const & v2) \
297 return viennacl::vector_expression<const vector_expression<const V1, const V2, OP>, const vector_base<T>, op_element_binary<op_##OPNAME> >(proxy, v2); \
300 template<typename T, typename V2, typename V3, typename OP> \
301 viennacl::vector_expression<const vector_base<T>, const vector_expression<const V2, const V3, OP>, op_element_binary<op_##OPNAME> > \
302 element_##OPNAME(vector_base<T> const & v1, vector_expression<const V2, const V3, OP> const & proxy) \
304 return viennacl::vector_expression<const vector_base<T>, const vector_expression<const V2, const V3, OP>, op_element_binary<op_##OPNAME> >(v1, proxy); \
307 template<typename V1, typename V2, typename OP1, \
308 typename V3, typename V4, typename OP2> \
309 viennacl::vector_expression<const vector_expression<const V1, const V2, OP1>, \
310 const vector_expression<const V3, const V4, OP2>, \
311 op_element_binary<op_##OPNAME> > \
312 element_##OPNAME(vector_expression<const V1, const V2, OP1> const & proxy1, \
313 vector_expression<const V3, const V4, OP2> const & proxy2) \
315 return viennacl::vector_expression<const vector_expression<const V1, const V2, OP1>, \
316 const vector_expression<const V3, const V4, OP2>, \
317 op_element_binary<op_##OPNAME> >(proxy1, proxy2); \
320 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(
prod)
321 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(div)
322 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(pow)
324 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(eq)
325 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(neq)
326 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(greater)
327 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(less)
328 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(geq)
329 VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(leq)
331 #undef VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS
334 #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \
335 template<typename T> \
336 viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<op_##funcname> > \
337 element_##funcname(vector_base<T> const & v) \
339 return viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<op_##funcname> >(v, v); \
341 template<typename LHS, typename RHS, typename OP> \
342 viennacl::vector_expression<const vector_expression<const LHS, const RHS, OP>, \
343 const vector_expression<const LHS, const RHS, OP>, \
344 op_element_unary<op_##funcname> > \
345 element_##funcname(vector_expression<const LHS, const RHS, OP> const & proxy) \
347 return viennacl::vector_expression<const vector_expression<const LHS, const RHS, OP>, \
348 const vector_expression<const LHS, const RHS, OP>, \
349 op_element_unary<op_##funcname> >(proxy, proxy); \
370 #undef VIENNACL_MAKE_UNARY_ELEMENT_OP
391 assert( vec1.
size() == vec2.
size() && bool(
"Size mismatch") );
398 #ifdef VIENNACL_WITH_OPENCL
403 #ifdef VIENNACL_WITH_CUDA
416 template<
typename LHS,
typename RHS,
typename OP,
typename T>
427 template<
typename T,
typename LHS,
typename RHS,
typename OP>
438 template<
typename LHS1,
typename RHS1,
typename OP1,
439 typename LHS2,
typename RHS2,
typename OP2,
typename T>
463 assert( vec1.
size() == vec2.
size() && bool(
"Size mismatch") );
470 #ifdef VIENNACL_WITH_OPENCL
475 #ifdef VIENNACL_WITH_CUDA
488 template<
typename LHS,
typename RHS,
typename OP,
typename T>
499 template<
typename T,
typename LHS,
typename RHS,
typename OP>
510 template<
typename LHS1,
typename RHS1,
typename OP1,
511 typename LHS2,
typename RHS2,
typename OP2,
typename S3>
534 assert( x.
size() == y_tuple.
const_at(0).size() && bool(
"Size mismatch") );
535 assert( result.
size() == y_tuple.
const_size() && bool(
"Number of elements does not match result size") );
542 #ifdef VIENNACL_WITH_OPENCL
547 #ifdef VIENNACL_WITH_CUDA
574 #ifdef VIENNACL_WITH_OPENCL
579 #ifdef VIENNACL_WITH_CUDA
597 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
621 #ifdef VIENNACL_WITH_OPENCL
626 #ifdef VIENNACL_WITH_CUDA
643 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
668 #ifdef VIENNACL_WITH_OPENCL
673 #ifdef VIENNACL_WITH_CUDA
690 template<
typename LHS,
typename RHS,
typename OP,
typename T>
713 #ifdef VIENNACL_WITH_OPENCL
718 #ifdef VIENNACL_WITH_CUDA
735 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
760 #ifdef VIENNACL_WITH_OPENCL
765 #ifdef VIENNACL_WITH_CUDA
782 template<
typename LHS,
typename RHS,
typename OP,
typename T>
805 #ifdef VIENNACL_WITH_OPENCL
810 #ifdef VIENNACL_WITH_CUDA
827 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
851 #ifdef VIENNACL_WITH_OPENCL
855 #ifdef VIENNACL_WITH_CUDA
870 template<
typename LHS,
typename RHS,
typename OP>
884 template<
typename NumericT>
892 #ifdef VIENNACL_WITH_OPENCL
897 #ifdef VIENNACL_WITH_CUDA
914 template<
typename LHS,
typename RHS,
typename OP,
typename NumericT>
935 #ifdef VIENNACL_WITH_OPENCL
940 #ifdef VIENNACL_WITH_CUDA
957 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
971 template<
typename NumericT>
979 #ifdef VIENNACL_WITH_OPENCL
984 #ifdef VIENNACL_WITH_CUDA
1001 template<
typename LHS,
typename RHS,
typename OP,
typename NumericT>
1014 template<
typename T>
1022 #ifdef VIENNACL_WITH_OPENCL
1027 #ifdef VIENNACL_WITH_CUDA
1044 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
1058 template<
typename NumericT>
1066 #ifdef VIENNACL_WITH_OPENCL
1071 #ifdef VIENNACL_WITH_CUDA
1088 template<
typename LHS,
typename RHS,
typename OP,
typename NumericT>
1101 template<
typename T>
1109 #ifdef VIENNACL_WITH_OPENCL
1114 #ifdef VIENNACL_WITH_CUDA
1131 template<
typename LHS,
typename RHS,
typename OP,
typename S2>
1151 template<
typename T>
1161 #ifdef VIENNACL_WITH_OPENCL
1166 #ifdef VIENNACL_WITH_CUDA
1189 template<
typename NumericT>
1198 #ifdef VIENNACL_WITH_OPENCL
1204 #ifdef VIENNACL_WITH_CUDA
1222 template<
typename NumericT>
1239 template<
typename NumericT>
1248 #ifdef VIENNACL_WITH_OPENCL
1254 #ifdef VIENNACL_WITH_CUDA
1272 template<
typename NumericT>
1279 template<
typename T,
typename LHS,
typename RHS,
typename OP>
1283 assert( (v1.
size() > 0) &&
bool(
"Vector not yet initialized!") );
1290 template<
typename T,
typename LHS,
typename RHS,
typename OP>
1294 assert( (v1.
size() > 0) &&
bool(
"Vector not yet initialized!") );
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector, where the result is stored on a CPU scalar.
vcl_size_t const_size() const
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void vector_assign(vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void norm_2_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the l^2-norm of a vector - implementation.
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
void inclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan on the host using OpenMP.
Worker class for decomposing expression templates.
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors with the final reduction step on the CPU - dispatcher inter...
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector.
void min_cpu(vector_base< T > const &vec, T &result)
Computes the minimum of a vector with final reduction on the CPU.
vector< NumericT > operator-=(vector_base< NumericT > &v1, const viennacl::vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, viennacl::op_prod > &proxy)
Implementation of the operation v1 -= A * v2, where A is a matrix.
Implementations of NMF operations using OpenCL.
Exception class in case of memory errors.
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on CPU.
void max_cpu(vector_base< T > const &vec, T &result)
Computes the maximum of a vector with final reduction on the CPU.
Generic size and resize functionality for different vector and matrix types.
Defines the worker class for decomposing an expression tree into small chunks, which can be processed...
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector.
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void sum_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the sum of all elements from the vector.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vector< NumericT > operator+=(vector_base< NumericT > &v1, const viennacl::vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, viennacl::op_prod > &proxy)
Implementation of the operation v1 += A * v2, where A is a matrix.
void sum_impl(vector_base< T > const &vec, scalar< T > &result)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on the CPU - dispatcher interface.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector.
void max_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the maximum of a vector, both reduction stages run on the GPU.
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using CUDA.
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector, where the result is stored in an OpenCL buffer. ...
void norm_1_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the l^1-norm of a vector.
An expression template class that represents a binary operation that yields a vector.
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
A tag class representing inplace addition.
void max_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU...
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - dispatcher interface.
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
viennacl::vector< float > v1
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
void max_impl(vector_base< T > const &vec, scalar< T > &result)
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, vector_base< T > &partial_result)
Computes the partial inner product of two vectors - implementation. Library users should call inner_p...
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector.
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using CUDA.
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation.
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum value of a vector, where the result is stored in an OpenCL buffer.
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, scalar< T > &result)
Computes the inner product of two vectors - dispatcher interface.
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum value of a vector, where the value is stored in a host value.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on the CPU.
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on CPU.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the minimum of a vector.
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
#define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname)
void norm_inf_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the supremum-norm of a vector.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
void inner_prod_cpu(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied.
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector.
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector - dispatcher interface.
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector.
void sum_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the sum of a vector, first reduction stage on the GPU, second stage on the CPU...
Common base class for dense vectors, vector ranges, and vector slices.
void sum_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the sum of a vector, both reduction stages run on the GPU.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void min_impl(vector_base< T > const &vec, scalar< T > &result)
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using CUDA.
void exclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan on the host using OpenMP.
void min_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU...
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
void norm_1_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the l^1-norm of a vector.
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void inclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan.
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void min_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the minimum of a vector, both reduction stages run on the GPU.
A tag class representing inplace subtraction.
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
void sum_cpu(vector_base< T > const &vec, T &result)
Computes the sum of a vector with final reduction on the CPU.
void element_op(matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using CUDA.
void norm_inf_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the supremum-norm of a vector.
cl_uint index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
size_type size() const
Returns the length of the vector (cf. std::vector)
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector.
VectorType const & const_at(vcl_size_t i) const
Implementation of a range object for use with proxy objects.
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied.
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
Implementation of the ViennaCL scalar class.
Implementations of NMF operations using CUDA.
void norm_2_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector with final reduction on the CPU.
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector.
Simple enable-if variant that uses the SFINAE pattern.
memory_types get_active_handle_id() const
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Implementations of NMF operations using a plain single-threaded or OpenMP-enabled execution on CPU...