Implementations of column-major dense matrix related operations, including matrix-vector products, using CUDA. More...
Go to the source code of this file.
Namespaces | |
viennacl | |
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. | |
viennacl::linalg | |
Provides all linear algebra operations which are not covered by operator overloads. | |
viennacl::linalg::cuda | |
Holds all CUDA compute kernels used by ViennaCL. | |
Functions | |
template<typename DestNumericT , typename SrcNumericT > | |
__global__ void | viennacl::linalg::cuda::convert_col_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | viennacl::linalg::cuda::scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::bidiag_pack_row_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::bidiag_pack_column_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::copy_col_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::copy_col_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::copy_row_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::copy_row_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::house_update_A_left_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::house_update_A_left_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::house_update_A_right_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::house_update_A_right_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__device__ void | viennacl::linalg::cuda::col_reduce_lcl_array (T *sums, unsigned int th_Idx, unsigned int bl_Dim) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::house_update_QL_row_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::house_update_QL_column_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::givens_next_row_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i) |
template<typename T > | |
__global__ void | viennacl::linalg::cuda::givens_next_column_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i) |
Implementations of column-major dense matrix related operations, including matrix-vector products, using CUDA.
Definition in file matrix_operations_col.hpp.