ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
viennacl::linalg::cuda Namespace Reference

Holds all CUDA compute kernels used by ViennaCL. More...

Namespaces

 amg
 
 detail
 Helper functions for the CUDA linear algebra backend.
 

Classes

struct  mat_mult_matrix_index
 Helper struct for accessing an element of a row- or column-major matrix. More...
 

Functions

template<typename NumericT >
void bisectSmall (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
 
template<typename NumericT >
void bisectLarge (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
 
template<typename NumericT >
void bisectLarge_OneIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
 
template<typename NumericT >
void bisectLarge_MultIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
 
template<typename NumericT >
__device__ void writeToGmem (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum, unsigned short *s_compaction_list, unsigned short *s_cl_helper, unsigned int offset_mult_lambda)
 Write data to global memory. More...
 
template<typename NumericT >
__device__ void compactStreamsFinal (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, unsigned int &offset_mult_lambda, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper, unsigned int is_one_lambda, unsigned int is_one_lambda_2, NumericT &left, NumericT &right, NumericT &left_2, NumericT &right_2, unsigned int &left_count, unsigned int &right_count, unsigned int &left_count_2, unsigned int &right_count_2, unsigned int c_block_iend, unsigned int c_sum_block, unsigned int c_block_iend_2, unsigned int c_sum_block_2)
 Perform final stream compaction before writing data to global memory. More...
 
__device__ void scanCompactBlocksStartAddress (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 Compute addresses to obtain compact list of block start addresses. More...
 
__device__ void scanSumBlocks (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 Perform scan to obtain number of eigenvalues before a specific block. More...
 
__device__ void scanInitial (const unsigned int tid, const unsigned int tid_2, const unsigned int mat_size, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 
template<typename NumericT >
__device__ void storeNonEmptyIntervalsLarge (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, NumericT left, NumericT mid, NumericT right, const unsigned short left_count, const unsigned short mid_count, const unsigned short right_count, NumericT epsilon, unsigned int &compact_second_chunk, unsigned short *s_compaction_list, unsigned int &is_active_second)
 
template<typename NumericT >
__global__ void bisectKernelLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon, unsigned int *g_num_one, unsigned int *g_num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum)
 Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. Parameters: g_d - diagonal elements in global memory; g_s - superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n - size of matrix; lg - lower bound of input interval (e.g. Gerschgorin interval); ug - upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count - number of eigenvalues that are smaller than lg; ug_eig_count - number of eigenvalues that are smaller than ug; epsilon - desired accuracy of eigenvalues to compute. More...
 
template<typename NumericT >
__global__ void bisectKernelLarge_MultIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int *blocks_mult, unsigned int *blocks_mult_sum, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, NumericT *g_lambda, unsigned int *g_pos, NumericT precision)
 
template<typename NumericT >
__global__ void bisectKernelLarge_OneIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision)
 
template<typename NumericT >
__global__ void bisectKernelSmall (const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon)
 Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. More...
 
__device__ int floorPow2 (int n)
 
__device__ int ceilPow2 (int n)
 
template<typename NumericT >
__device__ NumericT computeMidpoint (const NumericT left, const NumericT right)
 
template<class S , class T , class NumericT >
__device__ void storeInterval (unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision)
 
template<typename NumericT >
__device__ unsigned int computeNumSmallerEigenvals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
 
template<typename NumericT >
__device__ unsigned int computeNumSmallerEigenvalsLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
 
template<class S , class T , class NumericT >
__device__ void storeNonEmptyIntervals (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second)
 Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread. More...
 
template<class T >
__device__ void createIndicesCompaction (T *s_compaction_list_exc, unsigned int num_threads_compaction)
 
template<class T , class NumericT >
__device__ void compactIntervals (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second)
 Perform stream compaction for second child intervals. More...
 
template<class T , class S , class NumericT >
__device__ void storeIntervalConverged (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second)
 
template<class T , class NumericT >
__device__ void subdivideActiveIntervalMulti (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
 Subdivide interval if active and not already converged. More...
 
template<class T , class NumericT >
__device__ void subdivideActiveInterval (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
 Subdivide interval if active and not already converged. More...
 
template<typename NumericT >
__global__ void matrix_matrix_upper_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal)
 
template<typename NumericT >
__global__ void matrix_matrix_lower_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal)
 
template<typename NumericT , typename SolverTagT >
void inplace_solve (matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag)
 Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). More...
 
template<typename NumericT >
__global__ void triangular_substitute_inplace_row_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT >
__global__ void triangular_substitute_inplace_col_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT , typename SolverTagT >
void inplace_solve (matrix_base< NumericT > const &mat, vector_base< NumericT > &vec, SolverTagT)
 Direct inplace solver for dense triangular systems (non-transposed version) More...
 
__host__ __device__ float2 operator+ (float2 a, float2 b)
 
__host__ __device__ float2 operator- (float2 a, float2 b)
 
template<typename SCALARTYPE >
__device__ float2 operator/ (float2 a, SCALARTYPE b)
 
__device__ float2 operator* (float2 in1, float2 in2)
 
__host__ __device__ double2 operator+ (double2 a, double2 b)
 
__host__ __device__ double2 operator- (double2 a, double2 b)
 
template<typename SCALARTYPE >
__host__ __device__ double2 operator/ (double2 a, SCALARTYPE b)
 
__host__ __device__ double2 operator* (double2 in1, double2 in2)
 
__device__ unsigned int get_reorder_num (unsigned int v, unsigned int bit_size)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_direct (const Numeric2T *input, Numeric2T *output, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void direct (viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Direct 1D algorithm for computing Fourier transformation. More...
 
template<typename NumericT , unsigned int AlignmentV>
void direct (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &in, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Direct 2D algorithm for computing Fourier transformation. More...
 
template<typename NumericT >
__global__ void fft_reorder (NumericT *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void reorder (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_radix2_local (Numeric2T *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_radix2 (Numeric2T *input, unsigned int s, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void radix2 (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Radix-2 1D algorithm for computing Fourier transformation. More...
 
template<typename NumericT , unsigned int AlignmentV>
void radix2 (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Radix-2 2D algorithm for computing Fourier transformation. More...
 
template<typename Numeric2T , typename NumericT >
__global__ void bluestein_post (Numeric2T *Z, Numeric2T *out, unsigned int size, NumericT sign)
 
template<typename Numeric2T , typename NumericT >
__global__ void bluestein_pre (Numeric2T *input, Numeric2T *A, Numeric2T *B, unsigned int size, unsigned int ext_size, NumericT sign)
 
template<typename NumericT >
__global__ void zero2 (NumericT *input1, NumericT *input2, unsigned int size)
 
template<typename NumericT , unsigned int AlignmentV>
void bluestein (viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
 Bluestein's algorithm for computing Fourier transformation. More...
 
template<typename NumericT >
__global__ void fft_mult_vec (const NumericT *input1, const NumericT *input2, NumericT *output, unsigned int size)
 
template<typename NumericT , unsigned int AlignmentV>
void multiply_complex (viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
 Multiply two complex vectors and store the result in output. More...
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_div_vec_scalar (Numeric2T *input1, unsigned int size, NumericT factor)
 
template<typename NumericT , unsigned int AlignmentV>
void normalize (viennacl::vector< NumericT, AlignmentV > &input)
 Normalize a vector by its own size. More...
 
template<typename NumericT >
__global__ void transpose (const NumericT *input, NumericT *output, unsigned int row_num, unsigned int col_num)
 
template<typename NumericT , unsigned int AlignmentV>
void transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &input, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &output)
 Transpose matrix. More...
 
template<typename NumericT >
__global__ void transpose_inplace (NumericT *input, unsigned int row_num, unsigned int col_num)
 
template<typename NumericT , unsigned int AlignmentV>
void transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
 In-place matrix transpose. More...
 
template<typename RealT , typename ComplexT >
__global__ void real_to_complex (const RealT *in, ComplexT *out, unsigned int size)
 
template<typename NumericT >
void real_to_complex (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
 Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More...
 
template<typename ComplexT , typename RealT >
__global__ void complex_to_real (const ComplexT *in, RealT *out, unsigned int size)
 
template<typename NumericT >
void complex_to_real (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
 Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More...
 
template<typename NumericT >
__global__ void reverse_inplace (NumericT *vec, unsigned int size)
 
template<typename NumericT >
void reverse (viennacl::vector_base< NumericT > &in)
 Reverse the vector to the opposite order and save it in the input vector. More...
 
template<typename IndexT >
__global__ void extract_L_kernel_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, unsigned int A_size1, unsigned int *L_row_indices)
 
template<typename NumericT >
__global__ void extract_L_kernel_2 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, unsigned int const *L_row_indices, unsigned int *L_col_indices, NumericT *L_elements)
 
template<typename NumericT >
void extract_L (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
 
template<typename NumericT >
__global__ void ilu_scale_kernel_1 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, NumericT *D_elements)
 
template<typename NumericT >
__global__ void ilu_scale_kernel_2 (unsigned int const *R_row_indices, unsigned int const *R_col_indices, NumericT *R_elements, unsigned int R_size1, NumericT *D_elements)
 Scales values in a matrix such that output = D * input * D, where D is a diagonal matrix (only the diagonal is provided) More...
 
template<typename NumericT >
void icc_scale (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
 Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L accordingly. More...
 
template<typename NumericT >
__global__ void icc_chow_patel_sweep_kernel (unsigned int const *L_row_indices, unsigned int const *L_col_indices, NumericT *L_elements, NumericT const *L_backup, unsigned int L_size1, NumericT const *aij_L)
 CUDA kernel for one Chow-Patel-ICC sweep. More...
 
template<typename NumericT >
void icc_chow_patel_sweep (compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L)
 Performs one nonlinear relaxation step in the Chow-Patel-ICC using CUDA (cf. Algorithm 2 in paper) More...
 
template<typename IndexT >
__global__ void extract_LU_kernel_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, unsigned int A_size1, unsigned int *L_row_indices, unsigned int *U_row_indices)
 
template<typename NumericT >
__global__ void extract_LU_kernel_2 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, unsigned int const *L_row_indices, unsigned int *L_col_indices, NumericT *L_elements, unsigned int const *U_row_indices, unsigned int *U_col_indices, NumericT *U_elements)
 
template<typename NumericT >
void extract_LU (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
 
template<typename NumericT >
void ilu_scale (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
 Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. More...
 
template<typename NumericT >
__global__ void ilu_chow_patel_sweep_kernel (unsigned int const *L_row_indices, unsigned int const *L_col_indices, NumericT *L_elements, NumericT const *L_backup, unsigned int L_size1, NumericT const *aij_L, unsigned int const *U_trans_row_indices, unsigned int const *U_trans_col_indices, NumericT *U_trans_elements, NumericT const *U_trans_backup, NumericT const *aij_U_trans)
 CUDA kernel for one Chow-Patel-ILU sweep. More...
 
template<typename NumericT >
void ilu_chow_patel_sweep (compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans)
 Performs one nonlinear relaxation step in the Chow-Patel-ILU using CUDA (cf. Algorithm 2 in paper) More...
 
template<typename NumericT >
__global__ void ilu_form_neumann_matrix_kernel (unsigned int const *R_row_indices, unsigned int const *R_col_indices, NumericT *R_elements, unsigned int R_size1, NumericT *D_elements)
 
template<typename NumericT >
void ilu_form_neumann_matrix (compressed_matrix< NumericT > &R, vector< NumericT > &diag_R)
 
template<typename NumericT >
__global__ void pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void pipelined_cg_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
__global__ void pipelined_cg_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset)
 
template<typename NumericT >
void pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void pipelined_bicgstab_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename T >
__global__ void pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size)
 
template<typename T >
void pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More...
 
template<typename T >
__global__ void pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size)
 
template<typename T >
void pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k)
 
template<typename T >
void pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
 
template<typename NumericT >
void pipelined_gmres_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename DestNumericT , typename SrcNumericT >
void convert (matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
 
template<typename NumericT , typename SizeT , typename DistanceT >
void trans (matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
 
template<typename NumericT , typename ScalarT >
void am (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void ambm (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void ambm_m (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
void matrix_assign (matrix_base< NumericT > &mat, NumericT s, bool clear=false)
 
template<typename NumericT >
void matrix_diagonal_assign (matrix_base< NumericT > &mat, NumericT s)
 
template<typename NumericT >
void matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
 
template<typename NumericT >
void matrix_diag_to_vector (matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec)
 
template<typename NumericT >
void matrix_row (matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)
 
template<typename NumericT >
void matrix_column (const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
 
template<typename NumericT , typename SizeT , typename OpT >
void element_op (matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename SizeT , typename OpT >
void element_op (matrix_base< float, SizeT > &A, matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename SizeT , typename OpT >
void element_op (matrix_base< double, SizeT > &A, matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
void prod_impl (const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
 Carries out matrix-vector multiplication. More...
 
template<typename NumericT , typename ScalarT >
void prod_impl (const matrix_base< NumericT > &A, bool trans_A, const matrix_base< NumericT > &B, bool trans_B, matrix_base< NumericT > &C, ScalarT alpha, ScalarT beta)
 Carries out matrix-matrix multiplication. More...
 
template<typename NumericT , typename ScalarT >
void scaled_rank_1_update (matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
 The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. More...
 
template<typename NumericT , typename VectorType >
void bidiag_pack (matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
 This function stores the diagonal and the superdiagonal of a matrix in two vectors. More...
 
template<typename NumericT >
void copy_vec (matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
 This function copies a row or a column from a matrix to a vector. More...
 
template<typename NumericT >
void house_update_A_left (matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
 This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P. More...
 
template<typename NumericT >
void house_update_A_right (matrix_base< NumericT > &A, vector_base< NumericT > &D)
 This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P. More...
 
template<typename NumericT >
void house_update_QL (matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
 This function updates the matrix Q, which is needed for the computation of the eigenvectors. More...
 
template<typename NumericT >
void givens_next (matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
 This function updates the matrix Q. It is part of the tql2 algorithm. More...
 
template<typename DestNumericT , typename SrcNumericT >
__global__ void convert_col_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void bidiag_pack_row_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void bidiag_pack_column_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void copy_col_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void copy_col_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void copy_row_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void copy_row_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_left_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_left_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_right_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_right_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__device__ void col_reduce_lcl_array (T *sums, unsigned int th_Idx, unsigned int bl_Dim)
 
template<typename T >
__global__ void house_update_QL_row_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ)
 
template<typename T >
__global__ void house_update_QL_column_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ)
 
template<typename T >
__global__ void givens_next_row_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i)
 
template<typename T >
__global__ void givens_next_column_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename DestNumericT , typename SrcNumericT >
__global__ void convert_row_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void trans_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_internal_size1, unsigned int A_internal_size2, unsigned int A_size1, unsigned int A_size2, unsigned int A_stride1, unsigned int A_stride2, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_internal_size1, unsigned int B_internal_size2, unsigned int B_stride1, unsigned int B_stride2, bool data_major)
 
template<typename NumericT >
__global__ void am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void matrix_row_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void element_op_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void matrix_row_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void trans_vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void el_wise_mul_div (NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size)
 Main CUDA kernel for nonnegative matrix factorization of dense matrices. More...
 
template<typename NumericT >
void nmf (viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf)
 The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. More...
 
template<typename NumericT >
__global__ void as_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2)
 
template<typename NumericT >
__global__ void as_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT >
viennacl::enable_if
< viennacl::is_scalar
< ScalarT1 >::value
&&viennacl::is_scalar
< ScalarT2 >::value
&&viennacl::is_any_scalar
< NumericT >::value >::type 
as (ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if
< viennacl::is_scalar
< ScalarT1 >::value
&&viennacl::is_scalar
< ScalarT2 >::value
&&viennacl::is_scalar
< ScalarT3 >::value
&&viennacl::is_any_scalar
< NumericT1 >::value
&&viennacl::is_any_scalar
< NumericT2 >::value >::type 
asbs (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if
< viennacl::is_scalar
< ScalarT1 >::value
&&viennacl::is_scalar
< ScalarT2 >::value
&&viennacl::is_scalar
< ScalarT3 >::value
&&viennacl::is_any_scalar
< NumericT1 >::value
&&viennacl::is_any_scalar
< NumericT2 >::value >::type 
asbs_s (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void scalar_swap_kernel (NumericT *s1, NumericT *s2)
 
template<typename ScalarT1 , typename ScalarT2 >
viennacl::enable_if
< viennacl::is_scalar
< ScalarT1 >::value
&&viennacl::is_scalar
< ScalarT2 >::value >::type 
swap (ScalarT1 &s1, ScalarT2 &s2)
 Swaps the contents of two scalars; the data is copied. More...
 
template<unsigned int SubWarpSizeV, typename AlphaBetaHandlerT , typename NumericT >
__global__ void compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result, NumericT beta)
 
template<typename AlphaBetaHandlerT , typename NumericT >
__global__ void compressed_matrix_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result, NumericT beta)
 
template<class NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, NumericT alpha, viennacl::vector_base< NumericT > &result, NumericT beta)
 Carries out matrix-vector multiplication with a compressed_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out sparse matrix-dense matrix multiplication, the first matrix being a compressed_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out matrix-trans(matrix) multiplication, the first matrix being a compressed_matrix and the second a transposed dense matrix. More...
 
template<typename NumericT >
__global__ void compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *result, unsigned int size)
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixT >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename NumericT >
__global__ void compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, unsigned int nonzero_rows, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result, NumericT beta)
 
template<typename NumericT >
void prod_impl (const viennacl::compressed_compressed_matrix< NumericT > &mat, const viennacl::vector_base< NumericT > &vec, NumericT alpha, viennacl::vector_base< NumericT > &result, NumericT beta)
 Carries out matrix-vector multiplication with a compressed_compressed_matrix. More...
 
template<typename NumericT >
__global__ void coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, NumericT beta)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, NumericT alpha, viennacl::vector_base< NumericT > &result, NumericT beta)
 Carries out matrix-vector multiplication with a coordinate_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Coordinate Matrix(COO)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Coordinate Matrix(COO)-Dense Transposed Matrix multiplication. More...
 
template<typename AlphaBetaHandlerT , typename NumericT >
__global__ void ell_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, NumericT beta, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, NumericT alpha, viennacl::vector_base< NumericT > &result, NumericT beta)
 Carries out matrix-vector multiplication with an ell_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More...
 
template<typename AlphaBetaHandlerT , typename NumericT >
__global__ void sliced_ell_matrix_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, unsigned int size_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result, NumericT beta, unsigned int block_size)
 
template<typename NumericT , typename IndexT >
void prod_impl (const viennacl::sliced_ell_matrix< NumericT, IndexT > &mat, const viennacl::vector_base< NumericT > &vec, NumericT alpha, viennacl::vector_base< NumericT > &result, NumericT beta)
 Carries out matrix-vector multiplication with a sliced_ell_matrix. More...
 
template<typename AlphaBetaHandlerT , typename NumericT >
__global__ void hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT alpha, NumericT *result, unsigned int start_result, unsigned int inc_result, NumericT beta, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, NumericT alpha, viennacl::vector_base< NumericT > &result, NumericT beta)
 Carries out matrix-vector multiplication with a hyb_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(HYB)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(HYB)-Dense Transposed Matrix multiplication. More...
 
template<typename NumericT >
__global__ void csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const NumericT *elements_L, const unsigned int *block_offsets, NumericT *result, unsigned int size)
 
template<typename NumericT >
__global__ void csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const NumericT *elements_U, const NumericT *diagonal_U, const unsigned int *block_offsets, NumericT *result, unsigned int size)
 
template<typename IndexT >
__device__ IndexT round_to_next_power_of_2 (IndexT val)
 
template<typename IndexT >
__global__ void compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group)
 
__device__ unsigned int merge_subwarp_symbolic (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int subwarpsize)
 
__device__ unsigned int merge_subwarp_symbolic_double (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int *output_array, unsigned int id_in_warp, unsigned int subwarpsize)
 
template<typename IndexT >
__global__ void compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices)
 
template<typename NumericT >
__device__ unsigned int merge_subwarp_numeric (NumericT scaling_factor, unsigned int input_start, unsigned int input_end, const unsigned int *input_indices, const NumericT *input_values, unsigned int invalid_token, unsigned int *output_indices, NumericT *output_values, unsigned int id_in_warp, unsigned int subwarpsize)
 
template<typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices, NumericT *scratchpad_values)
 
template<typename IndexT >
__global__ void compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row)
 
template<typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer)
 
template<typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer)
 
template<class NumericT , unsigned int AlignmentV>
void prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C)
 Carries out sparse_matrix-sparse_matrix multiplication for CSR matrices. More...
 
template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT subwarp_minimum_shuffle (IndexT min_index)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT subwarp_minimum_shared (IndexT min_index, IndexT id_in_warp, IndexT *shared_buffer)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__global__ void compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT subwarp_accumulate_shuffle (NumericT output_value)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT subwarp_accumulate_shared (NumericT output_value, unsigned int id_in_warp, NumericT *shared_buffer)
 
template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements)
 
template<typename DestNumericT , typename SrcNumericT >
__global__ void convert_kernel (DestNumericT *dest, unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, SrcNumericT const *src, unsigned int start_src, unsigned int inc_src)
 
template<typename DestNumericT , typename SrcNumericT >
void convert (vector_base< DestNumericT > &dest, vector_base< SrcNumericT > const &src)
 
template<typename NumericT >
__global__ void av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
__global__ void av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT , typename ScalarType1 >
void av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha)
 
template<typename NumericT , typename ScalarT1 >
void vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
 Assign a constant value to a vector (-range/-slice) More...
 
template<typename NumericT >
__global__ void vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
 Swaps the contents of two vectors, data is copied. More...
 
template<typename NumericT >
__global__ void element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT , typename OpT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy)
 Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More...
 
template<typename OpT >
void element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy)
 
template<typename OpT >
void element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
__global__ void inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT , typename ScalarT >
void inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
void inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
__global__ void inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results)
 
template<typename NumericT >
__global__ void vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result)
 
template<typename NumericT >
void inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result)
 
template<typename NumericT >
__global__ void norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
void norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
void norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
__global__ void vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
void max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the maximum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void max_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the minimum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void min_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void sum_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the sum of all entries of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void sum_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the sum of all entries of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
__device__ NumericT cuda_abs (NumericT val)
 
__device__ unsigned long cuda_abs (unsigned long val)
 
__device__ unsigned int cuda_abs (unsigned int val)
 
__device__ unsigned short cuda_abs (unsigned short val)
 
__device__ unsigned char cuda_abs (unsigned char val)
 
template<typename NumericT >
__global__ void index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result)
 
template<typename NumericT >
vcl_size_t index_norm_inf (vector_base< NumericT > const &vec1)
 Computes the index of the first entry that is equal to the supremum-norm in modulus. More...
 
template<typename NumericT >
__global__ void plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta)
 
template<typename NumericT >
void plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
 Computes a plane rotation of two vectors. More...
 
template<typename NumericT >
__global__ void scan_kernel_1 (NumericT const *X, unsigned int startX, unsigned int incX, unsigned int sizeX, NumericT *Y, unsigned int startY, unsigned int incY, unsigned int scan_offset, NumericT *carries)
 
template<typename NumericT >
__global__ void scan_kernel_2 (NumericT *carries)
 
template<typename NumericT >
__global__ void scan_kernel_3 (NumericT *Y, unsigned int startY, unsigned int incY, unsigned int sizeY, NumericT const *carries)
 
template<typename NumericT >
void inclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output)
 This function implements an inclusive scan using CUDA. More...
 
template<typename NumericT >
void exclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output)
 This function implements an exclusive scan using CUDA. More...
 

Detailed Description

Holds all CUDA compute kernels used by ViennaCL.

Function Documentation

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::am ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 113 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 59 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 95 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 87 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 124 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::ambm ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT > const &  mat3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 164 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 136 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 210 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 283 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 357 of file matrix_operations_col.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::ambm_m ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT > const &  mat3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 239 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 436 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 511 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 585 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 660 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 469 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 544 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 618 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 693 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 166 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 241 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 315 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 390 of file matrix_operations_row.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_any_scalar<NumericT>::value >::type viennacl::linalg::cuda::as ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 77 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::as_kernel ( NumericT s1,
const NumericT fac2,
unsigned int  options2,
const NumericT s2 
)

Definition at line 48 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::as_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2 
)

Definition at line 60 of file scalar_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
ScalarT3 const &  s3,
NumericT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 191 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
const NumericT fac2,
unsigned int  options2,
const NumericT s2,
const NumericT fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 99 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT const *  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 120 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
NumericT const *  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 141 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 162 of file scalar_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs_s ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
ScalarT3 const &  s3,
NumericT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 314 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
const NumericT fac2,
unsigned int  options2,
const NumericT s2,
const NumericT fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 222 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT const *  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 243 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
NumericT const *  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 264 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 285 of file scalar_operations.hpp.

template<typename NumericT , typename ScalarType1 >
void viennacl::linalg::cuda::av ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 144 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 77 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 110 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< NumericT > const &  vec3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 433 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 179 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 242 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 305 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 368 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv_v ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< NumericT > const &  vec3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 735 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 483 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 546 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 609 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 672 of file vector_operations.hpp.

template<typename NumericT , typename VectorType >
void viennacl::linalg::cuda::bidiag_pack ( matrix_base< NumericT > &  A,
VectorType &  dh,
VectorType &  sh 
)

This function stores the diagonal and the superdiagonal of a matrix in two vectors.

Parameters
A: The matrix from which the vectors will be extracted.
dh: The vector in which the diagonal of the matrix will be stored.
sh: The vector in which the superdiagonal of the matrix will be stored.

Definition at line 2489 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::bidiag_pack_column_major_kernel ( T *  A,
T *  D,
T *  S,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1456 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::bidiag_pack_row_major_kernel ( T *  A,
T *  D,
T *  S,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1434 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
const NumericT  lg,
const NumericT  ug,
const unsigned int  lg_eig_count,
const unsigned int  ug_eig_count,
NumericT  epsilon,
unsigned int *  g_num_one,
unsigned int *  g_num_blocks_mult,
NumericT g_left_one,
NumericT g_right_one,
unsigned int *  g_pos_one,
NumericT g_left_mult,
NumericT g_right_mult,
unsigned int *  g_left_count_mult,
unsigned int *  g_right_count_mult,
unsigned int *  g_blocks_mult,
unsigned int *  g_blocks_mult_sum 
)

Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.

Parameters
g_d: diagonal elements in global memory
g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0)
n: size of matrix
lg: lower bound of input interval (e.g. Gerschgorin interval)
ug: upper bound of input interval (e.g. Gerschgorin interval)
lg_eig_count: number of eigenvalues that are smaller than lg
ug_eig_count: number of eigenvalues that are smaller than ug
epsilon: desired accuracy of eigenvalues to compute

Definition at line 537 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge_MultIntervals ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
unsigned int *  blocks_mult,
unsigned int *  blocks_mult_sum,
NumericT g_left,
NumericT g_right,
unsigned int *  g_left_count,
unsigned int *  g_right_count,
NumericT g_lambda,
unsigned int *  g_pos,
NumericT  precision 
)

Perform the second step of the bisection algorithm for large matrices, for intervals that contained more than one eigenvalue after the first step.

Parameters
g_d: diagonal elements of symmetric, tridiagonal matrix
g_s: superdiagonal elements of symmetric, tridiagonal matrix
n: matrix size
blocks_mult: start addresses of blocks of intervals that are processed by one block of threads; each of the intervals contains more than one eigenvalue
blocks_mult_sum: total number of eigenvalues / singleton intervals in one block of intervals
g_left: left limits of intervals
g_right: right limits of intervals
g_left_count: number of eigenvalues less than left limits
g_right_count: number of eigenvalues less than right limits
g_lambda: final eigenvalue
g_pos: index of eigenvalue (in ascending order)
precision: desired precision of eigenvalues

Definition at line 68 of file bisect_kernel_large_multi.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge_OneIntervals ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
unsigned int  num_intervals,
NumericT g_left,
NumericT g_right,
unsigned int *  g_pos,
NumericT  precision 
)

Determine eigenvalues for large matrices, for intervals that contained exactly one eigenvalue after the first step.

Parameters
g_d: diagonal elements of symmetric, tridiagonal matrix
g_s: superdiagonal elements of symmetric, tridiagonal matrix
n: matrix size
num_intervals: total number of intervals containing one eigenvalue after the first step
g_left: left interval limits
g_right: right interval limits
g_pos: index of interval / number of intervals that are smaller than right interval limit
precision: desired precision of eigenvalues

Definition at line 59 of file bisect_kernel_large_onei.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelSmall ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
NumericT g_left,
NumericT g_right,
unsigned int *  g_left_count,
unsigned int *  g_right_count,
const NumericT  lg,
const NumericT  ug,
const unsigned int  lg_eig_count,
const unsigned int  ug_eig_count,
NumericT  epsilon 
)

Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.

Parameters
g_d: diagonal elements in global memory
g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0)
n: size of matrix
g_left: helper array
g_right: helper array
g_left_count: helper array
g_right_count: helper array
lg: lower bound of input interval (e.g. Gerschgorin interval)
ug: upper bound of input interval (e.g. Gerschgorin interval)
lg_eig_count: number of eigenvalues that are smaller than lg
ug_eig_count: number of eigenvalues that are smaller than ug
epsilon: desired accuracy of eigenvalues to compute

Definition at line 61 of file bisect_kernel_small.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  lg,
const NumericT  ug,
const NumericT  precision 
)

Definition at line 71 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge_MultIntervals ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  precision 
)

Definition at line 133 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge_OneIntervals ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  precision 
)

Definition at line 103 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectSmall ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataSmall< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  lg,
const NumericT  ug,
const NumericT  precision 
)

Definition at line 45 of file bisect_kernel_calls.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::bluestein ( viennacl::vector< NumericT, AlignmentV > &  in,
viennacl::vector< NumericT, AlignmentV > &  out,
vcl_size_t   
)

Bluestein's algorithm for computing Fourier transformation.

Currently works only for input sizes smaller than 2^16. Uses a lot of additional memory, but should be fast for any size of data. The serial implementation has roughly O(n * lg n) complexity.

Definition at line 622 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::bluestein_post ( Numeric2T *  Z,
Numeric2T *  out,
unsigned int  size,
NumericT  sign 
)

Definition at line 538 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::bluestein_pre ( Numeric2T *  input,
Numeric2T *  A,
Numeric2T *  B,
unsigned int  size,
unsigned int  ext_size,
NumericT  sign 
)

Definition at line 564 of file fft_operations.hpp.

__device__ int viennacl::linalg::cuda::ceilPow2 ( int  n)
inline

Compute the next higher power of two of n

Parameters
n: number for which the next higher power of two is sought

Definition at line 66 of file bisect_util.hpp.

template<typename T >
__device__ void viennacl::linalg::cuda::col_reduce_lcl_array ( T *  sums,
unsigned int  th_Idx,
unsigned int  bl_Dim 
)

Definition at line 1672 of file matrix_operations_col.hpp.

template<class T , class NumericT >
__device__ void viennacl::linalg::cuda::compactIntervals ( NumericT s_left,
NumericT s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  mid,
NumericT  right,
unsigned int  mid_count,
unsigned int  right_count,
T *  s_compaction_list,
unsigned int  num_threads_active,
unsigned int  is_active_second 
)

Perform stream compaction for second child intervals.

Parameters
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
mid: midpoint of current interval (left of new interval)
right: upper limit of interval
mid_count: eigenvalues less than mid
right_count: eigenvalues less than right
s_compaction_list: list containing the indices where the data has to be stored
num_threads_active: number of active threads / intervals
is_active_second: mark if thread has a second non-empty child interval

Definition at line 440 of file bisect_util.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::compactStreamsFinal ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
unsigned int &  offset_mult_lambda,
NumericT s_left,
NumericT s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
unsigned short *  s_cl_one,
unsigned short *  s_cl_mult,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper,
unsigned int  is_one_lambda,
unsigned int  is_one_lambda_2,
NumericT left,
NumericT right,
NumericT left_2,
NumericT right_2,
unsigned int &  left_count,
unsigned int &  right_count,
unsigned int &  left_count_2,
unsigned int &  right_count_2,
unsigned int  c_block_iend,
unsigned int  c_sum_block,
unsigned int  c_block_iend_2,
unsigned int  c_sum_block_2 
)

Perform final stream compaction before writing data to global memory.

Definition at line 134 of file bisect_kernel_large.hpp.

template<typename ComplexT , typename RealT >
__global__ void viennacl::linalg::cuda::complex_to_real ( const ComplexT *  in,
RealT *  out,
unsigned int  size 
)

Definition at line 809 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::complex_to_real ( viennacl::vector_base< NumericT > const &  in,
viennacl::vector_base< NumericT > &  out,
vcl_size_t  size 
)

Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)

Definition at line 819 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel ( const unsigned int *  row_jumper,
const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
unsigned int  nonzero_rows,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT  alpha,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result,
NumericT  beta 
)

Definition at line 1016 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const NumericT sp_mat_elements,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 430 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const NumericT sp_mat_elements,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 584 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT result,
unsigned int  size 
)

Definition at line 746 of file sparse_matrix_operations.hpp.

template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_A2 ( IndexT *  A2_row_indices,
IndexT *  A2_col_indices,
NumericT A2_elements,
IndexT  A2_size1,
IndexT *  new_row_buffer 
)

Definition at line 484 of file spgemm.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 ( const IndexT *  A_row_indices,
IndexT  A_size1,
IndexT  max_per_row,
IndexT *  chunks_per_row 
)

Definition at line 469 of file spgemm.hpp.

template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_G1 ( IndexT *  G1_row_indices,
IndexT *  G1_col_indices,
NumericT G1_elements,
IndexT  G1_size1,
IndexT const *  A_row_indices,
IndexT const *  A_col_indices,
NumericT const *  A_elements,
IndexT  A_size1,
IndexT  A_nnz,
IndexT  max_per_row,
IndexT *  new_row_buffer 
)

Definition at line 511 of file spgemm.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
IndexT  A_size1,
const IndexT *  B_row_indices,
IndexT *  subwarpsize_per_group,
IndexT *  max_nnz_row_A_per_group,
IndexT *  max_nnz_row_B_per_group 
)

Definition at line 82 of file spgemm.hpp.

template<unsigned int SubWarpSizeV, typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
IndexT  B_size2,
IndexT *  C_row_indices 
)

Definition at line 162 of file spgemm_rmerge.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
IndexT  B_size2,
IndexT *  C_row_indices,
unsigned int *  subwarpsize_array,
unsigned int *  max_row_size_A,
unsigned int *  max_row_size_B,
unsigned int *  scratchpad_offsets,
unsigned int *  scratchpad_indices 
)

Definition at line 217 of file spgemm.hpp.

template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
const NumericT A_elements,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
const NumericT B_elements,
IndexT  B_size2,
IndexT const *  C_row_indices,
IndexT *  C_col_indices,
NumericT C_elements 
)

Definition at line 251 of file spgemm_rmerge.hpp.

template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
const NumericT A_elements,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
const NumericT B_elements,
IndexT  B_size2,
IndexT const *  C_row_indices,
IndexT *  C_col_indices,
NumericT C_elements,
unsigned int *  subwarpsize_array,
unsigned int *  max_row_size_A,
unsigned int *  max_row_size_B,
unsigned int *  scratchpad_offsets,
unsigned int *  scratchpad_indices,
NumericT scratchpad_values 
)

Definition at line 365 of file spgemm.hpp.

template<typename AlphaBetaHandlerT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_adaptive_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT elements,
unsigned int  num_blocks,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT  alpha,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result,
NumericT  beta 
)

Definition at line 182 of file sparse_matrix_operations.hpp.

template<unsigned int SubWarpSizeV, typename AlphaBetaHandlerT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT  alpha,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result,
NumericT  beta 
)

Definition at line 138 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::computeMidpoint ( const NumericT  left,
const NumericT  right 
)
inline

Compute midpoint of interval [left, right] avoiding overflow if possible

Parameters
left: left / lower limit of interval
right: right / upper limit of interval

Definition at line 89 of file bisect_util.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::computeNumSmallerEigenvals ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
const NumericT  x,
const unsigned int  tid,
const unsigned int  num_intervals_active,
NumericT s_d,
NumericT s_s,
unsigned int  converged 
)
inline

Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix

Parameters
g_d: diagonal elements stored in global memory
g_s: superdiagonal elements stored in global memory
n: size of matrix
x: value for which the number of smaller eigenvalues is sought
tid: thread identifier (e.g. threadIdx.x or gtid)
num_intervals_active: number of active intervals / threads that currently process an interval
s_d: scratch space to store diagonal entries of the tridiagonal matrix in shared memory
s_s: scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory
converged: flag if the current thread is already converged (that is, count does not have to be computed)

Definition at line 177 of file bisect_util.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::computeNumSmallerEigenvalsLarge ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
const NumericT  x,
const unsigned int  tid,
const unsigned int  num_intervals_active,
NumericT s_d,
NumericT s_s,
unsigned int  converged 
)
inline

Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix

Parameters
g_d: diagonal elements stored in global memory
g_s: superdiagonal elements stored in global memory
n: size of matrix
x: value for which the number of smaller eigenvalues is sought
tid: thread identifier (e.g. threadIdx.x or gtid)
num_intervals_active: number of active intervals / threads that currently process an interval
s_d: scratch space to store diagonal entries of the tridiagonal matrix in shared memory
s_s: scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory
converged: flag if the current thread is already converged (that is, count does not have to be computed)

Definition at line 237 of file bisect_util.hpp.

template<typename DestNumericT , typename SrcNumericT >
void viennacl::linalg::cuda::convert ( matrix_base< DestNumericT > &  mat1,
matrix_base< SrcNumericT > const &  mat2 
)

Definition at line 57 of file matrix_operations.hpp.

template<typename DestNumericT , typename SrcNumericT >
void viennacl::linalg::cuda::convert ( vector_base< DestNumericT > &  dest,
vector_base< SrcNumericT > const &  src 
)

Definition at line 59 of file vector_operations.hpp.

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_col_kernel ( DestNumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const SrcNumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 34 of file matrix_operations_col.hpp.

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_kernel ( DestNumericT *  dest,
unsigned int  start_dest,
unsigned int  inc_dest,
unsigned int  size_dest,
SrcNumericT const *  src,
unsigned int  start_src,
unsigned int  inc_src 
)

Definition at line 48 of file vector_operations.hpp.

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_row_kernel ( DestNumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const SrcNumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 34 of file matrix_operations_row.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1352 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1547 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT  alpha,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
NumericT  beta 
)

Definition at line 1239 of file sparse_matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_col_column_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1498 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_col_row_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1480 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_row_column_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1535 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_row_row_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1516 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::copy_vec ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  V,
vcl_size_t  row_start,
vcl_size_t  col_start,
bool  copy_col 
)

This function copies a row or a column from a matrix to a vector.

Parameters
A: The matrix to copy from.
V: The vector to fill with data.
row_start: The number of the first row to copy.
col_start: The number of the first column to copy.
copy_col: Set to TRUE to copy a column, FALSE to copy a row.

Definition at line 2526 of file matrix_operations.hpp.

template<class T >
__device__ void viennacl::linalg::cuda::createIndicesCompaction ( T *  s_compaction_list_exc,
unsigned int  num_threads_compaction 
)

Create indices for compaction: process s_compaction_list_exc, which is 1 for intervals that generated a second child and 0 otherwise, and create for each non-zero element the index at which the new interval belongs in a compact representation of all generated second children.

Parameters
s_compaction_list_exc: list containing the flags indicating which threads generated two children
num_threads_compaction: number of threads to employ for compaction

Definition at line 373 of file bisect_util.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward ( const unsigned int *  row_jumper_U,
const unsigned int *  column_indices_U,
const NumericT elements_U,
const NumericT diagonal_U,
const unsigned int *  block_offsets,
NumericT result,
unsigned int  size 
)

Definition at line 700 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward ( const unsigned int *  row_jumper_L,
const unsigned int *  column_indices_L,
const NumericT elements_L,
const unsigned int *  block_offsets,
NumericT result,
unsigned int  size 
)

Definition at line 668 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 257 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 110 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT diagonal_entries,
NumericT vector,
unsigned int  size 
)

Definition at line 597 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT diagonal_entries,
NumericT vector,
unsigned int  size 
)

Definition at line 563 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT diagonal_entries,
NumericT vector,
unsigned int  size 
)

Definition at line 429 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 342 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 497 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 367 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 180 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 42 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::cuda_abs ( NumericT  val)

Definition at line 2910 of file vector_operations.hpp.

__device__ unsigned long viennacl::linalg::cuda::cuda_abs ( unsigned long  val)
inline

Definition at line 2911 of file vector_operations.hpp.

__device__ unsigned int viennacl::linalg::cuda::cuda_abs ( unsigned int  val)
inline

Definition at line 2912 of file vector_operations.hpp.

__device__ unsigned short viennacl::linalg::cuda::cuda_abs ( unsigned short  val)
inline

Definition at line 2913 of file vector_operations.hpp.

__device__ unsigned char viennacl::linalg::cuda::cuda_abs ( unsigned char  val)
inline

Definition at line 2914 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::direct ( viennacl::vector< NumericT, AlignmentV > const &  in,
viennacl::vector< NumericT, AlignmentV > &  out,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Direct 1D algorithm for computing Fourier transformation.

Works on any size of data. The serial implementation has O(n^2) complexity.

Definition at line 197 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::direct ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &  in,
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  out,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Direct 2D algorithm for computing Fourier transformation.

Works on any size of data. The serial implementation has O(n^2) complexity.

Definition at line 222 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::el_wise_mul_div ( NumericT matrix1,
NumericT const *  matrix2,
NumericT const *  matrix3,
unsigned int  size 
)

Main CUDA kernel for nonnegative matrix factorization of dense matrices.

Definition at line 38 of file nmf_operations.hpp.

template<typename NumericT , typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT, SizeT > &  A,
matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 548 of file matrix_operations.hpp.

template<typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< float, SizeT > &  A,
matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 608 of file matrix_operations.hpp.

template<typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< double, SizeT > &  A,
matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 668 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &  proxy 
)

Definition at line 736 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &  proxy 
)

Definition at line 778 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &  proxy 
)

Definition at line 820 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &  proxy 
)

Definition at line 862 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &  proxy 
)

Definition at line 904 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &  proxy 
)

Definition at line 946 of file matrix_operations.hpp.

template<typename NumericT , typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &  proxy 
)

Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)

Parameters
vec1: The result vector (or -range, or -slice)
proxy: The proxy object holding v2, v3 and the operation

Definition at line 957 of file vector_operations.hpp.

template<typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< float > &  vec1,
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 985 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &  proxy 
)

Definition at line 988 of file matrix_operations.hpp.

template<typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< double > &  vec1,
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 1013 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &  proxy 
)

Definition at line 1030 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &  proxy 
)

Definition at line 1056 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &  proxy 
)

Definition at line 1072 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &  proxy 
)

Definition at line 1083 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &  proxy 
)

Definition at line 1109 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &  proxy 
)

Definition at line 1114 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &  proxy 
)

Definition at line 1135 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &  proxy 
)

Definition at line 1156 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &  proxy 
)

Definition at line 1161 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &  proxy 
)

Definition at line 1187 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &  proxy 
)

Definition at line 1198 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &  proxy 
)

Definition at line 1213 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &  proxy 
)

Definition at line 1239 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &  proxy 
)

Definition at line 1240 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &  proxy 
)

Definition at line 1264 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &  proxy 
)

Definition at line 1282 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &  proxy 
)

Definition at line 1291 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &  proxy 
)

Definition at line 1317 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &  proxy 
)

Definition at line 1324 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &  proxy 
)

Definition at line 1343 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &  proxy 
)

Definition at line 1366 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &  proxy 
)

Definition at line 1369 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &  proxy 
)

Definition at line 1395 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &  proxy 
)

Definition at line 1408 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &  proxy 
)

Definition at line 1421 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &  proxy 
)

Definition at line 1447 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &  proxy 
)

Definition at line 1473 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 776 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 825 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)

Definition at line 915 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 856 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)

Definition at line 869 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_row_kernel (