|
template<typename IndexT > |
__device__ IndexT | viennacl::linalg::cuda::round_to_next_power_of_2 (IndexT val) |
|
template<typename IndexT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group) |
|
__device__ unsigned int | viennacl::linalg::cuda::merge_subwarp_symbolic (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int subwarpsize) |
|
__device__ unsigned int | viennacl::linalg::cuda::merge_subwarp_symbolic_double (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int *output_array, unsigned int id_in_warp, unsigned int subwarpsize) |
|
template<typename IndexT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices) |
|
template<typename NumericT > |
__device__ unsigned int | viennacl::linalg::cuda::merge_subwarp_numeric (NumericT scaling_factor, unsigned int input_start, unsigned int input_end, const unsigned int *input_indices, const NumericT *input_values, unsigned int invalid_token, unsigned int *output_indices, NumericT *output_values, unsigned int id_in_warp, unsigned int subwarpsize) |
|
template<typename IndexT , typename NumericT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices, NumericT *scratchpad_values) |
|
template<typename IndexT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row) |
|
template<typename IndexT , typename NumericT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer) |
|
template<typename IndexT , typename NumericT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer) |
|
template<class NumericT , unsigned int AlignmentV> |
void | viennacl::linalg::cuda::prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C) |
| Carries out sparse matrix-sparse matrix multiplication for CSR matrices.
|
|