|
template<typename IndexT > |
__device__ IndexT | viennacl::linalg::cuda::round_to_next_power_of_2 (IndexT val) |
|
template<typename IndexT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group) |
|
template<unsigned int SubWarpSizeV, typename IndexT > |
__device__ IndexT | viennacl::linalg::cuda::subwarp_minimum_shuffle (IndexT min_index) |
|
template<unsigned int SubWarpSizeV, typename IndexT > |
__device__ IndexT | viennacl::linalg::cuda::subwarp_minimum_shared (IndexT min_index, IndexT id_in_warp, IndexT *shared_buffer) |
|
template<unsigned int SubWarpSizeV, typename IndexT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices) |
|
template<unsigned int SubWarpSizeV, typename NumericT > |
__device__ NumericT | viennacl::linalg::cuda::subwarp_accumulate_shuffle (NumericT output_value) |
|
template<unsigned int SubWarpSizeV, typename NumericT > |
__device__ NumericT | viennacl::linalg::cuda::subwarp_accumulate_shared (NumericT output_value, unsigned int id_in_warp, NumericT *shared_buffer) |
|
template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements) |
|
template<typename IndexT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row) |
|
template<typename IndexT , typename NumericT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer) |
|
template<typename IndexT , typename NumericT > |
__global__ void | viennacl::linalg::cuda::compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer) |
|
template<class NumericT , unsigned int AlignmentV> |
void | viennacl::linalg::cuda::prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C) |
| Carries out sparse matrix-sparse matrix multiplication for CSR matrices.
|
|