ViennaCL - The Vienna Computing Library  1.7.1
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
spgemm.hpp File Reference
#include <stdexcept>
#include <thrust/scan.h>
#include <thrust/device_ptr.h>
#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/cuda/common.hpp"
#include "viennacl/tools/timer.hpp"
#include "viennacl/linalg/cuda/sparse_matrix_operations_solve.hpp"

Go to the source code of this file.

Namespaces

 viennacl
 Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
 
 viennacl::linalg
 Provides all linear algebra operations which are not covered by operator overloads.
 
 viennacl::linalg::cuda
 Holds all CUDA compute kernels used by ViennaCL.
 

Functions

template<typename IndexT >
__device__ IndexT viennacl::linalg::cuda::round_to_next_power_of_2 (IndexT val)
 
template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group)
 
__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_symbolic (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int subwarpsize)
 
__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_symbolic_double (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int *output_array, unsigned int id_in_warp, unsigned int subwarpsize)
 
template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices)
 
template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_numeric (NumericT scaling_factor, unsigned int input_start, unsigned int input_end, const unsigned int *input_indices, const NumericT *input_values, unsigned int invalid_token, unsigned int *output_indices, NumericT *output_values, unsigned int id_in_warp, unsigned int subwarpsize)
 
template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices, NumericT *scratchpad_values)
 
template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row)
 
template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer)
 
template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer)
 
template<class NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C)
 Carries out sparse_matrix-sparse_matrix multiplication for CSR matrices. More...