1 #ifndef VIENNACL_LINALG_CUDA_MISC_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_MISC_OPERATIONS_HPP_
/**
 * @brief Substitution kernel: updates one entry of `vec` per processed row.
 *
 * For every row index `row` in [0, size) the kernel
 *   1. looks up the target entry `eq_row = row_index_array[row]`,
 *   2. walks the entries `j` in [row_indices[row], row_indices[row+1]),
 *   3. subtracts `vec[column_indices[j]] * elements[j]` from a running value,
 *   4. stores the result in `vec[eq_row]`.
 *
 * Rows are distributed over threads with a grid-stride loop, so any 1D launch
 * configuration covers all `size` rows.
 *
 * @param row_index_array  Maps the local row number to the entry of `vec` that is updated.
 * @param row_indices      CSR-style row pointer array; entry `row+1` is read, so it needs at least `size + 1` entries.
 * @param column_indices   Index into `vec` for each stored entry.
 * @param elements         Value of each stored entry.
 * @param vec              Vector that is read and updated in place.
 * @param size             Number of rows to process.
 *
 * NOTE(review): rows are processed concurrently without synchronization. This
 * presumably relies on the caller only batching rows that do not depend on each
 * other's results - confirm against the caller.
 *
 * NOTE(review): this block was recovered from a damaged listing. The loop bound
 * `row < size` and the initial value `vec[eq_row]` of `vec_entry` were missing
 * and have been reconstructed - TODO confirm against the upstream file.
 */
template<typename NumericT>
__global__ void level_scheduling_substitute_kernel(
          const unsigned int * row_index_array,
          const unsigned int * row_indices,
          const unsigned int * column_indices,
          const NumericT * elements,
          NumericT * vec,
          unsigned int size)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < size;
                    row += gridDim.x * blockDim.x)
  {
    // Entry of vec this row writes to.
    unsigned int eq_row = row_index_array[row];
    NumericT vec_entry = vec[eq_row];
    unsigned int row_end = row_indices[row+1];

    // Accumulate in a register and write back once after the loop.
    for (unsigned int j = row_indices[row]; j < row_end; ++j)
      vec_entry -= vec[column_indices[j]] * elements[j];

    vec[eq_row] = vec_entry;
  }
}
/**
 * @brief Host-side entry point: launches level_scheduling_substitute_kernel on `vec`.
 *
 * Extracts the raw CUDA pointers from the memory handles and launches the
 * kernel with a fixed configuration of 128 blocks of 128 threads. No stream is
 * passed, so the launch goes to the default stream.
 *
 * @param vec              Vector updated in place by the kernel.
 * @param row_index_array  Handle to an unsigned int buffer, passed as the kernel's `row_index_array`.
 * @param row_buffer       Handle to an unsigned int buffer, passed as the kernel's `row_indices`.
 * @param col_buffer       Handle to an unsigned int buffer, passed as the kernel's `column_indices`.
 * @param element_buffer   Handle to a NumericT buffer, passed as the kernel's `elements`.
 * @param num_rows         Number of rows to process; narrowed to unsigned int for the kernel.
 *
 * NOTE(review): this block was recovered from a damaged listing. The fifth
 * launch argument (the device pointer of `vec`) was missing and has been
 * reconstructed as `viennacl::cuda_arg(vec)` - TODO confirm against upstream.
 * The listing also shows no error check after the launch; confirm whether the
 * upstream file has one on a line that was lost.
 */
template<typename NumericT>
void level_scheduling_substitute(vector<NumericT> & vec,
                                 viennacl::backend::mem_handle const & row_index_array,
                                 viennacl::backend::mem_handle const & row_buffer,
                                 viennacl::backend::mem_handle const & col_buffer,
                                 viennacl::backend::mem_handle const & element_buffer,
                                 vcl_size_t num_rows)
{
  level_scheduling_substitute_kernel<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(row_index_array),
                                                   viennacl::cuda_arg<unsigned int>(row_buffer),
                                                   viennacl::cuda_arg<unsigned int>(col_buffer),
                                                   viennacl::cuda_arg<NumericT>(element_buffer),
                                                   viennacl::cuda_arg(vec),
                                                   static_cast<unsigned int>(num_rows)
                                                  );
}
This file provides the forward declarations for the main types used within ViennaCL.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Common routines for CUDA execution.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void level_scheduling_substitute(vector< NumericT > &vec, viennacl::backend::mem_handle const &row_index_array, viennacl::backend::mem_handle const &row_buffer, viennacl::backend::mem_handle const &col_buffer, viennacl::backend::mem_handle const &element_buffer, vcl_size_t num_rows)
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
__global__ void level_scheduling_substitute_kernel(const unsigned int *row_index_array, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vec, unsigned int size)
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Implementation of the ViennaCL scalar class.