doc/cuda_2bisect__kernel__calls_8hpp_source.html

 #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_CALLS_HPP_

 #define VIENNACL_LINALG_CUDA_BISECT_KERNEL_CALLS_HPP_


 /* =========================================================================

    Copyright (c) 2010-2016, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.

                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include "viennacl/linalg/detail/bisect/structs.hpp"


 // includes, kernels

 #include "viennacl/linalg/cuda/bisect_kernel_small.hpp"

 #include "viennacl/linalg/cuda/bisect_kernel_large.hpp"

 #include "viennacl/linalg/cuda/bisect_kernel_large_onei.hpp"

 #include "viennacl/linalg/cuda/bisect_kernel_large_multi.hpp"


 namespace viennacl

 {

 namespace linalg

 {

 namespace cuda

 {

 /** @brief Host-side launcher for bisectKernelSmall (eigenvalues of a small symmetric, tridiagonal matrix).
  *
  * The kernel is started with exactly one thread block of
  * VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX threads; afterwards the CUDA
  * error state is checked.
  *
  * @param input      Input matrix: g_a is the device-side diagonal, g_b the device-side superdiagonal
  * @param result     Receives the left/right interval limits (vcl_g_left, vcl_g_right) and the
  *                   associated eigenvalue counts (vcl_g_left_count, vcl_g_right_count)
  * @param mat_size   Forwarded to the kernel twice: as the matrix size and as the last of the two
  *                   count arguments following lg/ug
  * @param lg         Forwarded to the kernel (presumably the lower limit of the search interval - confirm in bisect_kernel_small.hpp)
  * @param ug         Forwarded to the kernel (presumably the upper limit of the search interval - confirm in bisect_kernel_small.hpp)
  * @param precision  Forwarded to the kernel (presumably the desired accuracy of the eigenvalues - confirm in bisect_kernel_small.hpp)
  */
 template<typename NumericT>
 void bisectSmall(const viennacl::linalg::detail::InputData<NumericT> &input, viennacl::linalg::detail::ResultDataSmall<NumericT> &result,
                  const unsigned int mat_size,
                  const NumericT lg, const NumericT ug,
                  const NumericT precision)
 {
   // launch configuration: a single block whose width is fixed at compile time
   const dim3 grid_dim(1, 1, 1);
   const dim3 block_dim(VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX, 1, 1);

   bisectKernelSmall<<< grid_dim, block_dim >>>(viennacl::cuda_arg(input.g_a),
                                               viennacl::cuda_arg(input.g_b) + 1,      // superdiagonal pointer advanced by one element
                                               mat_size,
                                               viennacl::cuda_arg(result.vcl_g_left),
                                               viennacl::cuda_arg(result.vcl_g_right),
                                               viennacl::cuda_arg(result.vcl_g_left_count),
                                               viennacl::cuda_arg(result.vcl_g_right_count),
                                               lg, ug, 0, mat_size,
                                               precision);

   viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("Kernel launch failed");
 }


 /** @brief Host-side launcher for bisectKernelLarge, the first step of the bisection algorithm for large matrices.
  *
  * A single thread block is used. Its width is VIENNACL_BISECT_MAX_THREADS_BLOCK for
  * mat_size > 512 and half of that otherwise. The CUDA error state is checked after the launch.
  *
  * @param input      Input matrix: g_a is the device-side diagonal, g_b the device-side superdiagonal
  * @param result     Receives the data of the first step: number and limits of intervals containing one
  *                   eigenvalue (g_num_one, g_left_one, g_right_one, g_pos_one) and the data describing
  *                   intervals containing multiple eigenvalues (g_num_blocks_mult, g_*_mult, g_blocks_mult*)
  * @param mat_size   Forwarded to the kernel twice: as the matrix size and as the last of the two
  *                   count arguments following lg/ug
  * @param lg         Forwarded to the kernel (presumably the lower limit of the search interval - confirm in bisect_kernel_large.hpp)
  * @param ug         Forwarded to the kernel (presumably the upper limit of the search interval - confirm in bisect_kernel_large.hpp)
  * @param precision  Forwarded to the kernel (presumably the desired accuracy of the eigenvalues - confirm in bisect_kernel_large.hpp)
  */
 template<typename NumericT>
 void bisectLarge(const viennacl::linalg::detail::InputData<NumericT> &input, viennacl::linalg::detail::ResultDataLarge<NumericT> &result,
                  const unsigned int mat_size,
                  const NumericT lg, const NumericT ug,
                  const NumericT precision)
 {
   // matrices with more than 512 rows get the full block width, smaller ones half of it
   const unsigned int threads_per_block = (mat_size > 512) ? VIENNACL_BISECT_MAX_THREADS_BLOCK
                                                           : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2;

   const dim3 grid_dim(1, 1, 1);
   const dim3 block_dim(threads_per_block, 1, 1);

   bisectKernelLarge<<< grid_dim, block_dim >>>(viennacl::cuda_arg(input.g_a),
                                               viennacl::cuda_arg(input.g_b) + 1,      // superdiagonal pointer advanced by one element
                                               mat_size,
                                               lg, ug, static_cast<unsigned int>(0), mat_size, precision,
                                               viennacl::cuda_arg(result.g_num_one),
                                               viennacl::cuda_arg(result.g_num_blocks_mult),
                                               viennacl::cuda_arg(result.g_left_one),
                                               viennacl::cuda_arg(result.g_right_one),
                                               viennacl::cuda_arg(result.g_pos_one),
                                               viennacl::cuda_arg(result.g_left_mult),
                                               viennacl::cuda_arg(result.g_right_mult),
                                               viennacl::cuda_arg(result.g_left_count_mult),
                                               viennacl::cuda_arg(result.g_right_count_mult),
                                               viennacl::cuda_arg(result.g_blocks_mult),
                                               viennacl::cuda_arg(result.g_blocks_mult_sum));

   viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("Kernel launch failed.");
 }


 /** @brief Computes the eigenvalues for intervals that contained only one eigenvalue after the first processing step.
  *
  * Launches bisectKernelLarge_OneIntervals on as many blocks as
  * getNumBlocksLinear() returns for the number of such intervals. If that number of
  * blocks is zero, no kernel is launched: a grid with zero blocks is an invalid CUDA
  * launch configuration and would be reported as a failure even though there is no work.
  *
  * @param input      Input matrix: g_a is the device-side diagonal, g_b the device-side superdiagonal
  * @param result     Data of the first step; g_num_one is read, g_left_one, g_right_one and g_pos_one
  *                   are handed to the kernel
  * @param mat_size   Size of the matrix, forwarded to the kernel; also selects the block width
  * @param precision  Forwarded to the kernel (presumably the desired accuracy of the eigenvalues - confirm in bisect_kernel_large_onei.hpp)
  */
 template<typename NumericT>
 void bisectLarge_OneIntervals(const viennacl::linalg::detail::InputData<NumericT> &input, viennacl::linalg::detail::ResultDataLarge<NumericT> &result,
                    const unsigned int mat_size,
                    const NumericT precision)
 {
   // matrices with more than 512 rows get the full block width, smaller ones half of it
   const unsigned int threads_per_block = (mat_size > 512) ? VIENNACL_BISECT_MAX_THREADS_BLOCK
                                                           : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2;

   // g_num_one is a viennacl::scalar; the conversion yields a plain host value
   // (presumably via a device-to-host read - confirm in scalar.hpp)
   const unsigned int num_one_intervals = result.g_num_one;
   const unsigned int num_blocks = viennacl::linalg::detail::getNumBlocksLinear(num_one_intervals, threads_per_block);

   // nothing to process: skip the launch instead of requesting an empty grid
   if (num_blocks == 0)
     return;

   dim3 grid_onei(num_blocks, 1, 1);
   dim3 threads_onei(threads_per_block, 1, 1);

   bisectKernelLarge_OneIntervals<<< grid_onei , threads_onei >>>
     (viennacl::cuda_arg(input.g_a),
      viennacl::cuda_arg(input.g_b) + 1,      // superdiagonal pointer advanced by one element
      mat_size, num_one_intervals,
      viennacl::cuda_arg(result.g_left_one),
      viennacl::cuda_arg(result.g_right_one),
      viennacl::cuda_arg(result.g_pos_one),
      precision
      );
   viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("bisectKernelLarge_OneIntervals() FAILED.");
 }


 /** @brief Processes intervals that contained more than one eigenvalue after the first processing step.
  *
  * Launches bisectKernelLarge_MultIntervals with one thread block per block of intervals
  * (result.g_num_blocks_mult). If that count is zero, no kernel is launched: a grid with
  * zero blocks is an invalid CUDA launch configuration and would be reported as a failure
  * even though there is no work.
  *
  * @param input      Input matrix: g_a is the device-side diagonal, g_b the device-side superdiagonal
  * @param result     Data of the first step; g_num_blocks_mult is read, the g_*_mult and g_blocks_mult*
  *                   vectors are handed to the kernel, which also receives g_lambda_mult and g_pos_mult
  *                   (eigenvalues and eigenvalue indices generated in the second step)
  * @param mat_size   Size of the matrix, forwarded to the kernel; also selects the block width
  * @param precision  Forwarded to the kernel (presumably the desired accuracy of the eigenvalues - confirm in bisect_kernel_large_multi.hpp)
  */
 template<typename NumericT>
 void bisectLarge_MultIntervals(const viennacl::linalg::detail::InputData<NumericT> &input, viennacl::linalg::detail::ResultDataLarge<NumericT> &result,
                    const unsigned int mat_size,
                    const NumericT precision)
 {
   // get the number of blocks of intervals that contain, in total when
   // each interval contains only one eigenvalue, not more than
   // MAX_THREADS_BLOCK threads
   const unsigned int num_blocks_mult = result.g_num_blocks_mult;

   // nothing to process: skip the launch instead of requesting an empty grid
   if (num_blocks_mult == 0)
     return;

   // setup the execution environment:
   // matrices with more than 512 rows get the full block width, smaller ones half of it
   const unsigned int threads_per_block = (mat_size > 512) ? VIENNACL_BISECT_MAX_THREADS_BLOCK
                                                           : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2;
   dim3 grid_mult(num_blocks_mult, 1, 1);
   dim3 threads_mult(threads_per_block, 1, 1);

   bisectKernelLarge_MultIntervals<<< grid_mult, threads_mult >>>
     (viennacl::cuda_arg(input.g_a),
      viennacl::cuda_arg(input.g_b) + 1,      // superdiagonal pointer advanced by one element
      mat_size,
      viennacl::cuda_arg(result.g_blocks_mult),
      viennacl::cuda_arg(result.g_blocks_mult_sum),
      viennacl::cuda_arg(result.g_left_mult),
      viennacl::cuda_arg(result.g_right_mult),
      viennacl::cuda_arg(result.g_left_count_mult),
      viennacl::cuda_arg(result.g_right_count_mult),
      viennacl::cuda_arg(result.g_lambda_mult),
      viennacl::cuda_arg(result.g_pos_mult),
      precision
     );
   viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("bisectKernelLarge_MultIntervals() FAILED.");
 }

 }

 }

 }


 #endif

viennacl::linalg::detail::InputData::g_b
viennacl::vector< NumericT > g_b
device side representation of superdiagonal
Definition: structs.hpp:62

viennacl::linalg::cuda::bisectLarge
void bisectLarge(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
Definition: bisect_kernel_calls.hpp:71

viennacl::linalg::detail::ResultDataLarge::g_left_mult
viennacl::vector< NumericT > g_left_mult
left interval limits of intervals containing multiple eigenvalues after the first iteration step ...
Definition: structs.hpp:146

VIENNACL_BISECT_MAX_THREADS_BLOCK
#define VIENNACL_BISECT_MAX_THREADS_BLOCK
Definition: config.hpp:32

structs.hpp
Helper structures to simplify variable handling.

bisect_kernel_small.hpp
Determine eigenvalues for small symmetric, tridiagonal matrix.

viennacl::linalg::detail::ResultDataLarge::g_left_one
viennacl::vector< NumericT > g_left_one
left interval limits of intervals containing one eigenvalue after the first iteration step ...
Definition: structs.hpp:137

viennacl::linalg::detail::ResultDataLarge::g_num_blocks_mult
viennacl::scalar< unsigned int > g_num_blocks_mult
number of (thread) blocks of intervals containing multiple eigenvalues after the first step ...
Definition: structs.hpp:134

viennacl::linalg::detail::ResultDataLarge::g_blocks_mult
viennacl::vector< unsigned int > g_blocks_mult
start addresses in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after ...
Definition: structs.hpp:156

viennacl::linalg::detail::ResultDataLarge::g_left_count_mult
viennacl::vector< unsigned int > g_left_count_mult
number of eigenvalues less than the left limit of the eigenvalue intervals containing multiple eigenv...
Definition: structs.hpp:151

NumericT
float NumericT
Definition: bisect.cpp:40

viennacl::linalg::detail::ResultDataLarge::g_right_count_mult
viennacl::vector< unsigned int > g_right_count_mult
number of eigenvalues less than the right limit of the eigenvalue intervals containing multiple eigen...
Definition: structs.hpp:154

viennacl::linalg::detail::InputData
In this class the input matrix is stored.
Definition: structs.hpp:53

viennacl::linalg::detail::getNumBlocksLinear
unsigned int getNumBlocksLinear(const unsigned int num_threads, const unsigned int num_threads_block)
Definition: util.hpp:96

viennacl::linalg::detail::ResultDataSmall::vcl_g_right
viennacl::vector< NumericT > vcl_g_right
right interval limits at the end of the computation
Definition: structs.hpp:103

viennacl::linalg::detail::ResultDataLarge::g_lambda_mult
viennacl::vector< NumericT > g_lambda_mult
eigenvalues that have been generated in the second step from intervals that still contained multiple ...
Definition: structs.hpp:162

viennacl::linalg::detail::ResultDataSmall::vcl_g_left
viennacl::vector< NumericT > vcl_g_left
left interval limits at the end of the computation
Definition: structs.hpp:101

viennacl::linalg::detail::ResultDataSmall::vcl_g_left_count
viennacl::vector< unsigned int > vcl_g_left_count
number of eigenvalues smaller than the left interval limit
Definition: structs.hpp:105

viennacl::linalg::detail::ResultDataLarge::g_pos_one
viennacl::vector< unsigned int > g_pos_one
interval indices (position in sorted list of eigenvalues) of intervals containing one eigenvalue af...
Definition: structs.hpp:143

VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
#define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
Definition: config.hpp:38

bisect_kernel_large.hpp
First step of the bisection algorithm for the computation of eigenvalues.

bisect_kernel_large_onei.hpp
Determine eigenvalues for large matrices for intervals that contained after the first step one eigenv...

viennacl::linalg::detail::ResultDataLarge::g_right_one
viennacl::vector< NumericT > g_right_one
right interval limits of intervals containing one eigenvalue after the first iteration step ...
Definition: structs.hpp:140

viennacl::linalg::detail::ResultDataLarge::g_pos_mult
viennacl::vector< unsigned int > g_pos_mult
eigenvalue index of intervals that have been generated in the second processing step ...
Definition: structs.hpp:165

viennacl::linalg::cuda::bisectLarge_OneIntervals
void bisectLarge_OneIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
Definition: bisect_kernel_calls.hpp:103

viennacl::linalg::detail::ResultDataSmall::vcl_g_right_count
viennacl::vector< unsigned int > vcl_g_right_count
number of eigenvalues bigger than the right interval limit
Definition: structs.hpp:107

viennacl::linalg::detail::ResultDataLarge
In this class the data of the result for large matrices is stored.
Definition: structs.hpp:125

viennacl::linalg::cuda::bisectSmall
void bisectSmall(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
Definition: bisect_kernel_calls.hpp:45

viennacl::linalg::detail::ResultDataLarge::g_blocks_mult_sum
viennacl::vector< unsigned int > g_blocks_mult_sum
accumulated number of intervals in g_left_mult etc. of blocks of intervals containing more than one e...
Definition: structs.hpp:159

viennacl::linalg::detail::ResultDataLarge::g_num_one
viennacl::scalar< unsigned int > g_num_one
number of intervals containing one eigenvalue after the first step
Definition: structs.hpp:131

VIENNACL_CUDA_LAST_ERROR_CHECK
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
Definition: common.hpp:30

viennacl::cuda_arg
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39

viennacl::linalg::cuda::bisectLarge_MultIntervals
void bisectLarge_MultIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
Definition: bisect_kernel_calls.hpp:133

viennacl::linalg::detail::ResultDataSmall
In this class the data of the result for small matrices is stored.
Definition: structs.hpp:96

bisect_kernel_large_multi.hpp
Second step of the bisection algorithm for the computation of eigenvalues for large matrices...

viennacl::linalg::detail::InputData::g_a
viennacl::vector< NumericT > g_a
device side representation of diagonal
Definition: structs.hpp:60

viennacl::linalg::detail::ResultDataLarge::g_right_mult
viennacl::vector< NumericT > g_right_mult
right interval limits of intervals containing multiple eigenvalues after the first iteration step ...
Definition: structs.hpp:148