doc/matrix-free_8cpp_source.html

 /* =========================================================================

    Copyright (c) 2010-2016, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the PDF manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 //
 // include necessary system headers
 //
 #include <cstdlib>
 #include <iostream>
 #include <vector>

 //
 // ViennaCL includes
 //
 #include "viennacl/scalar.hpp"
 #include "viennacl/vector.hpp"
 #include "viennacl/linalg/prod.hpp"
 #include "viennacl/linalg/cg.hpp"
 #include "viennacl/linalg/bicgstab.hpp"
 #include "viennacl/linalg/gmres.hpp"


 /** @brief Matrix-free linear operator: provides y = A*x through apply() without storing A.
  *
  * The operator is constructed from a count N and reports N*N rows via size1().
  * The computation itself is carried out by one backend-specific routine
  * (CUDA, OpenCL or host), chosen from the memory domain the input vector is
  * currently active in.
  *
  * @tparam NumericT   Floating point type of the vector entries.
  */
 template<typename NumericT>
 class MyOperator
 {
 public:
   /** @brief Stores N; size1() then reports N*N. */
   MyOperator(std::size_t N) : N_(N) {}

   /** @brief Dispatcher for y = Ax.
    *
    * @param x   Input vector; its active memory domain selects the backend.
    * @param y   Output vector written by the selected backend.
    *
    * If the active domain of x has no backend compiled in, nothing is called
    * and y is left untouched.
    */
   void apply(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
   {
     switch (viennacl::traits::active_handle_id(x))
     {
 #if defined(VIENNACL_WITH_CUDA)
       case viennacl::CUDA_MEMORY:
         apply_cuda(x, y);
         break;
 #endif

 #if defined(VIENNACL_WITH_OPENCL)
       case viennacl::OPENCL_MEMORY:
         apply_opencl(x, y);
         break;
 #endif

       case viennacl::MAIN_MEMORY:
         apply_host(x, y);
         break;

       default:
         break;
     }
   }

   /** @brief Number of rows of the operator, i.e. N*N. */
   std::size_t size1() const { return N_ * N_; }

 private:

   // Host implementation; always available.
   void apply_host(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const;

 #if defined(VIENNACL_WITH_OPENCL)
   // OpenCL implementation; only declared when OpenCL support is enabled.
   void apply_opencl(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const;
 #endif

 #if defined(VIENNACL_WITH_CUDA)
   // CUDA implementation; only declared when CUDA support is enabled.
   void apply_cuda(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const;
 #endif

   std::size_t N_;
 };


 /** @brief Tutorial driver: solves the system defined by MyOperator with the conjugate gradient solver and prints the solution.
  *
  * A right-hand side of N*N entries, all equal to -1, is set up, the system is
  * solved via viennacl::linalg::solve() with a default cg_tag, and the result is
  * printed as an N-by-N table of values.
  *
  * @return EXIT_SUCCESS after the table has been printed.
  */
 int main()
 {
   typedef float       ScalarType;  // feel free to change to double (and change OpenCL kernel argument types accordingly)

   std::size_t N = 10;
   viennacl::vector<ScalarType> rhs = viennacl::scalar_vector<ScalarType>(N*N, ScalarType(-1));
   MyOperator<ScalarType> op(N);

   viennacl::vector<ScalarType> result = viennacl::linalg::solve(op, rhs, viennacl::linalg::cg_tag());

   // Transfer the solution to host memory in one go. Reading result[k] directly
   // would issue a separate read from the compute backend for every entry.
   std::vector<ScalarType> host_result(result.size());
   viennacl::copy(result, host_result);

   std::cout.precision(3);
   std::cout << std::fixed;
   std::cout << "Result value map: " << std::endl;
   std::cout << std::endl << "^ y " << std::endl;
   for (std::size_t i=0; i<N; ++i)
   {
     std::cout << "|  ";
     for (std::size_t j=0; j<N; ++j)
       std::cout << host_result[i * N + j] << "  ";
     std::cout << std::endl;
   }
   std::cout << "*---------------------------------------------> x" << std::endl;

   std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

   return EXIT_SUCCESS;
 }


 /** @brief Host (main memory) implementation of y = A*x.
  *
  * Both vectors are accessed through their raw host pointers and interpreted as
  * N_-by-N_ grids stored row by row (entry (i,j) at index i*N_ + j). For every
  * grid point the second differences in both index directions are summed, using
  * a spacing of 1/(N_+1); neighbors outside the grid contribute the value 0.
  *
  * @param x   Input vector, read only.
  * @param y   Output vector, every one of its N_*N_ entries is overwritten.
  */
 template<typename NumericT>
 void MyOperator<NumericT>::apply_host(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
 {
   NumericT const * in  = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(x.handle());
   NumericT       * out = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(y.handle());

   NumericT dx = NumericT(1) / NumericT(N_ + 1);
   NumericT dy = NumericT(1) / NumericT(N_ + 1);

   // feel free to use
   //  #pragma omp parallel for
   // here
   for (std::size_t row = 0; row < N_; ++row)
   {
     for (std::size_t col = 0; col < N_; ++col)
     {
       std::size_t const idx = row * N_ + col;

       NumericT const center = in[idx];

       // Neighbors default to zero and are only read when they lie inside the grid.
       NumericT right  = 0;
       NumericT left   = 0;
       NumericT top    = 0;
       NumericT bottom = 0;

       if (col + 1 < N_) right  = in[idx + 1];
       if (col > 0)      left   = in[idx - 1];
       if (row + 1 < N_) top    = in[idx + N_];
       if (row > 0)      bottom = in[idx - N_];

       out[idx] =   ((right - center) / dx - (center - left)   / dx) / dx
                  + ((top   - center) / dy - (center - bottom) / dy) / dy;
     }
   }
 }


 #if defined(VIENNACL_WITH_CUDA)
 /** @brief CUDA kernel evaluating the operator on an N-by-N grid stored row by row.
  *
  * The outer (row) loop starts at blockIdx.x and advances by gridDim.x; the
  * inner (column) loop starts at threadIdx.x and advances by blockDim.x.
  * Neighbors outside the grid contribute the value 0.
  *
  * @param values_x   Input array with N*N entries.
  * @param values_y   Output array with N*N entries.
  * @param N          Number of grid points per direction.
  */
 template<typename NumericT>
 __global__ void apply_cuda_kernel(NumericT const * values_x,
                                   NumericT       * values_y,
                                   std::size_t N)
 {
   NumericT dx = NumericT(1) / (N + 1);
   NumericT dy = NumericT(1) / (N + 1);

   for (std::size_t row = blockIdx.x; row < N; row += gridDim.x)
   {
     for (std::size_t col = threadIdx.x; col < N; col += blockDim.x)
     {
       std::size_t const idx = row * N + col;

       NumericT const center = values_x[idx];

       // Neighbors default to zero and are only read when they lie inside the grid.
       NumericT right  = 0;
       NumericT left   = 0;
       NumericT top    = 0;
       NumericT bottom = 0;

       if (col + 1 < N) right  = values_x[idx + 1];
       if (col > 0)     left   = values_x[idx - 1];
       if (row + 1 < N) top    = values_x[idx + N];
       if (row > 0)     bottom = values_x[idx - N];

       values_y[idx] =   ((right - center) / dx - (center - left)   / dx) / dx
                       + ((top   - center) / dy - (center - bottom) / dy) / dy;
     }
   }
 }
 #endif


 #if defined(VIENNACL_WITH_CUDA)
 /** @brief CUDA implementation of y = A*x: launches apply_cuda_kernel on the CUDA buffers of x and y.
  *
  * @param x   Input vector.
  * @param y   Output vector.
  */
 template<typename NumericT>
 void MyOperator<NumericT>::apply_cuda(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
 {
   // Fixed launch configuration: 128 blocks with 128 threads each.
   unsigned int const num_blocks        = 128;
   unsigned int const threads_per_block = 128;

   apply_cuda_kernel<<<num_blocks, threads_per_block>>>(viennacl::cuda_arg(x),
                                                        viennacl::cuda_arg(y),
                                                        N_);
 }
 #endif


 #if defined(VIENNACL_WITH_OPENCL)

 // OpenCL C source, kept as one string, defining the kernel 'apply_opencl_kernel'.
 //
 //  - NumericT is fixed to float by the typedef in the first line of the program;
 //    it has to be adjusted there if another scalar type is to be used.
 //  - The kernel treats values_x and values_y as N-by-N grids stored row by row.
 //  - Rows are distributed over work groups (get_group_id / get_num_groups),
 //    columns over the work items of a group (get_local_id / get_local_size).
 //  - Neighbors outside the grid contribute the value 0.
 static const char * my_compute_program =

 "typedef float NumericT; \n"

 "__kernel void apply_opencl_kernel(__global NumericT const * values_x, \n"

 "                                  __global NumericT       * values_y, \n"

 "                                  unsigned int N) {\n"


 "      NumericT dx = (NumericT)1 / (N + 1); \n"

 "      NumericT dy = (NumericT)1 / (N + 1); \n"


 "      for (unsigned int i = get_group_id(0); i < N; i += get_num_groups(0)) \n"

 "        for (unsigned int j = get_local_id(0); j < N; j += get_local_size(0)) { \n"


 "          NumericT value_right  = (j < N - 1) ? values_x[ i   *N + j + 1] : 0; \n"

 "          NumericT value_left   = (j > 0    ) ? values_x[ i   *N + j - 1] : 0; \n"

 "          NumericT value_top    = (i < N - 1) ? values_x[(i+1)*N + j    ] : 0; \n"

 "          NumericT value_bottom = (i > 0    ) ? values_x[(i-1)*N + j    ] : 0; \n"

 "          NumericT value_center = values_x[i*N + j]; \n"


 "          values_y[i*N + j] =   ((value_right - value_center) / dx - (value_center - value_left)   / dx) / dx  \n"

 "                              + ((value_top   - value_center) / dy - (value_center - value_bottom) / dy) / dy; \n"

 "        }  \n"

 "    } \n";

 #endif


 #if defined(VIENNACL_WITH_OPENCL)
 /** @brief OpenCL implementation of y = A*x: enqueues 'apply_opencl_kernel' from my_compute_program.
  *
  * The program is registered under the name "my_compute_program" in the OpenCL
  * context that x belongs to, the first time that context is encountered.
  *
  * @param x   Input vector; its OpenCL context is the one the kernel is taken from.
  * @param y   Output vector.
  */
 template<typename NumericT>
 void MyOperator<NumericT>::apply_opencl(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
 {
   // The context is only reachable through a const accessor here, but registering a program modifies it.
   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context());

   // Ask the context itself whether the program is already present. A function-local
   // 'static bool' flag would only ever register the program with the first context
   // seen, so get_kernel() below would fail for vectors living in any other context.
   if (!ctx.has_program("my_compute_program"))
     ctx.add_program(my_compute_program, "my_compute_program");

   viennacl::ocl::kernel & my_kernel = ctx.get_kernel("my_compute_program", "apply_opencl_kernel");

   // The kernel takes N as 'unsigned int', hence the narrowing from std::size_t.
   viennacl::ocl::enqueue(my_kernel(x, y, static_cast<cl_uint>(N_)));
 }
 #endif


prod.hpp
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...

viennacl::ocl::kernel
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58

viennacl::traits::size1
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163

main
int main()
Definition: bisect.cpp:91

bicgstab.hpp
The stabilized bi-conjugate gradient method is implemented here.

viennacl::ocl::context
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55

viennacl::OPENCL_MEMORY
Definition: forwards.h:349

viennacl::linalg::solve
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496

NumericT
float NumericT
Definition: bisect.cpp:40

viennacl::ocl::context::add_program
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:368

gmres.hpp
Implementations of the generalized minimum residual method are in this file.

viennacl::CUDA_MEMORY
Definition: forwards.h:350

viennacl::ocl::context::get_kernel
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605

viennacl::vector_base< NumericT >

cg.hpp
The conjugate gradient method is implemented here.

viennacl::vector< ScalarType >

viennacl::traits::active_handle_id
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:218

viennacl::MAIN_MEMORY
Definition: forwards.h:348

viennacl::ocl::enqueue
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50

vector.hpp
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...

viennacl::scalar_vector
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87

ScalarType
float ScalarType
Definition: fft_1d.cpp:42

viennacl::linalg::cg_tag
A tag for the conjugate gradient solver. Used for supplying solver parameters and for dispatching the solve()...
Definition: cg.hpp:48

viennacl::cuda_arg
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39

scalar.hpp
Implementation of the ViennaCL scalar class.

viennacl::vector_base::handle
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128