doc/cuda_8hpp_source.html

 #ifndef VIENNACL_BACKEND_CUDA_HPP_

 #define VIENNACL_BACKEND_CUDA_HPP_


 /* =========================================================================

    Copyright (c) 2010-2016, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include <iostream>

 #include <vector>

 #include <cassert>

 #include <stdexcept>

 #include <sstream>


 #include "viennacl/forwards.h"

 #include "viennacl/tools/shared_ptr.hpp"


 // includes CUDA

 #include <cuda_runtime.h>


 // Wraps a CUDA runtime call: passes its status code together with the current
 // source location (__FILE__, __LINE__) to detail::cuda_error_check(), which
 // throws a cuda_exception if the status is not cudaSuccess.
 // 'detail' is named without qualification, so the macro only resolves where a
 // namespace 'detail' providing cuda_error_check is visible
 // (e.g. inside viennacl::backend::cuda).
 #define VIENNACL_CUDA_ERROR_CHECK(err)  detail::cuda_error_check (err, __FILE__, __LINE__)


 namespace viennacl

 {

 namespace backend

 {

 namespace cuda

 {


 // Handle to a CUDA buffer: a shared pointer to the device memory, typed as char
 // so that byte offsets can be added to the raw pointer directly.
 typedef viennacl::tools::shared_ptr<char>  handle_type;

 // Requirements for backend:
 //
 // * memory_create(size, host_ptr)
 // * memory_copy(src, dest, offset_src, offset_dest, size)
 // * memory_write_from_main_memory(src, offset, size,
 //                                 dest, offset, size)
 //     (presumably provided in this file as memory_write())
 // * memory_read_to_main_memory(src, offset, size,
 //                              dest, offset, size)
 //     (presumably provided in this file as memory_read())
 //


 /** @brief Exception type thrown by detail::cuda_error_check() when a CUDA runtime call fails.
  *
  * Stores the CUDA status code next to the message kept by std::runtime_error.
  */
 class cuda_exception : public std::runtime_error
 {
   cudaError_t error_code_;   // CUDA status code supplied at construction

 public:
   /** @brief Builds the exception from a message and a CUDA status code.
    *
    * @param what_arg  Message made available through what()
    * @param err_code  CUDA status code to store
    */
   cuda_exception(std::string const & what_arg, cudaError_t err_code)
     : std::runtime_error(what_arg),
       error_code_(err_code)
   {
   }

   /** @brief Returns the CUDA status code passed to the constructor. */
   cudaError_t error_code() const
   {
     return error_code_;
   }
 };


 namespace detail

 {


   /** @brief Turns a failed CUDA runtime status into a cuda_exception.
    *
    * Returns immediately if error_code equals cudaSuccess. Otherwise a message of the form
    * "<file>(<line>): CUDA Runtime API error <code>: <description>" (terminated by a newline)
    * is assembled, where <description> comes from cudaGetErrorString(), and thrown.
    *
    * @param error_code  Status returned by a CUDA runtime call
    * @param file        Source file of the call site (VIENNACL_CUDA_ERROR_CHECK passes __FILE__)
    * @param line        Source line of the call site (VIENNACL_CUDA_ERROR_CHECK passes __LINE__)
    * @throws viennacl::backend::cuda::cuda_exception if error_code is not cudaSuccess
    */
   inline void cuda_error_check(cudaError error_code, const char *file, const int line )
   {
     if (cudaSuccess == error_code)
       return;

     std::stringstream ss;
     // Exactly one ": " separates the source location from the error text.
     ss << file << "(" << line << "): CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << '\n';

     throw viennacl::backend::cuda::cuda_exception(ss.str(), error_code);
   }


   /** @brief Functor for deleting a CUDA handle. Used as the deleter of the shared pointer behind handle_type.
    *
    * @tparam U  Pointee type of the device pointer being released
    */
   template<typename U>
   struct cuda_deleter
   {
     /** @brief Passes the pointer to cudaFree(). The status returned by cudaFree() is not inspected. */
     void operator()(U * dev_ptr) const
     {
       cudaFree(dev_ptr);
     }
   };


 }


 /** @brief Creates an array of the specified size on the CUDA device. If a host pointer is provided, the new buffer is filled with size_in_bytes bytes read from it.
  *
  * @param size_in_bytes  Number of bytes to allocate with cudaMalloc()
  * @param host_ptr       Optional host memory to copy into the new buffer; NULL (the default) skips the copy
  * @return Handle holding the device pointer; the memory is released through detail::cuda_deleter
  * @throws cuda_exception if cudaMalloc() or the initial host-to-device cudaMemcpy() reports an error
  */
 inline handle_type  memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
 {
   void * dev_ptr = NULL;
   VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );

   // Hand the allocation to the handle before copying, so that it is freed if the copy below throws.
   handle_type new_handle(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());

   if (host_ptr)
   {
     // The copy status is checked just like the allocation status above.
     VIENNACL_CUDA_ERROR_CHECK( cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice) );
   }

   return new_handle;
 }


 /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to address 'dst_buffer + dst_offset' using a device-to-device cudaMemcpy().
  *
  * @param src_buffer     Handle of the buffer to read from (must not hold NULL)
  * @param dst_buffer     Handle of the buffer to write to (must not hold NULL)
  * @param src_offset     Offset in bytes of the first byte to read within src_buffer
  * @param dst_offset     Offset in bytes of the first byte to write within dst_buffer
  * @param bytes_to_copy  Number of bytes to copy
  * @throws cuda_exception if cudaMemcpy() reports an error
  */
 inline void memory_copy(handle_type const & src_buffer,
                         handle_type & dst_buffer,
                         vcl_size_t src_offset,
                         vcl_size_t dst_offset,
                         vcl_size_t bytes_to_copy)
 {
   assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
   assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

   // Surface a failed copy as an exception instead of dropping the status code.
   VIENNACL_CUDA_ERROR_CHECK( cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
                                         reinterpret_cast<void *>(src_buffer.get() + src_offset),
                                         bytes_to_copy,
                                         cudaMemcpyDeviceToDevice) );
 }


 /** @brief Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
  *
  * @param dst_buffer     Handle of the CUDA buffer to write to (must not hold NULL)
  * @param dst_offset     Offset in bytes of the first byte to write within dst_buffer
  * @param bytes_to_copy  Number of bytes to transfer
  * @param ptr            Host memory to read from
  * @param async          If true, the transfer is issued with cudaMemcpyAsync() (no explicit stream argument),
  *                       otherwise with cudaMemcpy(). In the asynchronous case only the status returned by
  *                       the call itself is checked here.
  * @throws cuda_exception if the copy call reports an error
  */
 inline void memory_write(handle_type & dst_buffer,
                          vcl_size_t dst_offset,
                          vcl_size_t bytes_to_copy,
                          const void * ptr,
                          bool async = false)
 {
   assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));

   char       * dst = dst_buffer.get() + dst_offset;      // first device byte to write
   const char * src = static_cast<const char *>(ptr);     // host source

   if (async)
   {
     VIENNACL_CUDA_ERROR_CHECK( cudaMemcpyAsync(dst, src, bytes_to_copy, cudaMemcpyHostToDevice) );
   }
   else
   {
     VIENNACL_CUDA_ERROR_CHECK( cudaMemcpy(dst, src, bytes_to_copy, cudaMemcpyHostToDevice) );
   }
 }


 /** @brief Reads data from a CUDA buffer back to main RAM.
  *
  * @param src_buffer     Handle of the CUDA buffer to read from (must not hold NULL)
  * @param src_offset     Offset in bytes of the first byte to read within src_buffer
  * @param bytes_to_copy  Number of bytes to transfer
  * @param ptr            Host memory receiving the data
  * @param async          If true, the transfer is issued with cudaMemcpyAsync() (no explicit stream argument),
  *                       otherwise with cudaMemcpy(). In the asynchronous case only the status returned by
  *                       the call itself is checked here.
  * @throws cuda_exception if the copy call reports an error
  */
 inline void memory_read(handle_type const & src_buffer,
                         vcl_size_t src_offset,
                         vcl_size_t bytes_to_copy,
                         void * ptr,
                         bool async = false)
 {
   assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

   char       * dst = static_cast<char *>(ptr);            // host destination
   const char * src = src_buffer.get() + src_offset;       // first device byte to read

   if (async)
   {
     VIENNACL_CUDA_ERROR_CHECK( cudaMemcpyAsync(dst, src, bytes_to_copy, cudaMemcpyDeviceToHost) );
   }
   else
   {
     VIENNACL_CUDA_ERROR_CHECK( cudaMemcpy(dst, src, bytes_to_copy, cudaMemcpyDeviceToHost) );
   }
 }


 } //cuda

 } //backend

 } //viennacl

 #endif

viennacl::backend::cuda::detail::cuda_error_check
void cuda_error_check(cudaError error_code, const char *file, const int line)
Definition: cuda.hpp:73

viennacl::backend::cuda::detail::cuda_deleter::operator()
void operator()(U *p) const
Definition: cuda.hpp:88

viennacl::backend::cuda::memory_write
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
Definition: cuda.hpp:154

viennacl::backend::cuda::memory_copy
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the CUDA device.
Definition: cuda.hpp:130

forwards.h
This file provides the forward declarations for the main types used within ViennaCL.

shared_ptr.hpp
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.

viennacl::backend::cuda::handle_type
viennacl::tools::shared_ptr< char > handle_type
Definition: cuda.hpp:47

viennacl::tools::shared_ptr< char >

viennacl::vcl_size_t
std::size_t vcl_size_t
Definition: forwards.h:75

VIENNACL_CUDA_ERROR_CHECK
#define VIENNACL_CUDA_ERROR_CHECK(err)
Definition: cuda.hpp:38

viennacl::backend::cuda::detail::cuda_deleter
Functor for deleting a CUDA handle. Used within the smart pointer class.
Definition: cuda.hpp:86

viennacl::backend::cuda::memory_create
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size on the CUDA device. If the second argument is provided, the new buffer is initialized with the data it points to.
Definition: cuda.hpp:103

viennacl::backend::cuda::memory_read
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from a CUDA buffer back to main RAM.
Definition: cuda.hpp:183

viennacl::backend::cuda::cuda_exception::error_code
cudaError_t error_code() const
Definition: cuda.hpp:64

viennacl::tools::shared_ptr::get
T * get() const
Definition: shared_ptr.hpp:101

viennacl::backend::cuda::cuda_exception
Definition: cuda.hpp:59

viennacl::backend::cuda::cuda_exception::cuda_exception
cuda_exception(std::string const &what_arg, cudaError_t err_code)
Definition: cuda.hpp:62