1 #ifndef VIENNACL_MATRIX_PROXY_HPP_
2 #define VIENNACL_MATRIX_PROXY_HPP_
38 template<
typename NumericT,
typename MatrixT>
44 template<
typename NumericT>
52 template<
typename NumericT,
typename MatrixT>
58 template<
typename NumericT>
70 template<
typename MatrixType>
71 class matrix_range :
public matrix_base<typename MatrixType::cpu_value_type>
73 typedef matrix_base<typename MatrixType::cpu_value_type> base_type;
74 typedef matrix_range<MatrixType> self_type;
87 range const & row_range,
94 range const & row_range,
106 using base_type::operator=;
109 template<
typename OtherNumericT,
typename F>
112 template<
typename OtherNumericT,
typename F>
115 template<
typename OtherNumericT,
typename F>
119 template<
typename MatrixType>
127 range const & row_range,
134 range const & row_range,
146 template<
typename CPUMatrixT,
typename NumericT>
147 void copy(
const CPUMatrixT & cpu_matrix,
152 &&
bool(
"Matrix size mismatch!"));
154 if ( gpu_matrix_range.start2() != 0)
156 std::vector<NumericT> entries(gpu_matrix_range.size2());
159 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
161 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
162 entries[j] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
164 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
165 vcl_size_t num_entries = gpu_matrix_range.size2();
173 std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
176 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
177 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
178 entries[i*gpu_matrix_range.internal_size2() + j] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
180 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
181 vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
188 template<
typename CPUMatrixT,
typename NumericT>
189 void copy(
const CPUMatrixT & cpu_matrix,
194 &&
bool(
"Matrix size mismatch!"));
// Choose between the column-by-column transfer (range does not start at row 0)
// and the single bulk transfer. The former second clause compared size1() with
// itself, was always false, and has been dropped; the decision is unchanged and
// now mirrors the check used by the device-to-host overload.
// NOTE(review): the bulk branch stages internal_size1() * size2() entries but only
// fills size1() rows per column -- confirm that a range with start1() == 0 which
// does not span all rows cannot overwrite rows outside the range.
196 if ( gpu_matrix_range.start1() != 0)
198 std::vector<NumericT> entries(gpu_matrix_range.size1());
201 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
203 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
204 entries[i] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
206 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
207 vcl_size_t num_entries = gpu_matrix_range.size1();
215 std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
218 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
219 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
220 entries[i + j*gpu_matrix_range.internal_size1()] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
222 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
223 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
237 template<
typename CPUMatrixT,
typename NumericT>
239 CPUMatrixT & cpu_matrix)
243 &&
bool(
"Matrix size mismatch!"));
245 if ( gpu_matrix_range.start2() != 0)
247 std::vector<NumericT> entries(gpu_matrix_range.size2());
250 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
252 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
253 vcl_size_t num_entries = gpu_matrix_range.size2();
257 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
258 detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[j];
264 std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
266 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
270 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
271 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
272 detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i*gpu_matrix_range.internal_size2() + j];
279 template<
typename CPUMatrixT,
typename NumericT>
281 CPUMatrixT & cpu_matrix)
285 &&
bool(
"Matrix size mismatch!"));
287 if ( gpu_matrix_range.start1() != 0)
289 std::vector<NumericT> entries(gpu_matrix_range.size1());
292 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
294 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
295 vcl_size_t num_entries = gpu_matrix_range.size1();
299 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
300 detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i];
306 std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
309 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
310 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
314 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
315 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
316 detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i + j*gpu_matrix_range.internal_size1()];
325 template<
typename MatrixType>
328 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
334 template<
typename MatrixType>
337 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
361 template<
typename MatrixType>
362 class matrix_slice :
public matrix_base<typename MatrixType::cpu_value_type>
364 typedef matrix_base<typename MatrixType::cpu_value_type> base_type;
365 typedef matrix_slice<MatrixType> self_type;
378 slice const & row_slice,
385 slice const & row_slice,
397 using base_type::operator=;
400 template<
typename OtherNumericT,
typename F>
403 template<
typename OtherNumericT,
typename F>
406 template<
typename OtherNumericT,
typename F>
410 template<
typename MatrixType>
418 slice const & row_slice,
425 slice const & row_slice,
438 template<
typename CPUMatrixT,
typename NumericT>
439 void copy(
const CPUMatrixT & cpu_matrix,
444 &&
bool(
"Matrix size mismatch!"));
// Nothing to transfer unless the slice has at least one row AND one column.
// The second operand must test size2(); testing size1() twice let a slice with
// zero columns fall through with num_entries == 0 (empty staging buffer).
446 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
448 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
450 std::vector<NumericT> entries(num_entries);
453 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
455 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
458 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
459 entries[j * gpu_matrix_slice.stride2()] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
467 template<
typename CPUMatrixT,
typename NumericT>
468 void copy(
const CPUMatrixT & cpu_matrix,
473 &&
bool(
"Matrix size mismatch!"));
// Nothing to transfer unless the slice has at least one row AND one column.
// The second operand must test size2(); the duplicated size1() test left the
// column count unchecked.
476 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
478 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
480 std::vector<NumericT> entries(num_entries);
483 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
485 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
489 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
490 entries[i * gpu_matrix_slice.stride1()] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
505 template<
typename CPUMatrixT,
typename NumericT>
507 CPUMatrixT & cpu_matrix)
511 &&
bool(
"Matrix size mismatch!"));
// Nothing to read back unless the slice has at least one row AND one column.
// The second operand must test size2(); testing size1() twice let a slice with
// zero columns fall through with num_entries == 0 (empty staging buffer).
513 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
515 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
517 std::vector<NumericT> entries(num_entries);
520 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
522 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
526 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
527 detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[j * gpu_matrix_slice.stride2()];
535 template<
typename CPUMatrixT,
typename NumericT>
537 CPUMatrixT & cpu_matrix)
541 &&
bool(
"Matrix size mismatch!"));
// Nothing to read back unless the slice has at least one row AND one column.
// The second operand must test size2(); the duplicated size1() test left the
// column count unchecked.
543 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
545 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
547 std::vector<NumericT> entries(num_entries);
550 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
552 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
556 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
557 detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i * gpu_matrix_slice.stride1()];
567 template<
typename MatrixType>
570 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
575 template<
typename MatrixType>
578 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
583 template<
typename MatrixType>
586 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
viennacl::tools::shared_ptr< char > handle_type
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F > > const &B)
MatrixType::handle_type handle_type
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
DistanceT difference_type
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
base_type & operator=(viennacl::matrix< OtherNumericT, F > const &B)
matrix_range(matrix_range< MatrixType > const &A, range const &row_range, range const &col_range)
Generic size and resize functionality for different vector and matrix types.
Class for representing strided submatrices of a bigger matrix A.
self_type & operator=(const self_type &other)
range::size_type size_type
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
MatrixType::value_type value_type
MatrixType::handle_type handle_type
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
MatrixType::handle_type handle_type
size_type stride2() const
Returns the stride (step between consecutive selected columns) in the second dimension.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
range::difference_type difference_type
Forward declaration of dense matrix classes.
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
matrix_slice(self_type const &other)
MatrixType::value_type value_type
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
range::size_type size_type
matrix_range(self_type const &A, range const &row_range, range const &col_range)
result_of::size_type< T >::type start(T const &obj)
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
range::difference_type difference_type
const value_type & const_reference
size_type stride1() const
Returns the stride (step between consecutive selected rows) in the first dimension.
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
matrix_range(self_type const &other)
size_type size2() const
Returns the number of columns.
handle_type & handle()
Returns the OpenCL handle, non-const-version.
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F > > const &B)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
size_type size1() const
Returns the number of rows.
MatrixType::handle_type handle_type
base_type & operator=(viennacl::matrix< OtherNumericT, F > const &B)
matrix_slice(self_type const &A, slice const &row_slice, slice const &col_slice)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Implementation of a slice object for use with proxy objects.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F > > const &B)
Implementation of a range object for use with proxy objects.
size_type start2() const
Returns the index of the first column of the proxy within the underlying matrix.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Class for representing non-strided submatrices of a bigger matrix A.
NumericT const & matrix_access(MatrixT const &A, vcl_size_t i, vcl_size_t j)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
const value_type & const_reference
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
A tag for row-major storage of a dense matrix.
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
size_type start1() const
Returns the index of the first row of the proxy within the underlying matrix.
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F > > const &B)
matrix_slice(matrix_slice< MatrixType > const &A, slice const &row_slice, slice const &col_slice)