40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP 41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP 43 #include "TpetraCore_config.h" 44 #include "Teuchos_Array.hpp" 45 #include "Teuchos_ArrayView.hpp" 89 namespace PackCrsMatrixImpl {
// Functor template whose class head (its name and part of the template
// parameter list) falls on lines that are missing from this listing.
// The call operator takes (curInd, update, final); it stores update into
// outputOffsets_(curInd) and a per-row byte count into counts_(curInd).
// NOTE(review): presumably this is the functor that computeNumPacketsAndOffsets
// passes to Kokkos::parallel_scan -- confirm against the full file.
97 template<
class OutputOffsetsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
103 #ifdef HAVE_TPETRA_DEBUG 107 #endif // HAVE_TPETRA_DEBUG 111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
// Element types of the remaining view template arguments.
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The device type is taken from the output offsets view.
117 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the view template arguments: matching
// execution spaces, Kokkos::View types, and built-in integer entries.
118 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this assertion is about CountsViewType but compares its
// value_type against output_offset_type rather than count_type. It only
// holds when both entry types coincide -- confirm whether count_type was
// intended.
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor parameters. The line that names the constructor and its
// first parameter is not visible in this listing.
143 const CountsViewType& counts,
144 const InputOffsetsViewType& rowOffsets,
145 const InputLocalRowIndicesViewType& lclRowInds,
146 const InputLocalRowPidsViewType& lclRowPids,
147 const count_type sizeOfLclCount,
148 const count_type sizeOfGblColInd,
149 const count_type sizeOfPid,
150 const count_type sizeOfValue) :
// Member initializers: each visible argument is copied into the member of
// the same name with a trailing underscore.
151 outputOffsets_ (outputOffsets),
153 rowOffsets_ (rowOffsets),
154 lclRowInds_ (lclRowInds),
155 lclRowPids_ (lclRowPids),
156 sizeOfLclCount_ (sizeOfLclCount),
157 sizeOfGblColInd_ (sizeOfGblColInd),
158 sizeOfPid_ (sizeOfPid),
159 sizeOfValue_ (sizeOfValue),
// Extent checks in the constructor body: counts_ must have exactly one entry
// per row to pack, and outputOffsets_ must have one more entry than that.
// A mismatch builds a message and throws std::invalid_argument.
// NOTE(review): any enclosing condition for these checks is on lines not
// visible here.
163 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
165 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream os;
167 os <<
"lclRowInds.extent(0) = " << numRowsToPack
168 <<
" != counts.extent(0) = " << counts_.extent (0)
170 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
172 if (static_cast<size_t> (numRowsToPack + 1) !=
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream os;
175 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
178 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Call operator. curInd is the position being processed, update is the
// running value carried between calls, and final is a flag whose use is on
// lines not visible here.
183 KOKKOS_INLINE_FUNCTION
void 184 operator() (
const local_row_index_type& curInd,
185 output_offset_type& update,
186 const bool final)
// Range checks on curInd against zero and the extent of outputOffsets_.
// The bodies of these branches are not visible in this listing.
const 189 if (curInd < static_cast<local_row_index_type> (0)) {
197 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
202 outputOffsets_(curInd) = update;
// Only positions that are valid for counts_ have a row to measure.
205 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
206 const auto lclRow = lclRowInds_(curInd);
// Detect a local row index that is negative or too large for rowOffsets_.
207 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row, as the difference of consecutive offsets.
216 const count_type count =
217 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// An empty row contributes zero bytes. Otherwise the size is the count
// header plus count times the per-entry size, where the PID size is only
// included when lclRowPids_ is nonempty.
// NOTE(review): the last term of the per-entry sum is on a line missing
// from this listing (presumably sizeOfValue_) -- TODO confirm.
223 const count_type numBytes = (count == 0) ?
224 static_cast<count_type> (0) :
225 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
230 counts_(curInd) = numBytes;
// Mirror view of error_ obtained from Kokkos::create_mirror_view. The
// surrounding member function is not visible in this listing.
242 auto error_h = Kokkos::create_mirror_view (error_);
// Data members: the two output views, const views of the three inputs, the
// four per-item byte sizes, and a single-int view used as an error flag.
248 OutputOffsetsViewType outputOffsets_;
249 CountsViewType counts_;
250 typename InputOffsetsViewType::const_type rowOffsets_;
251 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
252 typename InputLocalRowPidsViewType::const_type lclRowPids_;
253 count_type sizeOfLclCount_;
254 count_type sizeOfGblColInd_;
255 count_type sizeOfPid_;
256 count_type sizeOfValue_;
257 Kokkos::View<int, device_type> error_;
// computeNumPacketsAndOffsets: validates the view extents, runs a functor
// over [0, numRowsToPack + 1) with Kokkos::parallel_scan to fill counts and
// outputOffsets, and returns a value of the counts entry type.
//
// Throws std::invalid_argument when the extents are inconsistent and
// std::runtime_error when the functor reports a nonzero error code.
269 template<
class OutputOffsetsViewType,
270 class CountsViewType,
271 class InputOffsetsViewType,
272 class InputLocalRowIndicesViewType,
273 class InputLocalRowPidsViewType>
274 typename CountsViewType::non_const_value_type
275 computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
276 const CountsViewType& counts,
277 const InputOffsetsViewType& rowOffsets,
278 const InputLocalRowIndicesViewType& lclRowInds,
279 const InputLocalRowPidsViewType& lclRowPids,
280 const typename CountsViewType::non_const_value_type sizeOfLclCount,
281 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
282 const typename CountsViewType::non_const_value_type sizeOfPid,
283 const typename CountsViewType::non_const_value_type sizeOfValue)
// Tail of the functor_type typedef. The line naming the functor template
// and its first argument is not visible in this listing.
286 CountsViewType,
typename InputOffsetsViewType::const_type,
287 typename InputLocalRowIndicesViewType::const_type,
288 typename InputLocalRowPidsViewType::const_type> functor_type;
289 typedef typename CountsViewType::non_const_value_type count_type;
290 typedef typename OutputOffsetsViewType::size_type size_type;
291 typedef typename OutputOffsetsViewType::execution_space execution_space;
292 typedef typename functor_type::local_row_index_type LO;
293 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
// Prefix prepended to every exception message raised below.
294 const char prefix[] =
"computeNumPacketsAndOffsets: ";
296 count_type count = 0;
297 const count_type numRowsToPack = lclRowInds.extent (0);
// Case with no rows to pack. The body of this branch is not visible here
// (presumably an early return -- TODO confirm).
299 if (numRowsToPack == 0) {
// With at least one row to pack, rowOffsets must have more than one entry.
303 TEUCHOS_TEST_FOR_EXCEPTION
304 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
305 std::invalid_argument, prefix <<
"There is at least one row to pack, " 306 "but the matrix has no rows. lclRowInds.extent(0) = " <<
307 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
308 rowOffsets.extent (0) <<
" <= 1.");
// outputOffsets needs one more entry than the number of rows to pack.
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (outputOffsets.extent (0) !=
311 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
312 prefix <<
"Output dimension does not match number of rows to pack. " 313 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
314 <<
" != lclRowInds.extent(0) + 1 = " 315 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
// counts needs exactly one entry per row to pack.
316 TEUCHOS_TEST_FOR_EXCEPTION
317 (counts.extent (0) != numRowsToPack, std::invalid_argument,
318 prefix <<
"counts.extent(0) = " << counts.extent (0)
319 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Build the functor and scan over numRowsToPack + 1 positions, one more
// than the number of rows.
321 functor_type f (outputOffsets, counts, rowOffsets,
322 lclRowInds, lclRowPids, sizeOfLclCount,
323 sizeOfGblColInd, sizeOfPid, sizeOfValue);
324 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// A nonzero error code from the functor becomes a std::runtime_error.
327 const int errCode = f.getError ();
328 TEUCHOS_TEST_FOR_EXCEPTION
329 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code " 330 << errCode <<
" != 0.");
// Consistency check that ends at the HAVE_TPETRA_DEBUG endif below: the
// last output offset is compared against a total accumulated in the loop
// (the loop body and the declarations of total and errStr are not visible).
// NOTE(review): k has type LO while numRowsToPack has type count_type, so
// this comparison may mix signed and unsigned operands -- confirm.
334 for (LO k = 0; k < numRowsToPack; ++k) {
337 if (outputOffsets(numRowsToPack) != total) {
// Lazily create the error stream the first time a mismatch is reported.
338 if (errStr.get () == NULL) {
339 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
341 std::ostringstream& os = *errStr;
343 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") " 344 << outputOffsets(numRowsToPack) <<
" != sum of counts = " 345 << total <<
"." << std::endl;
346 if (numRowsToPack != 0) {
// For fewer than ten rows the full arrays are written to the message.
348 if (numRowsToPack < static_cast<LO> (10)) {
349 os <<
"outputOffsets: [";
350 for (LO i = 0; i <= numRowsToPack; ++i) {
351 os << outputOffsets(i);
352 if (static_cast<LO> (i + 1) <= numRowsToPack) {
356 os <<
"]" << std::endl;
358 for (LO i = 0; i < numRowsToPack; ++i) {
360 if (static_cast<LO> (i + 1) < numRowsToPack) {
364 os <<
"]" << std::endl;
367 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = " 368 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
371 count = outputOffsets(numRowsToPack);
// NOTE(review): this braced return of (false, errStr) does not match the
// declared return type of this function, which is the counts entry type.
// The enclosing scope is not visible here -- confirm that this debug-only
// path compiles when HAVE_TPETRA_DEBUG is defined.
372 return {
false, errStr};
// Normal path: the returned value is read from outputOffsets through
// getEntryOnHost. The index argument is on a line not visible here.
374 #endif // HAVE_TPETRA_DEBUG 378 using Tpetra::Details::getEntryOnHost;
379 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Row-packing routine that returns a Kokkos::pair of (error code, number of
// bytes written). The line carrying the function name and several
// parameters is missing from this listing; the call site in
// PackCrsMatrixFunctor names it packCrsMatrixRow.
//
// Visible return values: (0, 0) on an early exit whose condition is not
// shown, (10, bytes) when a packing step reported an error, (11, bytes) when
// the byte total differs from the expected total, and (0, bytes) on success.
399 template<
class ST,
class ColumnMap,
class BufferDeviceType>
401 Kokkos::pair<int, size_t>
403 const Kokkos::View<char*, BufferDeviceType>& exports,
408 const size_t num_ent,
409 const size_t num_bytes_per_value,
410 const bool pack_pids)
412 using Kokkos::subview;
413 using LO =
typename ColumnMap::local_ordinal_type;
414 using GO =
typename ColumnMap::global_ordinal_type;
415 using return_type = Kokkos::pair<int, size_t>;
// Early exit reporting no error and zero bytes. The guarding condition is
// on a line not visible here.
419 return return_type (0, 0);
422 const LO num_ent_LO =
static_cast<LO
> (num_ent);
// Byte layout inside exports, starting at offset: the entry count first,
// then the global indices, then the PIDs (zero length unless pack_pids is
// set), then the values. Some of the length definitions are on lines
// missing from this listing.
423 const size_t num_ent_beg = offset;
426 const size_t gids_beg = num_ent_beg + num_ent_len;
429 const size_t pids_beg = gids_beg + gids_len;
430 const size_t pids_len = pack_pids ?
432 static_cast<size_t> (0);
434 const size_t vals_beg = gids_beg + gids_len + pids_len;
435 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw output pointers for each section. The PID pointer is NULL when PIDs
// are not being packed.
437 char*
const num_ent_out = exports.data () + num_ent_beg;
438 char*
const gids_out = exports.data () + gids_beg;
439 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
440 char*
const vals_out = exports.data () + vals_beg;
// Running total of bytes written, compared against the expected size below.
442 size_t num_bytes_out = 0;
// Global index section: each local index from lids_in is converted through
// col_map.getGlobalElement. The packing call itself is not visible here.
449 for (
size_t k = 0; k < num_ent; ++k) {
450 const LO lid = lids_in[k];
451 const GO gid = col_map.getGlobalElement (lid);
// PID section: pids_in is indexed by the local index read from lids_in,
// not by the position k within the row.
456 for (
size_t k = 0; k < num_ent; ++k) {
457 const LO lid = lids_in[k];
458 const int pid = pids_in[lid];
// Accumulate the (error, bytes) pair p returned by a packing call that is
// on a line not visible here.
464 error_code += p.first;
465 num_bytes_out += p.second;
// Any accumulated error is reported to the caller as code 10.
468 if (error_code != 0) {
469 return return_type (10, num_bytes_out);
// A byte total that differs from the sum of the section lengths is
// reported as code 11.
472 const size_t expected_num_bytes =
473 num_ent_len + gids_len + pids_len + vals_len;
474 if (num_bytes_out != expected_num_bytes) {
475 return return_type (11, num_bytes_out);
477 return return_type (0, num_bytes_out);
// Reduction functor that packs one exported row per call and carries a
// (error code, index) pair as its reduction value. init sets the pair to
// (0, invalid index); join adopts src only when src holds an error and dst
// does not.
//
// Error codes assigned in the call operator: 1 for an export LID that is
// not below numRows, 2 for an offset or size that runs past the exports
// buffer, 3 for a packed byte count that differs from num_packets_per_lid,
// or the code returned by packCrsMatrixRow.
480 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
481 struct PackCrsMatrixFunctor {
482 typedef LocalMatrix local_matrix_device_type;
484 typedef typename local_matrix_device_type::value_type ST;
487 typedef typename local_matrix_device_type::device_type DT;
// View types for the per-LID packet counts, the offsets, and the byte
// buffer that receives the packed rows.
489 typedef Kokkos::View<const size_t*, BufferDeviceType>
490 num_packets_per_lid_view_type;
491 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
492 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
// The alias names completing the next two typedefs are on lines missing
// from this listing.
496 typedef typename num_packets_per_lid_view_type::non_const_value_type
498 typedef typename offsets_view_type::non_const_value_type
// Reduction value: first is the error code, second is an index of type LO.
500 typedef Kokkos::pair<int, LO> value_type;
502 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
503 "local_map_type::local_ordinal_type and " 504 "local_matrix_device_type::ordinal_type must be the same.");
// Data members. pack_pids is initialized in the constructor but its
// declaration is on a line not visible here.
506 local_matrix_device_type local_matrix;
507 local_map_type local_col_map;
508 exports_view_type exports;
509 num_packets_per_lid_view_type num_packets_per_lid;
510 export_lids_view_type export_lids;
511 source_pids_view_type source_pids;
512 offsets_view_type offsets;
513 size_t num_bytes_per_value;
// Constructor: copies every argument into the member of the same name and
// then checks the row map extent against the number of rows.
516 PackCrsMatrixFunctor (
const local_matrix_device_type& local_matrix_in,
517 const local_map_type& local_col_map_in,
518 const exports_view_type& exports_in,
519 const num_packets_per_lid_view_type& num_packets_per_lid_in,
520 const export_lids_view_type& export_lids_in,
521 const source_pids_view_type& source_pids_in,
522 const offsets_view_type& offsets_in,
523 const size_t num_bytes_per_value_in,
524 const bool pack_pids_in) :
525 local_matrix (local_matrix_in),
526 local_col_map (local_col_map_in),
527 exports (exports_in),
528 num_packets_per_lid (num_packets_per_lid_in),
529 export_lids (export_lids_in),
530 source_pids (source_pids_in),
531 offsets (offsets_in),
532 num_bytes_per_value (num_bytes_per_value_in),
533 pack_pids (pack_pids_in)
// For a matrix with rows, the row map must have numRows + 1 entries;
// otherwise std::logic_error is thrown.
535 const LO numRows = local_matrix_in.numRows ();
537 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
538 TEUCHOS_TEST_FOR_EXCEPTION
539 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
540 std::logic_error,
"local_matrix.graph.row_map.extent(0) = " 541 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid ordinal.
544 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const 546 using ::Tpetra::Details::OrdinalTraits;
547 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine two partial results. The branch is taken only when src carries an
// error and dst does not; its body is on a line not visible here.
550 KOKKOS_INLINE_FUNCTION
void 551 join (
volatile value_type& dst,
const volatile value_type& src)
const 555 if (src.first != 0 && dst.first == 0) {
// Pack the row for export position i, recording any failure in dst.
560 KOKKOS_INLINE_FUNCTION
561 void operator() (
const LO i, value_type& dst)
const 563 const size_t offset = offsets[i];
564 const LO export_lid = export_lids[i];
565 const size_t buf_size = exports.size();
566 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map is read at export_lid and export_lid + 1 here,
// before the check below that export_lid is less than numRows -- confirm
// that an out-of-range export_lid cannot reach this read.
567 const size_t num_ent =
568 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
569 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): init sets dst.first to 0 and join keeps an existing nonzero
// dst.first, yet every guard in this operator writes an error only when
// dst.first is already nonzero. With a zero starting value these
// assignments look unreachable, so the conditions may be inverted (== 0
// would keep the first error) -- confirm against the intended behavior.
579 if (export_lid >= local_matrix.numRows ()) {
580 if (dst.first != 0) {
581 dst = Kokkos::make_pair (1, i);
// The packed row must fit inside the exports buffer.
585 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
586 if (dst.first != 0) {
587 dst = Kokkos::make_pair (2, i);
// Take subviews of the values and column indices belonging to this row.
597 const auto row_beg = local_matrix.graph.row_map[export_lid];
598 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
599 auto vals_in = subview (local_matrix.values,
600 Kokkos::make_pair (row_beg, row_end));
601 auto lids_in = subview (local_matrix.graph.entries,
602 Kokkos::make_pair (row_beg, row_end));
603 typedef local_map_type LMT;
604 typedef BufferDeviceType BDT;
// Pack the row. The final argument of this call is on a line not visible
// in this listing.
605 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
606 source_pids, vals_in, offset,
607 num_ent, num_bytes_per_value,
609 int error_code_this_row = p.first;
610 size_t num_bytes_packed_this_row = p.second;
// Propagate the error code from packing, or report code 3 when the packed
// size differs from the size recorded in num_packets_per_lid.
611 if (error_code_this_row != 0) {
612 if (dst.first != 0) {
613 dst = Kokkos::make_pair (error_code_this_row, i);
616 else if (num_bytes_packed_this_row != num_bytes) {
617 if (dst.first != 0) {
618 dst = Kokkos::make_pair (3, i);
// do_pack: validates the input extents, builds a PackCrsMatrixFunctor, runs
// it with Kokkos::parallel_reduce over one index per entry of
// num_packets_per_lid, and throws std::runtime_error when the reduction
// result carries a nonzero error code.
//
// Several parameter lines (the local map, num_packets_per_lid, export_lids
// and source_pids) are missing from this listing.
631 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
633 do_pack (
const LocalMatrix& local_matrix,
635 const Kokkos::View<char*, BufferDeviceType>& exports,
639 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
640 const size_t num_bytes_per_value,
641 const bool pack_pids)
644 using DT =
typename LocalMatrix::device_type;
645 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
// Prefix prepended to every exception message raised below.
646 const char prefix[] =
"Tpetra::Details::do_pack: ";
// The extent checks only apply when there is at least one LID to export.
648 if (export_lids.extent (0) != 0) {
// offsets needs one more entry than export_lids.
649 TEUCHOS_TEST_FOR_EXCEPTION
650 (static_cast<size_t> (offsets.extent (0)) !=
651 static_cast<size_t> (export_lids.extent (0) + 1),
652 std::invalid_argument, prefix <<
"offsets.extent(0) = " 653 << offsets.extent (0) <<
" != export_lids.extent(0) (= " 654 << export_lids.extent (0) <<
") + 1.");
// export_lids and num_packets_per_lid must have the same length.
655 TEUCHOS_TEST_FOR_EXCEPTION
656 (export_lids.extent (0) != num_packets_per_lid.extent (0),
657 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
658 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = " 659 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires a nonempty source_pids.
663 TEUCHOS_TEST_FOR_EXCEPTION
664 (pack_pids && exports.extent (0) != 0 &&
665 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
666 "pack_pids is true, and exports.extent(0) = " <<
667 exports.extent (0) <<
" != 0, meaning that we need to pack at " 668 "least one matrix entry, but source_pids.extent(0) = 0.");
// Construct the functor. The final constructor argument is on a line not
// visible in this listing.
671 using pack_functor_type =
672 PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
673 pack_functor_type f (local_matrix, local_map, exports,
674 num_packets_per_lid, export_lids,
675 source_pids, offsets, num_bytes_per_value,
// Reduce over all export positions; result receives the (error, index) pair.
678 typename pack_functor_type::value_type result;
679 range_type range (0, num_packets_per_lid.extent (0));
680 Kokkos::parallel_reduce (range, f, result);
// Turn a reported error into an exception naming the code and the row.
682 if (result.first != 0) {
685 TEUCHOS_TEST_FOR_EXCEPTION
686 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor " 687 "reported error code " << result.first <<
" for the first " 688 "bad row " << result.second <<
".");
// PackCrsMatrixImpl::packCrsMatrix: sets constant_num_packets to zero,
// validates the export arguments, agrees on a per-value byte size across
// the communicator of the source matrix, computes per-row byte counts and
// offsets, grows the exports buffer when it is too small, and calls do_pack
// on the device view of exports.
//
// The line with the function name and the sourceMatrix parameter is missing
// from this listing, as are several local definitions used below.
721 template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
724 Kokkos::DualView<char*, BufferDeviceType>& exports,
725 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
726 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
727 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
728 size_t& constant_num_packets,
729 const bool pack_pids)
// String argument of a call whose beginning is on a line not visible here.
732 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
736 typedef BufferDeviceType DT;
737 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
// Prefix prepended to every exception message raised below.
738 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch, set to false here.
// NOTE(review): presumably this guards the std::cerr output further down;
// the guarding conditions are not visible in this listing.
739 constexpr
bool debug =
false;
742 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
// The output argument constant_num_packets is always set to zero.
747 constant_num_packets = 0;
// The number of export LIDs must equal the length of num_packets_per_lid.
749 const size_t num_export_lids =
750 static_cast<size_t> (export_lids.extent (0));
751 TEUCHOS_TEST_FOR_EXCEPTION
753 static_cast<size_t> (num_packets_per_lid.extent (0)),
754 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = " 755 << num_export_lids <<
" != num_packets_per_lid.extent(0) = " 756 << num_packets_per_lid.extent (0) <<
".");
// With at least one export LID, num_packets_per_lid must have storage.
757 if (num_export_lids != 0) {
758 TEUCHOS_TEST_FOR_EXCEPTION
759 (num_packets_per_lid.data () == NULL, std::invalid_argument,
760 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but " 761 "num_packets_per_lid.data() = " 762 << num_packets_per_lid.data () <<
" == NULL.");
// Per-value byte size: a local value derived from the first matrix value
// (when one exists) is combined across processes with reduceAll. The
// reduction operator and the local size computation are on lines not
// visible here.
769 size_t num_bytes_per_value = 0;
784 size_t num_bytes_per_value_l = 0;
785 if (local_matrix.values.extent(0) > 0) {
786 const ST& val = local_matrix.values(0);
789 using Teuchos::reduceAll;
790 reduceAll<int, size_t> (* (sourceMatrix.
getComm ()),
792 num_bytes_per_value_l,
793 Teuchos::outArg (num_bytes_per_value));
// With nothing to export, exports is replaced by an empty DualView.
796 if (num_export_lids == 0) {
797 exports = exports_view_type (
"exports", 0);
// One offset per export LID plus one extra entry.
802 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
// Fill num_packets_per_lid and offsets. The variable receiving the result
// and one argument are on lines not visible here.
807 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
808 local_matrix.graph.row_map, export_lids,
810 num_bytes_per_lid, num_bytes_per_gid,
811 num_bytes_per_pid, num_bytes_per_value);
// Reallocate exports only when the required size exceeds its extent.
814 if (count > static_cast<size_t> (exports.extent (0))) {
815 exports = exports_view_type (
"exports", count);
// Diagnostic output written to std::cerr.
817 std::ostringstream os;
818 os <<
"*** exports resized to " << count << std::endl;
819 std::cerr << os.str ();
823 std::ostringstream os;
824 os <<
"*** count: " << count <<
", exports.extent(0): " 825 << exports.extent (0) << std::endl;
826 std::cerr << os.str ();
// Packing PIDs into a nonempty buffer requires a nonempty export_pids.
832 TEUCHOS_TEST_FOR_EXCEPTION
833 (pack_pids && exports.extent (0) != 0 &&
834 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
835 "pack_pids is true, and exports.extent(0) = " <<
836 exports.extent (0) <<
" != 0, meaning that we need to pack at least " 837 "one matrix entry, but export_pids.extent(0) = 0.");
// Deduce the concrete matrix and map types for the do_pack template call.
839 typedef typename std::decay<decltype (local_matrix)>::type
840 local_matrix_device_type;
841 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the device side of exports as modified, then pack into its device
// view. The final argument of do_pack is on a line not visible here.
844 exports.modify_device ();
845 auto exports_d = exports.view_device ();
846 do_pack<local_matrix_device_type, local_map_type, DT>
847 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
848 export_lids, export_pids, offsets, num_bytes_per_value,
// Teuchos::Array / ArrayView entry point. It wraps the host arrays in
// Kokkos views, calls PackCrsMatrixImpl::packCrsMatrix with pack_pids set to
// false, and resizes the exports array to match the packed DualView.
//
// The line with the function name and the sourceMatrix parameter is missing
// from this listing, as are the copy-back statements.
855 template<
typename ST,
typename LO,
typename GO,
typename NT>
858 Teuchos::Array<char>& exports,
859 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
860 const Teuchos::ArrayView<const LO>& exportLIDs,
861 size_t& constantNumPackets)
864 using device_type =
typename local_matrix_device_type::device_type;
// Host-side device type built from the execution space of a host mirror.
866 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
867 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
// View over the numPacketsPerLID storage. The helper being called is on a
// line not visible here; the boolean argument is false.
// NOTE(review): presumably create_mirror_view_from_raw_host_array with its
// copy flag disabled because this array is output only -- TODO confirm.
873 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
875 numPacketsPerLID.getRawPtr (),
876 numPacketsPerLID.size (),
false,
877 "num_packets_per_lid");
// View over the exportLIDs storage; here the boolean argument is true.
884 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
886 exportLIDs.getRawPtr (),
887 exportLIDs.size (),
true,
// PIDs are not packed on this path, so export_pids_d is left default
// constructed and pack_pids is false.
890 Kokkos::View<int*, device_type> export_pids_d;
891 Kokkos::DualView<char*, buffer_device_type> exports_dv;
892 constexpr
bool pack_pids =
false;
893 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
894 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
895 export_pids_d, constantNumPackets, pack_pids);
// Unmanaged host view over the caller-owned numPacketsPerLID array.
// NOTE(review): presumably the target of a deep_copy on a line not visible
// here -- TODO confirm.
899 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
900 (numPacketsPerLID.getRawPtr (),
901 numPacketsPerLID.size ());
// Resize the output array only when its size differs from the packed size.
909 if (static_cast<size_t> (exports.size ()) !=
910 static_cast<size_t> (exports_dv.extent (0))) {
911 exports.resize (exports_dv.extent (0));
// Unmanaged host view over the exports array. The rest of this statement
// is on a line not visible here.
913 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
// DualView entry point (the string below names it packCrsMatrixNew). It
// calls PackCrsMatrixImpl::packCrsMatrix on the device views of the inputs
// with pack_pids set to false.
//
// The line with the function name and most parameters is missing from this
// listing.
918 template<
typename ST,
typename LO,
typename GO,
typename NT>
925 size_t& constantNumPackets)
// PIDs are not packed on this path: an empty PID view and pack_pids false.
931 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
932 constexpr
bool pack_pids =
false;
// Work on a copy of the numPacketsPerLID DualView: clear its sync state and
// mark the device side as modified. The device view itself is then taken
// from the original numPacketsPerLID object.
935 auto numPacketsPerLID_nc = numPacketsPerLID;
936 numPacketsPerLID_nc.clear_sync_state ();
937 numPacketsPerLID_nc.modify_device ();
938 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// exportLIDs must not be waiting for a sync to device before its device
// view is read.
941 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
942 auto exportLIDs_d = exportLIDs.view_device ();
// String argument of a call whose beginning is on a line not visible here.
945 "Tpetra::Details::packCrsMatrixNew",
948 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
949 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
950 exportPIDs_d, constantNumPackets, pack_pids);
// Entry point that packs rows together with process IDs (the message prefix
// below names it packCrsMatrixWithOwningPIDs). It wraps the host arrays in
// views, calls PackCrsMatrixImpl::packCrsMatrix with pack_pids set to true,
// and writes progress and error messages to std::cerr.
//
// The line with the function name and the first parameters is missing from
// this listing, as are the conditions guarding the diagnostic output and
// the statements that follow each catch block.
953 template<
typename ST,
typename LO,
typename GO,
typename NT>
957 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
958 const Teuchos::ArrayView<const LO>& exportLIDs,
959 const Teuchos::ArrayView<const int>& sourcePIDs,
960 size_t& constantNumPackets)
// Host-side device type built from the host execution space of the buffer.
964 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
965 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
967 typename local_matrix_device_type::device_type outputDevice;
// Message prefix, allocated only when diagnostic output is produced.
970 std::unique_ptr<std::string> prefix;
// Determine the rank of this process from the matrix map and its
// communicator. The results of the two null branches are on lines not
// visible here.
972 const int myRank = [&] () {
973 auto map = sourceMatrix.
getMap ();
974 if (map.get () ==
nullptr) {
977 auto comm = map->getComm ();
978 if (comm.get () ==
nullptr) {
981 return comm->getRank ();
// Build the prefix and announce the start on std::cerr.
983 std::ostringstream os;
984 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
985 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
987 std::ostringstream os2;
988 os2 << *prefix <<
"start" << std::endl;
989 std::cerr << os2.str ();
// Views over the three host arrays. The helper being called is on lines not
// visible here; the boolean argument is false for the packet counts and
// true for the LIDs and PIDs.
// NOTE(review): presumably create_mirror_view_from_raw_host_array, with the
// boolean selecting whether the host data is copied -- TODO confirm.
996 auto num_packets_per_lid_d =
998 numPacketsPerLID.getRawPtr (),
999 numPacketsPerLID.size (),
false,
1000 "num_packets_per_lid");
1004 auto export_lids_d =
1006 exportLIDs.getRawPtr (),
1007 exportLIDs.size (),
true,
1011 auto export_pids_d =
1013 sourcePIDs.getRawPtr (),
1014 sourcePIDs.size (),
true,
// This path packs the PIDs alongside the matrix entries.
1016 constexpr
bool pack_pids =
true;
1018 PackCrsMatrixImpl::packCrsMatrix
1019 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1020 export_pids_d, constantNumPackets, pack_pids);
// Report an exception from the pack call on std::cerr.
1022 catch (std::exception& e) {
1024 std::ostringstream os;
1025 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: " 1026 << e.what () << std::endl;
1027 std::cerr << os.str ();
// Same report for an exception that is not derived from std::exception.
1033 std::ostringstream os;
1034 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception " 1035 "not a subclass of std::exception" << std::endl;
1036 std::cerr << os.str ();
// When there are packet counts, wrap the caller-owned array in an unmanaged
// host view. The messages below indicate a Kokkos::deep_copy follows on a
// line not visible here.
1041 if (numPacketsPerLID.size () != 0) {
1045 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1046 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Report an exception from the copy on std::cerr.
1049 catch (std::exception& e) {
1051 std::ostringstream os;
1052 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1053 std::cerr << os.str ();
1059 std::ostringstream os;
1060 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass " 1061 "of std::exception" << std::endl;
1062 std::cerr << os.str ();
// Announce completion on std::cerr.
1069 std::ostringstream os;
1070 os << *prefix <<
"done" << std::endl;
1071 std::cerr << os.str ();
// Macro taking (ST, LO, GO, NT) that names the signatures of
// Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs for those types.
// NOTE(review): presumably each is an explicit template instantiation; the
// lines that would carry the template keyword are missing from this listing.
1078 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \ 1080 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \ 1081 Teuchos::Array<char>&, \ 1082 const Teuchos::ArrayView<size_t>&, \ 1083 const Teuchos::ArrayView<const LO>&, \ 1086 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \ 1087 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \ 1088 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \ 1089 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \ 1092 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \ 1093 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \ 1094 const Teuchos::ArrayView<size_t>&, \ 1095 const Teuchos::ArrayView<const LO>&, \ 1096 const Teuchos::ArrayView<const int>&, \ 1099 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
GlobalOrdinal global_ordinal_type
The type of global indices.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
"Local" part of Map suitable for Kokkos kernels.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Implementation details of Tpetra.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Compute the number of packets and offsets for the pack procedure.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
static bool verbose()
Whether Tpetra is in verbose mode.
typename Node::device_type device_type
The Kokkos device type.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
int getError() const
Host function for getting the error.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Base class for distributed Tpetra objects that support data redistribution.
LocalOrdinal local_ordinal_type
The type of local indices.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.