40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
84 #ifndef DOXYGEN_SHOULD_SKIP_THIS
94 namespace PackCrsMatrixImpl {
// Functor whose operator() has the (index, update, final) signature of a
// Kokkos::parallel_scan body.  The class-name line and several body lines are
// not visible in this excerpt; the comments below describe only what the
// visible lines establish.
//
// For scan index curInd it stores the running total into outputOffsets_(curInd)
// and, when curInd addresses a valid entry of counts_, stores into
// counts_(curInd) a byte count derived from the number of entries in local row
// lclRowInds_(curInd).
102 template<
class OutputOffsetsViewType,
103 class CountsViewType,
104 class InputOffsetsViewType,
105 class InputLocalRowIndicesViewType,
106 class InputLocalRowPidsViewType,
108 #ifdef HAVE_TPETRA_DEBUG
116 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
117 typedef typename CountsViewType::non_const_value_type count_type;
118 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
119 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
120 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
122 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the view template parameters.
123 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
124 typename device_type::execution_space>::value,
125 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
126 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
127 "OutputOffsetsViewType must be a Kokkos::View.");
128 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
129 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
130 static_assert (std::is_integral<output_offset_type>::value,
131 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
132 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
133 "CountsViewType must be a Kokkos::View.");
// NOTE(review): the next assertion compares CountsViewType::value_type with
// output_offset_type rather than count_type, although its message is about
// CountsViewType being nonconst.  As written it also forces the two value
// types to be identical -- confirm whether count_type was intended.
134 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
135 "CountsViewType must be a nonconst Kokkos::View.");
136 static_assert (std::is_integral<count_type>::value,
137 "The type of each entry of CountsViewType must be a built-in integer type.");
138 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
139 "InputOffsetsViewType must be a Kokkos::View.");
140 static_assert (std::is_integral<input_offset_type>::value,
141 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
142 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
143 "InputLocalRowIndicesViewType must be a Kokkos::View.");
144 static_assert (std::is_integral<local_row_index_type>::value,
145 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
148 const CountsViewType& counts,
149 const InputOffsetsViewType& rowOffsets,
150 const InputLocalRowIndicesViewType& lclRowInds,
151 const InputLocalRowPidsViewType& lclRowPids,
152 const count_type sizeOfLclCount,
153 const count_type sizeOfGblColInd,
154 const count_type sizeOfPid,
155 const count_type sizeOfValue) :
156 outputOffsets_ (outputOffsets),
158 rowOffsets_ (rowOffsets),
159 lclRowInds_ (lclRowInds),
160 lclRowPids_ (lclRowPids),
161 sizeOfLclCount_ (sizeOfLclCount),
162 sizeOfGblColInd_ (sizeOfGblColInd),
163 sizeOfPid_ (sizeOfPid),
164 sizeOfValue_ (sizeOfValue),
// Constructor-time shape checks: counts_ must have one entry per row to pack,
// and outputOffsets_ must have exactly one more entry than that.  A mismatch
// throws std::invalid_argument with a message built in a std::ostringstream.
168 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
170 if (numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
171 std::ostringstream os;
172 os <<
"lclRowInds.extent(0) = " << numRowsToPack
173 <<
" != counts.extent(0) = " << counts_.extent (0)
175 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
177 if (
static_cast<size_t> (numRowsToPack + 1) !=
178 static_cast<size_t> (outputOffsets_.extent (0))) {
179 std::ostringstream os;
180 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
181 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
183 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body.  curInd is the scan index, update is the running total carried by
// the scan, and final is the scan's final-pass flag.  What happens on the
// range-check failures below is not visible in this excerpt.
188 KOKKOS_INLINE_FUNCTION
void
189 operator() (
const local_row_index_type& curInd,
190 output_offset_type& update,
191 const bool final)
const
194 if (curInd <
static_cast<local_row_index_type
> (0)) {
202 if (curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
// Record the running total as the offset for this index.
207 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) correspond to a row; the extra last
// scan index just receives the grand total in outputOffsets_.
210 if (curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
211 const auto lclRow = lclRowInds_(curInd);
212 if (
static_cast<size_t> (lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
213 static_cast<local_row_index_type
> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row is the difference of consecutive row offsets.
221 const count_type count =
222 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// An empty row contributes zero bytes; otherwise the size is the count header
// plus a per-entry size.  The PID size is included only when lclRowPids_ is
// nonempty.  The tail of this expression is not visible in this excerpt.
228 const count_type numBytes = (count == 0) ?
229 static_cast<count_type
> (0) :
230 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
231 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
235 counts_(curInd) = numBytes;
// Mirror view of the error flag; presumably part of the getError() accessor
// (its enclosing lines are not visible) -- TODO confirm.
247 auto error_h = Kokkos::create_mirror_view (error_);
// Data members: output views, const input views, per-field byte sizes, and a
// rank-0 int view holding the error flag.
253 OutputOffsetsViewType outputOffsets_;
254 CountsViewType counts_;
255 typename InputOffsetsViewType::const_type rowOffsets_;
256 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
257 typename InputLocalRowPidsViewType::const_type lclRowPids_;
258 count_type sizeOfLclCount_;
259 count_type sizeOfGblColInd_;
260 count_type sizeOfPid_;
261 count_type sizeOfValue_;
262 Kokkos::View<int, device_type> error_;
// Host-side driver: validates the view extents, runs the scan functor over
// [0, numRowsToPack + 1) with Kokkos::parallel_scan, and returns a count_type
// value read from outputOffsets via getEntryOnHost (the index argument of that
// call is not visible in this excerpt).
//
// Throws std::invalid_argument on an extent mismatch and std::runtime_error if
// the functor reports a nonzero error code.
274 template<
class OutputOffsetsViewType,
275 class CountsViewType,
276 class InputOffsetsViewType,
277 class InputLocalRowIndicesViewType,
278 class InputLocalRowPidsViewType>
279 typename CountsViewType::non_const_value_type
280 computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
281 const CountsViewType& counts,
282 const InputOffsetsViewType& rowOffsets,
283 const InputLocalRowIndicesViewType& lclRowInds,
284 const InputLocalRowPidsViewType& lclRowPids,
285 const typename CountsViewType::non_const_value_type sizeOfLclCount,
286 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
287 const typename CountsViewType::non_const_value_type sizeOfPid,
288 const typename CountsViewType::non_const_value_type sizeOfValue)
291 CountsViewType,
typename InputOffsetsViewType::const_type,
292 typename InputLocalRowIndicesViewType::const_type,
293 typename InputLocalRowPidsViewType::const_type> functor_type;
294 typedef typename CountsViewType::non_const_value_type count_type;
295 typedef typename OutputOffsetsViewType::size_type size_type;
296 typedef typename OutputOffsetsViewType::execution_space execution_space;
297 typedef typename functor_type::local_row_index_type LO;
298 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
299 const char prefix[] =
"computeNumPacketsAndOffsets: ";
// The number of rows to pack is the length of the list of local row indices.
301 count_type count = 0;
302 const count_type numRowsToPack = lclRowInds.extent (0);
304 if (numRowsToPack == 0) {
// Argument checks: the matrix must have at least one row, outputOffsets needs
// numRowsToPack + 1 entries, and counts needs numRowsToPack entries.
308 TEUCHOS_TEST_FOR_EXCEPTION
309 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
310 std::invalid_argument, prefix <<
"There is at least one row to pack, "
311 "but the matrix has no rows. lclRowInds.extent(0) = " <<
312 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
313 rowOffsets.extent (0) <<
" <= 1.");
314 TEUCHOS_TEST_FOR_EXCEPTION
315 (outputOffsets.extent (0) !=
316 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
317 prefix <<
"Output dimension does not match number of rows to pack. "
318 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
319 <<
" != lclRowInds.extent(0) + 1 = "
320 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
321 TEUCHOS_TEST_FOR_EXCEPTION
322 (counts.extent (0) != numRowsToPack, std::invalid_argument,
323 prefix <<
"counts.extent(0) = " << counts.extent (0)
324 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// The scan range has one more index than there are rows, so that the last
// entry of outputOffsets receives the total.
326 functor_type f (outputOffsets, counts, rowOffsets,
327 lclRowInds, lclRowPids, sizeOfLclCount,
328 sizeOfGblColInd, sizeOfPid, sizeOfValue);
329 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// Any nonzero error code reported by the functor becomes a std::runtime_error.
332 const int errCode = f.getError ();
333 TEUCHOS_TEST_FOR_EXCEPTION
334 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
335 << errCode <<
" != 0.");
// Consistency check of the scan result against a running `total` (its
// declaration and accumulation are not visible here); on mismatch a diagnostic
// is assembled in *errStr.
// NOTE(review): LO is the local row index type while numRowsToPack is
// count_type; if their signedness differs, the comparisons in these loops mix
// signed and unsigned operands -- confirm.
339 for (LO k = 0; k < numRowsToPack; ++k) {
342 if (outputOffsets(numRowsToPack) != total) {
343 if (errStr.get () == NULL) {
344 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
346 std::ostringstream& os = *errStr;
348 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
349 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
350 << total <<
"." << std::endl;
351 if (numRowsToPack != 0) {
353 if (numRowsToPack <
static_cast<LO
> (10)) {
354 os <<
"outputOffsets: [";
355 for (LO i = 0; i <= numRowsToPack; ++i) {
356 os << outputOffsets(i);
357 if (
static_cast<LO
> (i + 1) <= numRowsToPack) {
361 os <<
"]" << std::endl;
363 for (LO i = 0; i < numRowsToPack; ++i) {
365 if (
static_cast<LO
> (i + 1) < numRowsToPack) {
369 os <<
"]" << std::endl;
372 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
373 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
376 count = outputOffsets(numRowsToPack);
377 return {
false, errStr};
// Result is fetched from outputOffsets through getEntryOnHost.
383 using Tpetra::Details::getEntryOnHost;
384 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Packs one matrix row into `exports`, starting at byte position `offset`.
// The line carrying the function name and several parameter lines are not
// visible in this excerpt.
//
// Returns a Kokkos::pair<int, size_t>: first is an error code (0 on success,
// 10 if any packing step reported an error, 11 if the number of bytes written
// differs from the expected total), second is the number of bytes written.
404 template<
class ST,
class ColumnMap,
class BufferDeviceType>
406 Kokkos::pair<int, size_t>
408 const Kokkos::View<char*, BufferDeviceType>& exports,
413 const size_t num_ent,
414 const size_t num_bytes_per_value,
415 const bool pack_pids)
417 using Kokkos::subview;
418 using LO =
typename ColumnMap::local_ordinal_type;
419 using GO =
typename ColumnMap::global_ordinal_type;
420 using return_type = Kokkos::pair<int, size_t>;
424 return return_type (0, 0);
427 const LO num_ent_LO =
static_cast<LO
> (num_ent);
// Byte layout of one packed row, in order: entry count, global column
// indices, (optionally) process ids, values.  Each *_beg is the previous
// segment's start plus its length.
428 const size_t num_ent_beg = offset;
431 const size_t gids_beg = num_ent_beg + num_ent_len;
434 const size_t pids_beg = gids_beg + gids_len;
435 const size_t pids_len = pack_pids ?
437 static_cast<size_t> (0);
439 const size_t vals_beg = gids_beg + gids_len + pids_len;
440 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw output pointers for each segment; pids_out is NULL when PIDs are not packed.
442 char*
const num_ent_out = exports.data () + num_ent_beg;
443 char*
const gids_out = exports.data () + gids_beg;
444 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
445 char*
const vals_out = exports.data () + vals_beg;
447 size_t num_bytes_out = 0;
// Column indices are converted from local to global through the column map
// before being packed.
454 for (
size_t k = 0; k < num_ent; ++k) {
455 const LO lid = lids_in[k];
456 const GO gid = col_map.getGlobalElement (lid);
// NOTE(review): pids_in is indexed by the column local index read from
// lids_in[k], not by k -- confirm that pids_in is indexed by column LID.
461 for (
size_t k = 0; k < num_ent; ++k) {
462 const LO lid = lids_in[k];
463 const int pid = pids_in[lid];
469 error_code += p.first;
470 num_bytes_out += p.second;
// Error code 10: at least one packing step reported an error.
473 if (error_code != 0) {
474 return return_type (10, num_bytes_out);
// Error code 11: bytes written do not add up to the four segment lengths.
477 const size_t expected_num_bytes =
478 num_ent_len + gids_len + pids_len + vals_len;
479 if (num_bytes_out != expected_num_bytes) {
480 return return_type (11, num_bytes_out);
482 return return_type (0, num_bytes_out);
// Reduction functor that packs one exported row per index i by calling
// packCrsMatrixRow.  The reduction value is a pair (error code, index); the
// visible operator() uses codes 1 (export LID not below numRows()), 2 (row
// does not fit in the exports buffer), 3 (bytes packed differ from
// num_packets_per_lid(i)), or the code returned by packCrsMatrixRow.
// Several lines of this struct are not visible in this excerpt.
485 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
486 struct PackCrsMatrixFunctor {
487 typedef LocalMatrix local_matrix_type;
489 typedef typename local_matrix_type::value_type ST;
490 typedef typename local_map_type::local_ordinal_type LO;
491 typedef typename local_map_type::global_ordinal_type GO;
492 typedef typename local_matrix_type::device_type DT;
494 typedef Kokkos::View<const size_t*, BufferDeviceType>
495 num_packets_per_lid_view_type;
496 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
497 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
501 typedef typename num_packets_per_lid_view_type::non_const_value_type
503 typedef typename offsets_view_type::non_const_value_type
// Reduction result: (error code, index of the offending row).
505 typedef Kokkos::pair<int, LO> value_type;
507 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
508 "local_map_type::local_ordinal_type and "
509 "local_matrix_type::ordinal_type must be the same.");
511 local_matrix_type local_matrix;
512 local_map_type local_col_map;
513 exports_view_type exports;
514 num_packets_per_lid_view_type num_packets_per_lid;
515 export_lids_view_type export_lids;
516 source_pids_view_type source_pids;
517 offsets_view_type offsets;
518 size_t num_bytes_per_value;
521 PackCrsMatrixFunctor (
const local_matrix_type& local_matrix_in,
522 const local_map_type& local_col_map_in,
523 const exports_view_type& exports_in,
524 const num_packets_per_lid_view_type& num_packets_per_lid_in,
525 const export_lids_view_type& export_lids_in,
526 const source_pids_view_type& source_pids_in,
527 const offsets_view_type& offsets_in,
528 const size_t num_bytes_per_value_in,
529 const bool pack_pids_in) :
530 local_matrix (local_matrix_in),
531 local_col_map (local_col_map_in),
532 exports (exports_in),
533 num_packets_per_lid (num_packets_per_lid_in),
534 export_lids (export_lids_in),
535 source_pids (source_pids_in),
536 offsets (offsets_in),
537 num_bytes_per_value (num_bytes_per_value_in),
538 pack_pids (pack_pids_in)
// Sanity check in the constructor: for a nonempty matrix the row map must have
// numRows + 1 entries, otherwise std::logic_error is thrown.
540 const LO numRows = local_matrix_in.numRows ();
542 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
543 TEUCHOS_TEST_FOR_EXCEPTION
544 (numRows != 0 && rowMapDim != numRows +
static_cast<LO
> (1),
545 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
546 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 and an invalid row index.
549 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
551 using ::Tpetra::Details::OrdinalTraits;
552 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Reduction join: the visible condition acts only when src carries an error
// and dst does not yet.
555 KOKKOS_INLINE_FUNCTION
void
556 join (
volatile value_type& dst,
const volatile value_type& src)
const
560 if (src.first != 0 && dst.first == 0) {
// Packs the row for export index i and records an error in dst on failure.
565 KOKKOS_INLINE_FUNCTION
566 void operator() (
const LO i, value_type& dst)
const
568 const size_t offset = offsets[i];
569 const LO export_lid = export_lids[i];
570 const size_t buf_size = exports.size();
571 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map[export_lid+1] is read here, before export_lid is
// range-checked against numRows() below; an out-of-range export_lid would
// index row_map out of bounds -- confirm.
572 const size_t num_ent =
573 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
574 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): init() sets dst.first to 0, so the `dst.first != 0` guards
// below look like they prevent the first error from ever being recorded;
// join() tests `dst.first == 0` instead -- confirm whether `== 0` was intended.
584 if (export_lid >= local_matrix.numRows ()) {
585 if (dst.first != 0) {
586 dst = Kokkos::make_pair (1, i);
590 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
591 if (dst.first != 0) {
592 dst = Kokkos::make_pair (2, i);
// Slice the row's values and column indices out of the local matrix and hand
// them to packCrsMatrixRow.
602 const auto row_beg = local_matrix.graph.row_map[export_lid];
603 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
604 auto vals_in = subview (local_matrix.values,
605 Kokkos::make_pair (row_beg, row_end));
606 auto lids_in = subview (local_matrix.graph.entries,
607 Kokkos::make_pair (row_beg, row_end));
608 typedef local_map_type LMT;
609 typedef BufferDeviceType BDT;
610 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
611 source_pids, vals_in, offset,
612 num_ent, num_bytes_per_value,
// Propagate the per-row error code, or flag code 3 when the number of bytes
// written does not match the expected size for this row.
614 int error_code_this_row = p.first;
615 size_t num_bytes_packed_this_row = p.second;
616 if (error_code_this_row != 0) {
617 if (dst.first != 0) {
618 dst = Kokkos::make_pair (error_code_this_row, i);
621 else if (num_bytes_packed_this_row != num_bytes) {
622 if (dst.first != 0) {
623 dst = Kokkos::make_pair (3, i);
// Validates the argument extents, then runs PackCrsMatrixFunctor over all
// export LIDs with Kokkos::parallel_reduce.  Throws std::invalid_argument on
// inconsistent extents and std::runtime_error if the functor reports a
// nonzero error code.  Some parameter lines are not visible in this excerpt.
636 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
638 do_pack (
const LocalMatrix& local_matrix,
640 const Kokkos::View<char*, BufferDeviceType>& exports,
644 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
645 const size_t num_bytes_per_value,
646 const bool pack_pids)
648 using LO =
typename LocalMap::local_ordinal_type;
649 using DT =
typename LocalMatrix::device_type;
650 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
651 const char prefix[] =
"Tpetra::Details::do_pack: ";
// With at least one row to export, offsets must have one more entry than
// export_lids, and num_packets_per_lid must have the same length as export_lids.
653 if (export_lids.extent (0) != 0) {
654 TEUCHOS_TEST_FOR_EXCEPTION
655 (
static_cast<size_t> (offsets.extent (0)) !=
656 static_cast<size_t> (export_lids.extent (0) + 1),
657 std::invalid_argument, prefix <<
"offsets.extent(0) = "
658 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
659 << export_lids.extent (0) <<
") + 1.");
660 TEUCHOS_TEST_FOR_EXCEPTION
661 (export_lids.extent (0) != num_packets_per_lid.extent (0),
662 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
663 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
664 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires a nonempty source_pids view.
668 TEUCHOS_TEST_FOR_EXCEPTION
669 (pack_pids && exports.extent (0) != 0 &&
670 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
671 "pack_pids is true, and exports.extent(0) = " <<
672 exports.extent (0) <<
" != 0, meaning that we need to pack at "
673 "least one matrix entry, but source_pids.extent(0) = 0.");
// One reduction index per entry of num_packets_per_lid.
676 using pack_functor_type =
677 PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
678 pack_functor_type f (local_matrix, local_map, exports,
679 num_packets_per_lid, export_lids,
680 source_pids, offsets, num_bytes_per_value,
683 typename pack_functor_type::value_type result;
684 range_type range (0, num_packets_per_lid.extent (0));
685 Kokkos::parallel_reduce (range, f, result);
// result.first is the error code, result.second the row index reported with it.
687 if (result.first != 0) {
690 TEUCHOS_TEST_FOR_EXCEPTION
691 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor "
692 "reported error code " << result.first <<
" for the first "
693 "bad row " << result.second <<
".");
// Implementation-level pack routine (its prefix string names it
// Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix).  Visible steps: set
// constant_num_packets to 0, validate the export-list extents, agree on the
// per-value byte size across the communicator, compute per-row byte counts and
// offsets, grow `exports` if it is too small, and call do_pack on the device
// view of `exports`.  Several lines, including the function-name line, are not
// visible in this excerpt.
726 template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
729 Kokkos::DualView<char*, BufferDeviceType>& exports,
730 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
731 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
732 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
733 size_t& constant_num_packets,
734 const bool pack_pids,
738 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
742 typedef BufferDeviceType DT;
743 typedef typename DT::execution_space execution_space;
744 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
745 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
746 constexpr
bool debug =
false;
749 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
// Output parameter is always set to 0 by this routine.
754 constant_num_packets = 0;
756 const size_t num_export_lids =
757 static_cast<size_t> (export_lids.extent (0));
// NOTE(review): the message below prints "num_export_lids.extent(0)", but
// num_export_lids is a scalar holding export_lids.extent(0) -- the label looks
// like a typo for "export_lids.extent(0)"; confirm before changing the string.
758 TEUCHOS_TEST_FOR_EXCEPTION
760 static_cast<size_t> (num_packets_per_lid.extent (0)),
761 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
762 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
763 << num_packets_per_lid.extent (0) <<
".");
764 if (num_export_lids != 0) {
765 TEUCHOS_TEST_FOR_EXCEPTION
766 (num_packets_per_lid.data () == NULL, std::invalid_argument,
767 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
768 "num_packets_per_lid.data() = "
769 << num_packets_per_lid.data () <<
" == NULL.");
// The per-value byte size is computed locally from the first stored value (if
// any) and then combined across the matrix's communicator with reduceAll; the
// reduction operator argument is not visible in this excerpt.
776 size_t num_bytes_per_value = 0;
791 size_t num_bytes_per_value_l = 0;
792 if (local_matrix.values.extent(0) > 0) {
793 const ST& val = local_matrix.values(0);
796 using Teuchos::reduceAll;
797 reduceAll<int, size_t> (* (sourceMatrix.
getComm ()),
799 num_bytes_per_value_l,
800 Teuchos::outArg (num_bytes_per_value));
// Nothing to export: `exports` is replaced by an empty DualView.
803 if (num_export_lids == 0) {
804 exports = exports_view_type (
"exports", 0);
// One offset per exported row plus a final entry.
809 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
814 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
815 local_matrix.graph.row_map, export_lids,
817 num_bytes_per_lid, num_bytes_per_gid,
818 num_bytes_per_pid, num_bytes_per_value);
// Reallocate `exports` only when the required byte count exceeds its current
// extent; the diagnostic output goes to std::cerr.
821 if (count >
static_cast<size_t> (exports.extent (0))) {
822 exports = exports_view_type (
"exports", count);
824 std::ostringstream os;
825 os <<
"*** exports resized to " << count << std::endl;
826 std::cerr << os.str ();
830 std::ostringstream os;
831 os <<
"*** count: " << count <<
", exports.extent(0): "
832 << exports.extent (0) << std::endl;
833 std::cerr << os.str ();
839 TEUCHOS_TEST_FOR_EXCEPTION
840 (pack_pids && exports.extent (0) != 0 &&
841 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
842 "pack_pids is true, and exports.extent(0) = " <<
843 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
844 "one matrix entry, but export_pids.extent(0) = 0.");
846 typedef typename std::decay<decltype (local_matrix)>::type
848 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the device side of `exports` as modified, then pack into its device view.
851 exports.modify_device ();
852 auto exports_d = exports.view_device ();
853 do_pack<local_matrix_type, local_map_type, DT>
854 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
855 export_lids, export_pids, offsets, num_bytes_per_value,
// Teuchos::Array / Teuchos::ArrayView front end.  Visible steps: wrap the raw
// host arrays as Kokkos views via raw pointer + size, call
// PackCrsMatrixImpl::packCrsMatrix with pack_pids = false, then resize the
// Teuchos `exports` array to match the packed DualView.  Several lines,
// including the function-name line and the copy-back calls, are not visible
// in this excerpt.
862 template<
typename ST,
typename LO,
typename GO,
typename NT>
865 Teuchos::Array<char>& exports,
866 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
867 const Teuchos::ArrayView<const LO>& exportLIDs,
868 size_t& constantNumPackets,
872 using device_type =
typename local_matrix_type::device_type;
874 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
875 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
// The boolean argument is false for numPacketsPerLID and true for exportLIDs.
// It presumably maps to the `copy` parameter of
// create_mirror_view_from_raw_host_array (the callee line is not visible) --
// TODO confirm.
881 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
883 numPacketsPerLID.getRawPtr (),
884 numPacketsPerLID.size (),
false,
885 "num_packets_per_lid");
892 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
894 exportLIDs.getRawPtr (),
895 exportLIDs.size (),
true,
// This entry point does not pack PIDs, so export_pids_d stays default-constructed.
898 Kokkos::View<int*, device_type> export_pids_d;
899 Kokkos::DualView<char*, buffer_device_type> exports_dv;
900 constexpr
bool pack_pids =
false;
901 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
902 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
903 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
907 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
908 (numPacketsPerLID.getRawPtr (),
909 numPacketsPerLID.size ());
// Make the caller's array exactly as long as the packed buffer before wrapping
// it in a host view.
917 if (
static_cast<size_t> (exports.size ()) !=
918 static_cast<size_t> (exports_dv.extent (0))) {
919 exports.resize (exports_dv.extent (0));
921 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
// DualView front end (the label string below names it
// Tpetra::Details::packCrsMatrixNew).  Forwards device views of the DualView
// arguments to PackCrsMatrixImpl::packCrsMatrix with pack_pids = false.
// Several lines, including the function-name line, are not visible in this
// excerpt.
926 template<
typename ST,
typename LO,
typename GO,
typename NT>
933 size_t& constantNumPackets,
// No PIDs are packed here, so a zero-length PID view is passed through.
941 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
942 constexpr
bool pack_pids =
false;
// Works on a copy of the DualView handle: its sync state is cleared and its
// device side marked modified before the device view is taken.
945 auto numPacketsPerLID_nc = numPacketsPerLID;
946 numPacketsPerLID_nc.clear_sync_state ();
947 numPacketsPerLID_nc.modify_device ();
948 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// exportLIDs must not need a sync to device at this point.
951 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
952 auto exportLIDs_d = exportLIDs.view_device ();
955 "Tpetra::Details::packCrsMatrixNew",
958 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
959 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
960 exportPIDs_d, constantNumPackets, pack_pids, distor);
// Front end that packs rows together with process ids (its message prefix
// names it packCrsMatrixWithOwningPIDs): wraps the raw host arrays, calls
// PackCrsMatrixImpl::packCrsMatrix with pack_pids = true, and writes
// progress / error text to std::cerr.  The conditions guarding the diagnostic
// output and the try blocks are not visible in this excerpt.
963 template<
typename ST,
typename LO,
typename GO,
typename NT>
967 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
968 const Teuchos::ArrayView<const LO>& exportLIDs,
969 const Teuchos::ArrayView<const int>& sourcePIDs,
970 size_t& constantNumPackets,
975 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
976 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
978 typename local_matrix_type::device_type outputDevice;
981 std::unique_ptr<std::string> prefix;
// Determine this process's rank for the message prefix.  The values returned
// when the Map or communicator is null are not visible in this excerpt.
983 const int myRank = [&] () {
984 auto map = sourceMatrix.
getMap ();
985 if (map.get () ==
nullptr) {
988 auto comm = map->getComm ();
989 if (comm.get () ==
nullptr) {
992 return comm->getRank ();
994 std::ostringstream os;
995 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
996 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
998 std::ostringstream os2;
999 os2 << *prefix <<
"start" << std::endl;
1000 std::cerr << os2.str ();
// Wrap the caller's raw host arrays: numPacketsPerLID is passed with `false`,
// while exportLIDs and sourcePIDs are passed with `true`.
1007 auto num_packets_per_lid_d =
1009 numPacketsPerLID.getRawPtr (),
1010 numPacketsPerLID.size (),
false,
1011 "num_packets_per_lid");
1015 auto export_lids_d =
1017 exportLIDs.getRawPtr (),
1018 exportLIDs.size (),
true,
1022 auto export_pids_d =
1024 sourcePIDs.getRawPtr (),
1025 sourcePIDs.size (),
true,
// Unlike the other front ends in this file, this one packs PIDs.
1027 constexpr
bool pack_pids =
true;
1029 PackCrsMatrixImpl::packCrsMatrix
1030 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1031 export_pids_d, constantNumPackets, pack_pids, distor);
// Exceptions from the pack call are reported on std::cerr; whether they are
// rethrown is not visible in this excerpt.
1033 catch (std::exception& e) {
1035 std::ostringstream os;
1036 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1037 << e.what () << std::endl;
1038 std::cerr << os.str ();
1044 std::ostringstream os;
1045 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1046 "not a subclass of std::exception" << std::endl;
1047 std::cerr << os.str ();
// With a nonempty numPacketsPerLID, an unmanaged host view over the caller's
// storage is created; the error messages below indicate that a
// Kokkos::deep_copy follows (the call itself is not visible).
1052 if (numPacketsPerLID.size () != 0) {
1056 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1057 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1060 catch (std::exception& e) {
1062 std::ostringstream os;
1063 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1064 std::cerr << os.str ();
1070 std::ostringstream os;
1071 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1072 "of std::exception" << std::endl;
1073 std::cerr << os.str ();
1080 std::ostringstream os;
1081 os << *prefix <<
"done" << std::endl;
1082 std::cerr << os.str ();
// Macro taking the four template arguments (ST, LO, GO, NT); the visible
// continuation lines name Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs with their parameter types.  Presumably
// it expands to explicit instantiations of those templates (the `template`
// lines and trailing parameters are not visible in this excerpt) -- TODO confirm.
1089 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1091 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1092 Teuchos::Array<char>&, \
1093 const Teuchos::ArrayView<size_t>&, \
1094 const Teuchos::ArrayView<const LO>&, \
1098 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1099 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1100 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1101 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1105 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1106 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1107 const Teuchos::ArrayView<size_t>&, \
1108 const Teuchos::ArrayView<const LO>&, \
1109 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary.
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_matrix_type getLocalMatrix() const
The local sparse matrix.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Sets up and executes a communication plan for a Tpetra DistObject.
Implementation details of Tpetra.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for the "new" DistObject interface.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of bytes.