40 #ifndef TPETRA_DISTRIBUTOR_HPP
41 #define TPETRA_DISTRIBUTOR_HPP
44 #include "Teuchos_as.hpp"
45 #include "Teuchos_Describable.hpp"
46 #include "Teuchos_ParameterListAcceptorDefaultBase.hpp"
47 #include "Teuchos_VerboseObject.hpp"
50 #include "KokkosCompat_View.hpp"
51 #include "Kokkos_Core.hpp"
52 #include "Kokkos_TeuchosCommAdapters.hpp"
55 #include <type_traits>
// Values recording how a Distributor instance was initialized.  The 3-argument
// ArrayRCP doPosts in this file compares the member howInitialized_ against
// Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE, so these enumerators live in
// namespace Details.
// NOTE(review): the enum's opening line is outside this excerpt, and the
// meaning of each value is taken from its name only -- confirm against the
// createFrom* / getReverse implementations.
83 DISTRIBUTOR_NOT_INITIALIZED,
84 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS,
85 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS,
86 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS_N_RECVS,
87 DISTRIBUTOR_INITIALIZED_BY_REVERSE,
88 DISTRIBUTOR_INITIALIZED_BY_COPY,
176 public Teuchos::Describable,
177 public Teuchos::ParameterListAcceptorDefaultBase {
/// \brief Construct a Distributor from a communicator only.
///
/// \param comm [in] Communicator (Teuchos::Comm<int>).
///   NOTE(review): presumably stored in comm_ -- the definition is not in
///   this excerpt.
///
/// Declared \c explicit, so an RCP to a communicator is never implicitly
/// converted into a Distributor.
190 explicit Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm);
/// \brief Construct a Distributor from a communicator and an output stream.
///
/// \param comm [in] Communicator (Teuchos::Comm<int>).
/// \param out [in] Output stream.
///   NOTE(review): how \c out is used is not visible in this excerpt (the
///   debug output shown in this file is written to std::cerr) -- confirm.
203 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
204 const Teuchos::RCP<Teuchos::FancyOStream>& out);
/// \brief Construct a Distributor from a communicator and a parameter list.
///
/// \param comm [in] Communicator (Teuchos::Comm<int>).
/// \param plist [in/out] Parameter list (passed as a non-const
///   Teuchos::ParameterList).
///   NOTE(review): presumably forwarded to the ParameterListAcceptor base
///   this class derives from; accepted keys are not visible here.
219 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
220 const Teuchos::RCP<Teuchos::ParameterList>& plist);
/// \brief Construct a Distributor from a communicator, an output stream and
///   a parameter list.
///
/// \param comm [in] Communicator (Teuchos::Comm<int>).
/// \param out [in] Output stream.  NOTE(review): usage not visible here.
/// \param plist [in/out] Parameter list (non-const Teuchos::ParameterList).
///   NOTE(review): accepted keys are not visible in this excerpt.
238 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
239 const Teuchos::RCP<Teuchos::FancyOStream>& out,
240 const Teuchos::RCP<Teuchos::ParameterList>& plist);
/// \brief Set up this Distributor from a list of destination process ranks.
///
/// \param exportProcIDs [in] Process ranks (ints) to send to.
///   NOTE(review): presumably one entry per exported item -- confirm.
///
/// \return A size_t.  NOTE(review): presumably the number of items this
///   process will receive; the definition is not in this excerpt.
297 size_t createFromSends (
const Teuchos::ArrayView<const int>& exportProcIDs);
332 template <
class Ordinal>
335 const Teuchos::ArrayView<const int>& remoteProcIDs,
336 Teuchos::Array<Ordinal>& exportIDs,
337 Teuchos::Array<int>& exportProcIDs);
348 const Teuchos::ArrayView<const int>& remoteProcIDs);
/// \brief Read-only view of process ranks.
///
/// \return Non-owning, read-only view of ints.
///   NOTE(review): presumably a view of the member procsTo_ (the ranks this
///   process sends to) -- the definition is not in this excerpt.
383 Teuchos::ArrayView<const int>
getProcsTo()
const;
408 return howInitialized_;
/// \brief Return a Distributor for the reverse communication pattern.
///
/// \param create [in] Defaults to true.  NOTE(review): presumably controls
///   whether the reverse Distributor is built on demand when it does not yet
///   exist -- confirm against the definition.
///
/// \return RCP to a Distributor.  NOTE(review): presumably the cached
///   reverseDistributor_ member, which is \c mutable so that this \c const
///   method can populate it.
425 Teuchos::RCP<Distributor>
getReverse(
bool create=
true)
const;
451 template <
class Packet>
455 const Teuchos::ArrayView<Packet> &imports);
478 template <
class Packet>
481 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
482 const Teuchos::ArrayView<Packet> &imports,
483 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
/// \brief Declaration of the ArrayRCP-based doPosts taking a fixed packet
///   count (the "3 args, Teuchos::ArrayRCP" overload named in this file's
///   error messages).
///
/// \tparam Packet Element type of the export and import buffers.
/// \param exports [in] Outgoing packets.
/// \param imports [out] Buffer for incoming packets.
///
/// NOTE(review): the return-type line and the parameter between \c exports
/// and \c imports are not visible in this excerpt; the definition below uses
/// a value named \c numPackets.
509 template <
class Packet>
511 doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
513 const Teuchos::ArrayRCP<Packet> &imports);
/// \brief Declaration of the ArrayRCP-based doPosts taking per-LID packet
///   counts (the "4 args, Teuchos::ArrayRCP" overload).
///
/// \tparam Packet Element type of the export and import buffers.
/// \param exports [in] Outgoing packets.
/// \param numExportPacketsPerLID [in] Packet count for each exported LID.
/// \param imports [out] Buffer for incoming packets.
/// \param numImportPacketsPerLID [in] Packet count for each imported LID.
///
/// NOTE(review): the return-type line is not visible in this excerpt.
533 template <
class Packet>
535 doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
536 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
537 const Teuchos::ArrayRCP<Packet> &imports,
538 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
552 template <
class Packet>
556 const Teuchos::ArrayView<Packet> &imports);
562 template <
class Packet>
565 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
566 const Teuchos::ArrayView<Packet> &imports,
567 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
573 template <
class Packet>
577 const Teuchos::ArrayRCP<Packet> &imports);
583 template <
class Packet>
586 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
587 const Teuchos::ArrayRCP<Packet> &imports,
588 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
618 template <
class ExpView,
class ImpView>
619 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
621 const ExpView &exports,
623 const ImpView &imports);
646 template <
class ExpView,
class ImpView>
647 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
649 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
650 const ImpView &imports,
651 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
/// \brief Declaration of the Kokkos::View-based doPosts taking a fixed
///   packet count.
///
/// Participates in overload resolution only when both ExpView and ImpView
/// satisfy Kokkos::Impl::is_view; the std::enable_if has no second template
/// argument, so the return type is \c void.
///
/// \param exports [in] View of outgoing packets.
/// \param imports [out] View receiving incoming packets.
///
/// NOTE(review): the parameter between \c exports and \c imports is not
/// visible in this excerpt (presumably \c numPackets).
677 template <
class ExpView,
class ImpView>
678 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
679 doPosts (
const ExpView &exports,
681 const ImpView &imports);
/// \brief Declaration of the Kokkos::View-based doPosts taking per-LID
///   packet counts.
///
/// Participates in overload resolution only when both ExpView and ImpView
/// satisfy Kokkos::Impl::is_view; the return type is \c void.
///
/// \param exports [in] View of outgoing packets.
/// \param numExportPacketsPerLID [in] Packet count for each exported LID.
/// \param imports [out] View receiving incoming packets.
/// \param numImportPacketsPerLID [in] Packet count for each imported LID.
701 template <
class ExpView,
class ImpView>
702 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
703 doPosts (
const ExpView &exports,
704 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
705 const ImpView &imports,
706 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
712 template <
class ExpView,
class ImpView>
713 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
716 const ImpView &imports);
722 template <
class ExpView,
class ImpView>
723 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
725 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
726 const ImpView &imports,
727 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
733 template <
class ExpView,
class ImpView>
734 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
737 const ImpView &imports);
743 template <
class ExpView,
class ImpView>
744 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
746 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
747 const ImpView &imports,
748 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
754 bytes_sent = lastRoundBytesSend_;
755 bytes_recvd = lastRoundBytesRecv_;
/// \brief Write a description of this object to \c out.
///
/// \param out [out] Stream to write to.
/// \param verbLevel [in] Verbosity level; defaults to
///   Teuchos::Describable::verbLevel_default.
///
/// NOTE(review): the return-type line is not visible in this excerpt; this
/// class derives from Teuchos::Describable, so this presumably overrides its
/// describe().
787 describe (Teuchos::FancyOStream& out,
788 const Teuchos::EVerbosityLevel verbLevel =
789 Teuchos::Describable::verbLevel_default)
const;
// Communicator.  doPosts reads this process's rank from it
// (comm_->getRank()) and passes *comm_ to the Teuchos send calls.
794 Teuchos::RCP<const Teuchos::Comm<int> > comm_;
// Copied into the local 'doBarrier' in doPosts, which rejects the ready-send
// type (Details::DISTRIBUTOR_RSEND) unless this is true.
806 bool barrierBetween_;
// Supplies the initial value of verbose_ (see its default member initializer).
809 static bool getVerbose();
// Builds the prefix string that the debug-output code in this file writes at
// the start of each message (used as "os << *prefix << ...").
815 std::unique_ptr<std::string>
816 createPrefix(
const char methodName[])
const;
// Initialized from getVerbose().
// NOTE(review): presumably gates the std::cerr debug output in this file; the
// guarding 'if' lines are not visible in this excerpt.
819 bool verbose_ = getVerbose();
// Destination ranks.  doPosts passes procsTo_[p] as the target of each send
// and compares it with this process's rank to recognize the self-message.
842 Teuchos::Array<int> procsTo_;
// startsTo_[p] and lengthsTo_[p] give the start and length of the block that
// goes to procsTo_[p]; the 3-argument doPosts multiplies both by numPackets
// when taking a view of 'exports'.
852 Teuchos::Array<size_t> startsTo_;
859 Teuchos::Array<size_t> lengthsTo_;
// The 3-argument slow path sizes its send buffer as
// maxSendLength_ * numPackets.
864 size_t maxSendLength_;
// Empty selects the "fast" path in doPosts (contiguous views of 'exports').
// Otherwise the "slow" path uses indicesTo_[j] to locate each entry in
// 'exports' while packing a send buffer.
881 Teuchos::Array<size_t> indicesTo_;
// The 3-argument doPosts requires
// imports.size() >= totalReceiveLength_ * numPackets.
900 size_t totalReceiveLength_;
// lengthsFrom_[i] is the length of the i-th receive; doPosts multiplies it by
// numPackets (3 args) or sums per-LID counts over it (4 args).
907 Teuchos::Array<size_t> lengthsFrom_;
// Source rank of the i-th receive; passed to ireceive in doPosts.
914 Teuchos::Array<int> procsFrom_;
// NOTE(review): startsFrom_ and indicesFrom_ are not used by any code visible
// in this excerpt; presumably the receive-side counterparts of startsTo_ and
// indicesTo_ -- confirm.
921 Teuchos::Array<size_t> startsFrom_;
928 Teuchos::Array<size_t> indicesFrom_;
// Outstanding nonblocking requests.  doPosts appends the results of ireceive
// and isend; the doPostsAndWaits overloads throw if this is non-empty on entry.
936 Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int> > > requests_;
// Reverse-pattern Distributor.  doReversePosts calls
// createReverseDistributor() when this is null; 'mutable' allows that from
// const methods.
942 mutable Teuchos::RCP<Distributor> reverseDistributor_;
// Set at the end of the ArrayView doPostsAndWaits / doReversePostsAndWaits
// overloads to exports.size() * sizeof(Packet) and
// imports.size() * sizeof(Packet) respectively.
945 size_t lastRoundBytesSend_;
948 size_t lastRoundBytesRecv_;
// Timers, compiled in only when HAVE_TPETRA_DISTRIBUTOR_TIMINGS is defined.
// The doPosts implementations wrap them in Teuchos::TimeMonitor objects.
// Naming, as used in this file: "3"/"4" = the fixed-packet-count / per-LID
// overload, "TA" = Teuchos::ArrayRCP overloads, "KV" = Kokkos::View overloads;
// the _recvs_, _barrier_, _sends_, _sends_slow_ and _sends_fast_ suffixes name
// the phase of doPosts being timed.
// NOTE(review): the matching #endif is not visible in this excerpt.
950 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
951 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_;
952 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_;
953 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_;
954 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_;
955 Teuchos::RCP<Teuchos::Time> timer_doWaits_;
956 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_recvs_;
957 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_recvs_;
958 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_barrier_;
959 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_barrier_;
960 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_sends_;
961 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_sends_;
962 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_sends_slow_;
963 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_sends_slow_;
964 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_sends_fast_;
965 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_sends_fast_;
966 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_recvs_;
967 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_recvs_;
968 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_barrier_;
969 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_barrier_;
970 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_;
971 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_;
972 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_slow_;
973 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_slow_;
974 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_fast_;
975 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_fast_;
// NOTE(review): presumably consulted by getTag() to decide whether each code
// path gets its own message tag -- the definition is not in this excerpt.
992 bool useDistinctTags_;
/// \brief Return the message tag to use for a given code path.
///
/// \param pathTag [in] Identifies the calling code path.  In this file the
///   3-argument ArrayRCP doPosts passes 0 and the 4-argument one passes 1.
///
/// \return The tag that doPosts then passes to the Teuchos send calls.
998 int getTag (
const int pathTag)
const;
// NOTE(review): definition not visible in this excerpt; presumably derives the
// receive-side arrays (lengthsFrom_, procsFrom_, ...) from the send-side
// pattern -- confirm before relying on this description.
1010 void computeReceives ();
/// \brief Compute send-side lists from receive-side lists.
///
/// \tparam Ordinal Type of the global IDs.
/// \param remoteGIDs [in] Global IDs (read-only view).
/// \param remoteProcIDs [in] Process ranks (read-only view).
/// \param exportGIDs [out] Array passed by non-const reference.
/// \param exportProcIDs [out] Array passed by non-const reference.
///
/// NOTE(review): the definition is not in this excerpt; in/out roles are read
/// off the const-ness of the parameters, and the pairing of remoteGIDs[i] with
/// remoteProcIDs[i] is an assumption to confirm.
1024 template <
class Ordinal>
1025 void computeSends (
const Teuchos::ArrayView<const Ordinal> &remoteGIDs,
1026 const Teuchos::ArrayView<const int> &remoteProcIDs,
1027 Teuchos::Array<Ordinal> &exportGIDs,
1028 Teuchos::Array<int> &exportProcIDs);
// Called by the doReversePosts overloads when reverseDistributor_ is null,
// immediately before they dereference it.  It is const, and
// reverseDistributor_ is mutable, so it can be invoked from const methods.
1031 void createReverseDistributor()
const;
1039 localDescribeToString (
const Teuchos::EVerbosityLevel vl)
const;
// doPostsAndWaits, 3-argument Teuchos::ArrayView overload (name taken from the
// error message below; the signature's first lines are not in this excerpt).
// Visible behaviour: refuse to run while requests are outstanding, wrap the
// caller's buffers in non-owning ArrayRCPs, and record the byte counts of
// this round.
1043 template <
class Packet>
1047 const Teuchos::ArrayView<Packet>& imports)
1049 using Teuchos::arcp;
1050 using Teuchos::ArrayRCP;
1051 typedef typename ArrayRCP<const Packet>::size_type size_type;
// Precondition: no nonblocking messages may be pending from an earlier post.
1053 TEUCHOS_TEST_FOR_EXCEPTION(
1054 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1055 "doPostsAndWaits(3 args): There are " << requests_.size () <<
1056 " outstanding nonblocking messages pending. It is incorrect to call "
1057 "this method with posts outstanding.");
// Non-owning wrapper around the caller's export buffer (the trailing 'false'
// is ArrayRCP's has_ownership flag).
1069 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1070 static_cast<size_type
> (0),
1071 exports.size(),
false);
// Non-owning wrapper around 'imports'; it is the last argument of a call
// whose opening lines are not visible in this excerpt.
1086 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false));
// Record how many bytes this round sent and received.
1089 lastRoundBytesSend_ = exports.size () *
sizeof (Packet);
1090 lastRoundBytesRecv_ = imports.size () *
sizeof (Packet);
// doPostsAndWaits, Teuchos::ArrayView overload with per-LID packet counts
// (name taken from the error message below; the signature's first lines are
// not in this excerpt).  Visible behaviour mirrors the 3-argument overload:
// reject outstanding requests, wrap the caller's buffers without taking
// ownership, then record the byte counts of this round.
1093 template <
class Packet>
1096 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1097 const Teuchos::ArrayView<Packet> &imports,
1098 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1100 using Teuchos::arcp;
1101 using Teuchos::ArrayRCP;
// Precondition: no nonblocking messages may be pending from an earlier post.
1103 TEUCHOS_TEST_FOR_EXCEPTION(
1104 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1105 "doPostsAndWaits: There are " << requests_.size () <<
" outstanding "
1106 "nonblocking messages pending. It is incorrect to call doPostsAndWaits "
1107 "with posts outstanding.");
1120 typedef typename ArrayRCP<const Packet>::size_type size_type;
// Non-owning wrapper around the caller's export buffer (the trailing 'false'
// is ArrayRCP's has_ownership flag).
1121 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1122 static_cast<size_type
> (0),
1123 exports.size (),
false);
// Remaining arguments of a call whose opening line is not visible in this
// excerpt; 'imports' is again wrapped without ownership.
1129 numExportPacketsPerLID,
1130 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false),
1131 numImportPacketsPerLID);
// Record how many bytes this round sent and received.
1134 lastRoundBytesSend_ = exports.size () *
sizeof (Packet);
1135 lastRoundBytesRecv_ = imports.size () *
sizeof (Packet);
1139 template <
class Packet>
1141 doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
1143 const Teuchos::ArrayRCP<Packet>& imports)
1145 using Teuchos::Array;
1146 using Teuchos::ArrayRCP;
1147 using Teuchos::ArrayView;
1149 using Teuchos::FancyOStream;
1150 using Teuchos::includesVerbLevel;
1151 using Teuchos::ireceive;
1152 using Teuchos::isend;
1153 using Teuchos::readySend;
1154 using Teuchos::send;
1155 using Teuchos::ssend;
1156 using Teuchos::TypeNameTraits;
1157 using Teuchos::typeName;
1159 using size_type = Array<size_t>::size_type;
1161 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1162 Teuchos::TimeMonitor timeMon (*timer_doPosts3TA_);
1166 const int myRank = comm_->getRank ();
1170 const bool doBarrier = barrierBetween_;
1172 std::unique_ptr<std::string> prefix;
1174 prefix = createPrefix(
"doPosts(3-arg, ArrayRCP)");
1175 std::ostringstream os;
1176 os << *prefix <<
"Start" << endl;
1177 std::cerr << os.str();
1180 TEUCHOS_TEST_FOR_EXCEPTION(
1181 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier, std::logic_error,
1182 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Ready-send "
1183 "version requires a barrier between posting receives and posting ready "
1184 "sends. This should have been checked before. "
1185 "Please report this bug to the Tpetra developers.");
// doPosts(3 args, ArrayRCP): argument validation and setup before any receive
// is posted.
// Offset into 'imports' where the self-message (if any) will be copied; it is
// assigned in the receive loop and read by the self-send copy.
1187 size_t selfReceiveOffset = 0;
// The 'imports' size check below is associated with Distributors that were
// not created as a reverse Distributor.
// NOTE(review): the closing brace of this 'if' is not visible in this excerpt.
1192 if (howInitialized_ != Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE) {
1199 const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
1200 TEUCHOS_TEST_FOR_EXCEPTION
1201 (
static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1202 std::invalid_argument,
1203 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1204 "The 'imports' array must have enough entries to hold the expected number "
1205 "of import packets. imports.size() = " << imports.size () <<
" < "
1206 "totalNumImportPackets = " << totalNumImportPackets <<
".");
// Path tag 0 identifies this (3-argument) code path to getTag().
1214 const int pathTag = 0;
1215 const int tag = this->getTag (pathTag);
// No requests may be outstanding when posting starts.
1218 TEUCHOS_TEST_FOR_EXCEPTION
1219 (requests_.size () != 0,
1221 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process "
1222 << myRank <<
": requests_.size() = " << requests_.size () <<
" != 0.");
// The receive loop also visits the self-message, so count it here.
1238 const size_type actualNumReceives = as<size_type> (numReceives_) +
1239 as<size_type> (selfMessage_ ? 1 : 0);
1240 requests_.resize (0);
1243 std::ostringstream os;
1244 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1245 <<
": Post receives" << endl;
1246 std::cerr << os.str();
1255 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1256 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3TA_recvs_);
// doPosts(3 args, ArrayRCP): post one nonblocking receive per remote source.
// curBufOffset walks through 'imports'; the i-th receive occupies
// lengthsFrom_[i] * numPackets consecutive entries starting at that offset.
// NOTE(review): several lines of this loop (closing braces, the tail of the
// ireceive call, the branch taken for the self-message) are not visible in
// this excerpt.
1259 size_t curBufOffset = 0;
1260 for (size_type i = 0; i < actualNumReceives; ++i) {
1261 const size_t curBufLen = lengthsFrom_[i] * numPackets;
1262 if (procsFrom_[i] != myRank) {
1264 std::ostringstream os;
1265 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1266 <<
": Post irecv: {source: " << procsFrom_[i]
1267 <<
", tag: " << tag <<
"}" << endl;
1268 std::cerr << os.str();
// Guard against running off the end of 'imports' before taking the view.
1277 TEUCHOS_TEST_FOR_EXCEPTION(
1278 curBufOffset + curBufLen >
static_cast<size_t> (imports.size ()),
1280 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1281 "Exceeded size of 'imports' array in packing loop on Process " <<
1282 myRank <<
". imports.size() = " << imports.size () <<
" < "
1283 "curBufOffset(" << curBufOffset <<
") + curBufLen(" << curBufLen
1285 ArrayRCP<Packet> recvBuf =
1286 imports.persistingView (curBufOffset, curBufLen);
1287 requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1291 selfReceiveOffset = curBufOffset;
// Advance past this receive's slot (done for remote and self messages alike).
1293 curBufOffset += curBufLen;
1298 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1299 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3TA_barrier_);
1303 std::ostringstream os;
1304 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1305 <<
": Barrier" << endl;
1306 std::cerr << os.str();
1316 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1317 Teuchos::TimeMonitor timeMonSends (*timer_doPosts3TA_sends_);
// doPosts(3 args, ArrayRCP): choose where the send loops start.
// numBlocks counts the sends plus the self-message.  The scan stops at the
// first destination whose rank is not below this process's rank; the send
// loops then visit destinations as p = i + procIndex.
// NOTE(review): the bodies of the 'while' and 'if' (presumably ++procIndex and
// a reset to 0) are not visible in this excerpt.
1325 size_t numBlocks = numSends_ + selfMessage_;
1326 size_t procIndex = 0;
1327 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
1330 if (procIndex == numBlocks) {
// Start index of the self-message in the slow path (set from startsTo_[p]).
1335 size_t selfIndex = 0;
1338 std::ostringstream os;
1339 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1340 <<
": Post sends" << endl;
1341 std::cerr << os.str();
1344 if (indicesTo_.empty ()) {
1346 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1347 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3TA_sends_fast_);
1352 for (
size_t i = 0; i < numBlocks; ++i) {
1353 size_t p = i + procIndex;
1354 if (p > (numBlocks - 1)) {
1358 if (procsTo_[p] != myRank) {
1360 std::ostringstream os;
1361 os << *prefix <<
": Post send: {target: "
1362 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
1363 std::cerr << os.str();
1366 ArrayView<const Packet> tmpSend =
1367 exports.view (startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
1369 if (sendType == Details::DISTRIBUTOR_SEND) {
1370 send<int, Packet> (tmpSend.getRawPtr (),
1371 as<int> (tmpSend.size ()),
1372 procsTo_[p], tag, *comm_);
1374 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1375 ArrayRCP<const Packet> tmpSendBuf =
1376 exports.persistingView (startsTo_[p] * numPackets,
1377 lengthsTo_[p] * numPackets);
1378 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1381 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1382 readySend<int, Packet> (tmpSend.getRawPtr (),
1383 as<int> (tmpSend.size ()),
1384 procsTo_[p], tag, *comm_);
1386 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1387 ssend<int, Packet> (tmpSend.getRawPtr (),
1388 as<int> (tmpSend.size ()),
1389 procsTo_[p], tag, *comm_);
1391 TEUCHOS_TEST_FOR_EXCEPTION(
1392 true, std::logic_error,
1393 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1394 "Invalid send type. We should never get here. "
1395 "Please report this bug to the Tpetra developers.");
1405 std::ostringstream os;
1406 os << *prefix <<
"Fast: Self-send" << endl;
1407 std::cerr << os.str();
1416 std::copy (exports.begin()+startsTo_[selfNum]*numPackets,
1417 exports.begin()+startsTo_[selfNum]*numPackets+lengthsTo_[selfNum]*numPackets,
1418 imports.begin()+selfReceiveOffset);
1423 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1424 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3TA_sends_slow_);
// doPosts(3 args, ArrayRCP), slow path: scratch buffer into which the entries
// for one destination are packed before sending.  It is reused for every
// destination, so it is sized for the largest send.
1429 ArrayRCP<Packet> sendArray (maxSendLength_ * numPackets);
// The slow path rejects nonblocking sends outright.  As a consequence the
// DISTRIBUTOR_ISEND branch further down in this path cannot be reached.
1431 TEUCHOS_TEST_FOR_EXCEPTION(
1432 sendType == Details::DISTRIBUTOR_ISEND, std::logic_error,
1433 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1434 "The \"send buffer\" code path doesn't currently work with "
1435 "nonblocking sends.");
1437 for (
size_t i = 0; i < numBlocks; ++i) {
1438 size_t p = i + procIndex;
1439 if (p > (numBlocks - 1)) {
1443 if (procsTo_[p] != myRank) {
1445 std::ostringstream os;
1446 os << *prefix <<
"Slow: Post send: "
1447 "{target: " << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
1448 std::cerr << os.str();
// doPosts(3 args, ArrayRCP), slow path: pack the entries destined for
// procsTo_[p].  For each of the lengthsTo_[p] entries, indicesTo_[j] locates
// a run of numPackets packets in 'exports', which is copied to the next free
// position in sendArray.
1451 typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1452 size_t sendArrayOffset = 0;
1453 size_t j = startsTo_[p];
1454 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1455 srcBegin = exports.begin() + indicesTo_[j]*numPackets;
1456 srcEnd = srcBegin + numPackets;
1457 std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1458 sendArrayOffset += numPackets;
// View of exactly the packed portion of the scratch buffer.
1460 ArrayView<const Packet> tmpSend =
1461 sendArray.view (0, lengthsTo_[p]*numPackets);
1463 if (sendType == Details::DISTRIBUTOR_SEND) {
1464 send<int, Packet> (tmpSend.getRawPtr (),
1465 as<int> (tmpSend.size ()),
1466 procsTo_[p], tag, *comm_);
1468 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1469 ArrayRCP<const Packet> tmpSendBuf =
1470 sendArray.persistingView (0, lengthsTo_[p] * numPackets);
1471 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1474 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1475 readySend<int, Packet> (tmpSend.getRawPtr (),
1476 as<int> (tmpSend.size ()),
1477 procsTo_[p], tag, *comm_);
1479 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1480 ssend<int, Packet> (tmpSend.getRawPtr (),
1481 as<int> (tmpSend.size ()),
1482 procsTo_[p], tag, *comm_);
1485 TEUCHOS_TEST_FOR_EXCEPTION(
1486 true, std::logic_error,
1487 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1488 "Invalid send type. We should never get here. "
1489 "Please report this bug to the Tpetra developers.");
1494 selfIndex = startsTo_[p];
1500 std::ostringstream os;
1501 os << *prefix <<
"Slow: Self-send" << endl;
1502 std::cerr << os.str();
1504 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1505 std::copy (exports.begin()+indicesTo_[selfIndex]*numPackets,
1506 exports.begin()+indicesTo_[selfIndex]*numPackets + numPackets,
1507 imports.begin() + selfReceiveOffset);
1509 selfReceiveOffset += numPackets;
1515 std::ostringstream os;
1516 os << *prefix <<
"Done!" << endl;
1517 std::cerr << os.str();
1521 template <
class Packet>
1523 doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
1524 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1525 const Teuchos::ArrayRCP<Packet>& imports,
1526 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1528 using Teuchos::Array;
1529 using Teuchos::ArrayRCP;
1530 using Teuchos::ArrayView;
1532 using Teuchos::ireceive;
1533 using Teuchos::isend;
1534 using Teuchos::readySend;
1535 using Teuchos::send;
1536 using Teuchos::ssend;
1537 using Teuchos::TypeNameTraits;
1539 typedef Array<size_t>::size_type size_type;
1541 std::unique_ptr<std::string> prefix;
1543 prefix = createPrefix(
"doPosts(4-arg, Teuchos)");
1544 std::ostringstream os;
1545 os << *prefix <<
"Start" << endl;
1546 std::cerr << os.str();
1549 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1550 Teuchos::TimeMonitor timeMon (*timer_doPosts4TA_);
1556 const bool doBarrier = barrierBetween_;
// doPosts(4 args, ArrayRCP): a ready-send (DISTRIBUTOR_RSEND) is only valid
// when a barrier separates posting the receives from posting the sends, so
// reject the combination of RSEND with no barrier.
// Fix: the message previously read "posting ready ends"; it now reads
// "posting ready sends", matching the 3-argument overload's message.
1580 TEUCHOS_TEST_FOR_EXCEPTION(
1581 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
1583 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Ready-send "
1584 "version requires a barrier between posting receives and posting ready "
1585 "sends. This should have been checked before. "
1586 "Please report this bug to the Tpetra developers.");
1588 const int myProcID = comm_->getRank ();
1589 size_t selfReceiveOffset = 0;
1591 #ifdef HAVE_TEUCHOS_DEBUG
1593 size_t totalNumImportPackets = 0;
1594 for (
size_t ii = 0; ii < static_cast<size_t> (numImportPacketsPerLID.size ()); ++ii) {
1595 totalNumImportPackets += numImportPacketsPerLID[ii];
1597 TEUCHOS_TEST_FOR_EXCEPTION(
1598 static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1600 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): The 'imports' "
1601 "array must have enough entries to hold the expected number of import "
1602 "packets. imports.size() = " << imports.size() <<
" < "
1603 "totalNumImportPackets = " << totalNumImportPackets <<
".");
1611 const int pathTag = 1;
1612 const int tag = this->getTag (pathTag);
1614 #ifdef HAVE_TEUCHOS_DEBUG
1615 TEUCHOS_TEST_FOR_EXCEPTION
1616 (requests_.size () != 0,
1618 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Process "
1619 << myProcID <<
": requests_.size() = " << requests_.size ()
1623 std::ostringstream os;
1624 os << *prefix << (indicesTo_.empty () ?
"fast" :
"slow")
1626 std::cerr << os.str();
1642 const size_type actualNumReceives = as<size_type> (numReceives_) +
1643 as<size_type> (selfMessage_ ? 1 : 0);
1644 requests_.resize (0);
1652 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1653 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4TA_recvs_);
// doPosts(4 args, ArrayRCP): post one nonblocking receive per remote source
// that actually sends packets.  The number of packets expected from source i
// is the sum of numImportPacketsPerLID over that source's lengthsFrom_[i]
// LIDs; curLIDoffset tracks the position in numImportPacketsPerLID and
// curBufferOffset the position in 'imports'.
// NOTE(review): closing braces, the tail of the ireceive call and the branch
// taken for the self-message are not visible in this excerpt.
1656 size_t curBufferOffset = 0;
1657 size_t curLIDoffset = 0;
1658 for (size_type i = 0; i < actualNumReceives; ++i) {
1659 size_t totalPacketsFrom_i = 0;
1660 for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
1661 totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
1663 curLIDoffset += lengthsFrom_[i];
// Skip the self-message and sources that contribute zero packets.
1664 if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
1673 ArrayRCP<Packet> recvBuf =
1674 imports.persistingView (curBufferOffset, totalPacketsFrom_i);
1675 requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1679 selfReceiveOffset = curBufferOffset;
1681 curBufferOffset += totalPacketsFrom_i;
1686 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1687 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4TA_barrier_);
1697 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1698 Teuchos::TimeMonitor timeMonSends (*timer_doPosts4TA_sends_);
// doPosts(4 args, ArrayRCP): for each send, compute its starting offset in
// 'exports' (sendPacketOffsets) and its packet count (packetsPerSend) by
// summing numExportPacketsPerLID over the LIDs in [startsTo_, startsTo_ +
// lengthsTo_).  maxNumPackets is the largest count and later sizes the slow
// path's send buffer.
// NOTE(review): both arrays hold numSends_ entries, but the send loops index
// them with p < numBlocks = numSends_ + selfMessage_ (e.g. packetsPerSend[p],
// sendPacketOffsets[selfNum]).  If numSends_ does not count the self-message,
// the last block is out of range -- confirm the definition of numSends_.
1703 Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
1704 size_t maxNumPackets = 0;
1705 size_t curPKToffset = 0;
1706 for (
size_t pp=0; pp<numSends_; ++pp) {
1707 sendPacketOffsets[pp] = curPKToffset;
1708 size_t numPackets = 0;
1709 for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
1710 numPackets += numExportPacketsPerLID[j];
1712 if (numPackets > maxNumPackets) maxNumPackets = numPackets;
1713 packetsPerSend[pp] = numPackets;
1714 curPKToffset += numPackets;
// Choose where the send loops start: the scan stops at the first destination
// whose rank is not below this process's rank.
// NOTE(review): the bodies of the 'while' and 'if' are not visible here.
1719 size_t numBlocks = numSends_+ selfMessage_;
1720 size_t procIndex = 0;
1721 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
1724 if (procIndex == numBlocks) {
1729 size_t selfIndex = 0;
1731 if (indicesTo_.empty()) {
1733 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1734 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4TA_sends_fast_);
1738 std::ostringstream os;
1739 os << *prefix <<
"fast path: posting sends" << endl;
1740 std::cerr << os.str();
1745 for (
size_t i = 0; i < numBlocks; ++i) {
1746 size_t p = i + procIndex;
1747 if (p > (numBlocks - 1)) {
1751 if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
1752 ArrayView<const Packet> tmpSend =
1753 exports.view (sendPacketOffsets[p], packetsPerSend[p]);
1755 if (sendType == Details::DISTRIBUTOR_SEND) {
1756 send<int, Packet> (tmpSend.getRawPtr (),
1757 as<int> (tmpSend.size ()),
1758 procsTo_[p], tag, *comm_);
1760 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1761 readySend<int, Packet> (tmpSend.getRawPtr (),
1762 as<int> (tmpSend.size ()),
1763 procsTo_[p], tag, *comm_);
1765 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1766 ArrayRCP<const Packet> tmpSendBuf =
1767 exports.persistingView (sendPacketOffsets[p], packetsPerSend[p]);
1768 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1771 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1772 ssend<int, Packet> (tmpSend.getRawPtr (),
1773 as<int> (tmpSend.size ()),
1774 procsTo_[p], tag, *comm_);
1777 TEUCHOS_TEST_FOR_EXCEPTION(
1778 true, std::logic_error,
1779 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1780 "Invalid send type. We should never get here. Please report "
1781 "this bug to the Tpetra developers.");
1790 std::copy (exports.begin()+sendPacketOffsets[selfNum],
1791 exports.begin()+sendPacketOffsets[selfNum]+packetsPerSend[selfNum],
1792 imports.begin()+selfReceiveOffset);
1795 std::ostringstream os;
1796 os << *prefix <<
"fast path: done" << endl;
1797 std::cerr << os.str();
1802 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1803 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4TA_sends_slow_);
1807 std::ostringstream os;
1808 os << *prefix <<
"slow path: posting sends" << endl;
1809 std::cerr << os.str();
// doPosts(4 args, ArrayRCP), slow path: scratch send buffer sized for the
// largest single send.
1813 ArrayRCP<Packet> sendArray (maxNumPackets);
// The slow path rejects nonblocking sends.  As a consequence the
// DISTRIBUTOR_ISEND branch further down in this path cannot be reached.
1815 TEUCHOS_TEST_FOR_EXCEPTION(
1816 sendType == Details::DISTRIBUTOR_ISEND,
1818 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1819 "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
// indicesOffsets[j] = running sum of numExportPacketsPerLID[0..j-1], i.e. the
// offset in 'exports' at which LID j's packets begin.
// NOTE(review): 'ioffset' is declared on a line not visible in this excerpt
// (presumably initialized to 0).
1821 Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
1823 for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
1824 indicesOffsets[j] = ioffset;
1825 ioffset += numExportPacketsPerLID[j];
1828 for (
size_t i = 0; i < numBlocks; ++i) {
1829 size_t p = i + procIndex;
1830 if (p > (numBlocks - 1)) {
1834 if (procsTo_[p] != myProcID) {
// doPosts(4 args, ArrayRCP), slow path: pack the packets destined for
// procsTo_[p] into sendArray and count them in numPacketsTo_p.
// NOTE(review): the 3-argument slow path locates each entry via
// indicesTo_[j]; here j (starting at startsTo_[p]) indexes indicesOffsets and
// numExportPacketsPerLID directly, without going through indicesTo_.
// Confirm that this difference is intended.
1835 typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1836 size_t sendArrayOffset = 0;
1837 size_t j = startsTo_[p];
1838 size_t numPacketsTo_p = 0;
1839 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1840 srcBegin = exports.begin() + indicesOffsets[j];
1841 srcEnd = srcBegin + numExportPacketsPerLID[j];
1842 numPacketsTo_p += numExportPacketsPerLID[j];
1843 std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1844 sendArrayOffset += numExportPacketsPerLID[j];
1846 if (numPacketsTo_p > 0) {
1847 ArrayView<const Packet> tmpSend =
1848 sendArray.view (0, numPacketsTo_p);
1850 if (sendType == Details::DISTRIBUTOR_RSEND) {
1851 readySend<int, Packet> (tmpSend.getRawPtr (),
1852 as<int> (tmpSend.size ()),
1853 procsTo_[p], tag, *comm_);
1855 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1856 ArrayRCP<const Packet> tmpSendBuf =
1857 sendArray.persistingView (0, numPacketsTo_p);
1858 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1861 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1862 ssend<int, Packet> (tmpSend.getRawPtr (),
1863 as<int> (tmpSend.size ()),
1864 procsTo_[p], tag, *comm_);
1867 send<int, Packet> (tmpSend.getRawPtr (),
1868 as<int> (tmpSend.size ()),
1869 procsTo_[p], tag, *comm_);
1875 selfIndex = startsTo_[p];
1880 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1881 std::copy (exports.begin()+indicesOffsets[selfIndex],
1882 exports.begin()+indicesOffsets[selfIndex]+numExportPacketsPerLID[selfIndex],
1883 imports.begin() + selfReceiveOffset);
1884 selfReceiveOffset += numExportPacketsPerLID[selfIndex];
1889 std::ostringstream os;
1890 os << *prefix <<
"slow path: done" << endl;
1891 std::cerr << os.str();
// NOTE(review): the line naming this function is not visible in this excerpt.
// Its position (directly before the overload whose message says
// "doReversePostsAndWaits(4 args)") suggests it is the 3-argument
// doReversePostsAndWaits -- confirm.
// Visible behaviour: wrap the caller's buffers in non-owning ArrayRCPs and
// record the byte counts of this round.
1896 template <
class Packet>
1900 const Teuchos::ArrayView<Packet>& imports)
1902 using Teuchos::arcp;
1903 using Teuchos::ArrayRCP;
1916 typedef typename ArrayRCP<const Packet>::size_type size_type;
// Non-owning wrapper around the caller's export buffer (the trailing 'false'
// is ArrayRCP's has_ownership flag).
1917 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr(), as<size_type> (0),
1918 exports.size(),
false);
// Non-owning wrapper around 'imports'; last argument of a call whose opening
// lines are not visible in this excerpt.
1925 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false));
// Record how many bytes this round sent and received.
1928 lastRoundBytesSend_ = exports.size() *
sizeof(Packet);
1929 lastRoundBytesRecv_ = imports.size() *
sizeof(Packet);
// doReversePostsAndWaits, 4-argument (per-LID packet count) overload; the name
// is taken from the error message below, as the signature's first lines are
// not in this excerpt.  Visible behaviour: reject outstanding requests, wrap
// the caller's buffers without taking ownership, and record the byte counts
// of this round.
1932 template <
class Packet>
1935 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1936 const Teuchos::ArrayView<Packet> &imports,
1937 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1940 using Teuchos::arcp;
1941 using Teuchos::ArrayRCP;
// Precondition: no nonblocking messages may be pending from an earlier post.
1943 TEUCHOS_TEST_FOR_EXCEPTION(
1944 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1945 "doReversePostsAndWaits(4 args): There are " << requests_.size ()
1946 <<
" outstanding nonblocking messages pending. It is incorrect to call "
1947 "this method with posts outstanding.");
1960 typedef typename ArrayRCP<const Packet>::size_type size_type;
// Non-owning wrapper around the caller's export buffer (the trailing 'false'
// is ArrayRCP's has_ownership flag).
1961 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (), as<size_type> (0),
1962 exports.size (),
false);
// Remaining arguments of a call whose opening line is not visible in this
// excerpt; 'imports' is again wrapped without ownership.
1964 numExportPacketsPerLID,
1965 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false),
1966 numImportPacketsPerLID);
// Record how many bytes this round sent and received.
1969 lastRoundBytesSend_ = exports.size() *
sizeof(Packet);
1970 lastRoundBytesRecv_ = imports.size() *
sizeof(Packet);
// doReversePosts, 3-argument overload (name taken from the error message
// below; the signature's first lines are not in this excerpt).
// Posts the reverse communication by delegating to doPosts on the reverse
// Distributor, which is created on first use.
1973 template <
class Packet>
1977 const Teuchos::ArrayRCP<Packet>& imports)
// Reverse mode is refused whenever indicesTo_ is non-empty (i.e. whenever the
// forward pattern would take the "slow" send path).
1980 TEUCHOS_TEST_FOR_EXCEPTION(
1981 ! indicesTo_.empty (), std::runtime_error,
1982 "Tpetra::Distributor::doReversePosts(3 args): Can only do reverse "
1983 "communication when original data are blocked by process.");
// Build the reverse Distributor lazily.
1984 if (reverseDistributor_.is_null ()) {
1985 createReverseDistributor ();
1987 reverseDistributor_->doPosts (exports, numPackets, imports);
// doReversePosts, 4-argument (per-LID packet count) overload.  The signature's
// first lines are not in this excerpt; the visible parameters and the
// four-argument doPosts call below identify the overload.
// Posts the reverse communication by delegating to doPosts on the reverse
// Distributor, which is created on first use.
//
// Throws std::runtime_error when indicesTo_ is non-empty.
// Fix: the error message previously said "(3 args)", a copy of the
// 3-argument overload's text; it now says "(4 args)" so that the message
// identifies the overload that actually threw.
1990 template <
class Packet>
1993 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1994 const Teuchos::ArrayRCP<Packet>& imports,
1995 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Reverse mode is refused whenever indicesTo_ is non-empty.
1998 TEUCHOS_TEST_FOR_EXCEPTION(
1999 ! indicesTo_.empty (), std::runtime_error,
2000 "Tpetra::Distributor::doReversePosts(4 args): Can only do reverse "
2001 "communication when original data are blocked by process.");
// Build the reverse Distributor lazily.
2002 if (reverseDistributor_.is_null ()) {
2003 createReverseDistributor ();
2005 reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
2006 imports, numImportPacketsPerLID);
// doPostsAndWaits, Kokkos::View overload with a fixed packet count.
// Enabled only when both ExpView and ImpView satisfy Kokkos::Impl::is_view;
// the return type is void.  Visible behaviour: reject outstanding requests,
// then call doPosts; the remaining lines are debug output to std::cerr.
// NOTE(review): the parameter between 'exports' and 'imports' (used below as
// numPackets), the conditions guarding the debug output, and the call that
// follows the "Call doWaits" message are not visible in this excerpt.
2009 template <
class ExpView,
class ImpView>
2010 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2012 doPostsAndWaits (
const ExpView& exports,
2014 const ImpView& imports)
2020 std::unique_ptr<std::string> prefix;
// Debug output: report the configured send type and barrier setting.
2022 prefix = createPrefix(
"doPostsAndWaits(3-arg, Kokkos)");
2023 std::ostringstream os;
2024 os << *prefix <<
"sendType: "
2025 << DistributorSendTypeEnumToString(sendType_)
2026 <<
", barrierBetween: "
2027 << (barrierBetween_ ?
"true" :
"false") << endl;
2028 std::cerr << os.str();
// Precondition: no nonblocking messages may be pending from an earlier post.
2031 TEUCHOS_TEST_FOR_EXCEPTION(
2032 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
2033 "doPostsAndWaits(3 args): There are " << requests_.size () <<
2034 " outstanding nonblocking messages pending. It is incorrect to call "
2035 "this method with posts outstanding.");
2038 std::ostringstream os;
2039 os << *prefix <<
"Call doPosts" << endl;
2040 std::cerr << os.str();
2042 doPosts (exports, numPackets, imports);
2044 std::ostringstream os;
2045 os << *prefix <<
"Call doWaits" << endl;
2046 std::cerr << os.str();
2050 std::ostringstream os;
2051 os << *prefix <<
"Done" << endl;
2052 std::cerr << os.str();
// doPostsAndWaits, Kokkos::View overload with per-LID packet counts.
// Enabled only when both ExpView and ImpView satisfy Kokkos::Impl::is_view;
// the return type is void.  Visible behaviour: reject outstanding requests,
// then forward all four arguments to doPosts; the remaining lines are debug
// output to std::cerr.
// NOTE(review): the conditions guarding the debug output, the tail of the
// exception message, and whatever follows the doPosts call (presumably
// doWaits) are not visible in this excerpt.
2056 template <
class ExpView,
class ImpView>
2057 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2059 doPostsAndWaits(
const ExpView& exports,
2060 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2061 const ImpView& imports,
2062 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Used both for the debug prefix and inside the exception message.
2065 const char rawPrefix[] =
"doPostsAndWaits(4-arg, Kokkos)";
2067 std::unique_ptr<std::string> prefix;
2069 prefix = createPrefix(rawPrefix);
2070 std::ostringstream os;
2071 os << *prefix <<
"Start" << endl;
2072 std::cerr << os.str();
// Precondition: no nonblocking messages may be pending from an earlier post.
2074 TEUCHOS_TEST_FOR_EXCEPTION
2075 (requests_.size() != 0, std::runtime_error,
2076 "Tpetra::Distributor::" << rawPrefix <<
": There is/are "
2077 << requests_.size() <<
" outstanding nonblocking message(s) "
2078 "pending. It is incorrect to call this method with posts "
2080 doPosts(exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
2083 std::ostringstream os;
2084 os << *prefix <<
"Done" << endl;
2085 std::cerr << os.str();
// Post the forward-plan receives and sends for Kokkos::View buffers when every
// LID contributes the same number of packets ("3-arg" overload).
//
// Enabled only when both ExpView and ImpView are Kokkos Views.
//
// exports    - outgoing packets; read through subview_offset and
//              deep_copy_offset at offsets scaled by numPackets.
// numPackets - packets per LID; multiplies every offset and length below.
//              NOTE(review): its declaration is not visible in this excerpt.
// imports    - incoming buffer; must hold at least
//              totalReceiveLength_ * numPackets entries.
//
// Outline: validate sizes, post one ireceive per remote source (requests are
// appended to requests_), then send to each target either directly from
// subviews of `exports` (fast path, indicesTo_ empty) or through the packed
// scratch buffer `sendArray` (slow path). Data addressed to this rank is
// copied with deep_copy_offset instead of being sent.
2090 template <
class ExpView,
class ImpView>
2091 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2093 doPosts (
const ExpView &exports,
2095 const ImpView &imports)
// Teuchos communication wrappers and Kokkos::Compat view helpers used below.
2097 using Teuchos::Array;
2099 using Teuchos::FancyOStream;
2100 using Teuchos::includesVerbLevel;
2101 using Teuchos::ireceive;
2102 using Teuchos::isend;
2103 using Teuchos::readySend;
2104 using Teuchos::send;
2105 using Teuchos::ssend;
2106 using Teuchos::TypeNameTraits;
2107 using Teuchos::typeName;
2109 using Kokkos::Compat::create_const_view;
2110 using Kokkos::Compat::create_view;
2111 using Kokkos::Compat::subview_offset;
2112 using Kokkos::Compat::deep_copy_offset;
2113 typedef Array<size_t>::size_type size_type;
// The buffer view types are also the types of the subviews taken below.
2114 typedef ExpView exports_view_type;
2115 typedef ImpView imports_view_type;
// Reject CudaUVMSpace for either buffer.
// NOTE(review): the opening line of this statement is not visible here; the
// 4-argument doPosts spells the same check as a static_assert.
2117 #ifdef KOKKOS_ENABLE_CUDA
2119 (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2120 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2121 "Please do not use Tpetra::Distributor with UVM allocations. "
2122 "See Trilinos GitHub issue #1088.");
// Optional timing instrumentation (timer_doPosts3KV_).
2125 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2126 Teuchos::TimeMonitor timeMon (*timer_doPosts3KV_);
2129 const int myRank = comm_->getRank ();
// NOTE(review): `sendType` is used below, but its definition is not visible
// in this excerpt.
2133 const bool doBarrier = barrierBetween_;
// Trace output goes to std::cerr with a prefix built by createPrefix().
// NOTE(review): the condition guarding the trace blocks is not visible here.
2135 std::unique_ptr<std::string> prefix;
2137 prefix = createPrefix(
"doPosts(3-arg, Kokkos)");
2138 std::ostringstream os;
2139 os << *prefix <<
"Start" << endl;
2140 std::cerr << os.str();
// Ready sends are only accepted together with the barrier option.
2143 TEUCHOS_TEST_FOR_EXCEPTION(
2144 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2146 "Tpetra::Distributor::doPosts(3 args, Kokkos): Ready-send version "
2147 "requires a barrier between posting receives and posting ready sends. "
2148 "This should have been checked before. "
2149 "Please report this bug to the Tpetra developers.");
// Offset in `imports` at which this rank's own (self-message) data is copied.
2151 size_t selfReceiveOffset = 0;
// Number of entries `imports` must be able to hold.
2158 const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
2161 std::ostringstream os;
2162 os << *prefix <<
"totalNumImportPackets = " <<
2163 totalNumImportPackets <<
" = " << totalReceiveLength_ <<
" * " <<
2164 numPackets <<
"; imports.extent(0) = " << imports.extent (0)
2166 std::cerr << os.str();
// Debug build: each process computes a local "imports too small" flag and
// the flags are combined with a max-reduction over comm_.
// NOTE(review): the declaration of gblBad and the tested condition are not
// visible in this excerpt.
2169 #ifdef HAVE_TPETRA_DEBUG
2172 const size_t importBufSize =
static_cast<size_t> (imports.extent (0));
2173 const int lclBad = (importBufSize < totalNumImportPackets) ? 1 : 0;
2175 using Teuchos::reduceAll;
2176 using Teuchos::REDUCE_MAX;
2177 using Teuchos::outArg;
2178 reduceAll (*comm_, REDUCE_MAX, lclBad, outArg (gblBad));
2179 TEUCHOS_TEST_FOR_EXCEPTION
2182 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2183 "On one or more MPI processes, the 'imports' array "
2184 "does not have enough entries to hold the expected number of "
2185 "import packets. ");
// Purely local size check; the message reports the individual factors.
// NOTE(review): presumably the non-debug alternative of the block above —
// the preprocessor lines between them are not visible.
2188 TEUCHOS_TEST_FOR_EXCEPTION
2189 (
static_cast<size_t> (imports.extent (0)) < totalNumImportPackets,
2191 "Tpetra::Distributor::doPosts(3 args, Kokkos): The 'imports' "
2192 "array must have enough entries to hold the expected number of import "
2193 "packets. imports.extent(0) = " << imports.extent (0) <<
" < "
2194 "totalNumImportPackets = " << totalNumImportPackets <<
" = "
2195 "totalReceiveLength_ (" << totalReceiveLength_ <<
") * numPackets ("
2196 << numPackets <<
").");
// Message tag for this code path; pathTag is 0 here and 1 in the
// 4-argument doPosts.
2205 const int pathTag = 0;
2206 const int tag = this->getTag (pathTag);
// Debug build: no requests may be pending when posting starts.
2208 #ifdef HAVE_TPETRA_DEBUG
2209 TEUCHOS_TEST_FOR_EXCEPTION
2210 (requests_.size () != 0,
2212 "Tpetra::Distributor::doPosts(3 args, Kokkos): Process "
2213 << myRank <<
": requests_.size() = " << requests_.size () <<
" != 0.");
// Receive entries to walk: numReceives_ plus one if there is a self message.
2229 const size_type actualNumReceives = as<size_type> (numReceives_) +
2230 as<size_type> (selfMessage_ ? 1 : 0);
// Start from an empty request list.
2231 requests_.resize (0);
2234 std::ostringstream os;
2235 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2236 <<
" path: post receives" << endl;
2237 std::cerr << os.str();
// ---- Stage 1: post receives (timed separately when timings are enabled).
2246 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2247 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3KV_recvs_);
// Entry i of procsFrom_/lengthsFrom_ owns the next curBufLen entries of
// `imports`, starting at curBufferOffset.
2250 size_t curBufferOffset = 0;
2251 for (size_type i = 0; i < actualNumReceives; ++i) {
2252 const size_t curBufLen = lengthsFrom_[i] * numPackets;
// Remote source: post a nonblocking receive into its slice of `imports`.
2253 if (procsFrom_[i] != myRank) {
2255 std::ostringstream os;
2257 << (indicesTo_.empty() ?
"fast" :
"slow") <<
" path: "
2258 <<
"post irecv: {source: " << procsFrom_[i]
2259 <<
", tag: " << tag <<
"}" << endl;
2260 std::cerr << os.str();
// The slice must not run past the end of `imports`.
2269 TEUCHOS_TEST_FOR_EXCEPTION(
2270 curBufferOffset + curBufLen >
static_cast<size_t> (imports.size ()),
2271 std::logic_error,
"Tpetra::Distributor::doPosts(3 args, Kokkos): "
2272 "Exceeded size of 'imports' array in packing loop on Process " <<
2273 myRank <<
". imports.size() = " << imports.size () <<
" < "
2274 "curBufferOffset(" << curBufferOffset <<
") + curBufLen(" <<
2276 imports_view_type recvBuf =
2277 subview_offset (imports, curBufferOffset, curBufLen);
2278 requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
// NOTE(review): presumably the else-branch (source is this rank) — records
// where the self-message data will be copied later.
2282 selfReceiveOffset = curBufferOffset;
2284 curBufferOffset += curBufLen;
// ---- Stage 2: barrier (timed separately).
// NOTE(review): the barrier call and its doBarrier guard are not visible in
// this excerpt.
2289 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2290 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3KV_barrier_);
2294 std::ostringstream os;
2295 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2296 <<
" path: barrier" << endl;
2297 std::cerr << os.str();
// ---- Stage 3: post sends (timed separately).
2307 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2308 Teuchos::TimeMonitor timeMonSends (*timer_doPosts3KV_sends_);
// numBlocks counts the send targets including the self message.
2316 size_t numBlocks = numSends_ + selfMessage_;
// Find the first target whose rank is not below myRank.
// NOTE(review): the loop body and the handling of procIndex == numBlocks
// are not visible in this excerpt.
2317 size_t procIndex = 0;
2318 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
2321 if (procIndex == numBlocks) {
2326 size_t selfIndex = 0;
2329 std::ostringstream os;
2330 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2331 <<
" path: post sends" << endl;
2332 std::cerr << os.str();
// Fast path: indicesTo_ is empty, so the data for target p is the range of
// `exports` starting at startsTo_[p]*numPackets with lengthsTo_[p]*numPackets
// entries.
2335 if (indicesTo_.empty()) {
2337 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2338 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3KV_sends_fast_);
2342 std::ostringstream os;
2343 os << *prefix <<
"fast path: posting sends" << endl;
2344 std::cerr << os.str();
// Visit the targets starting at procIndex.
// NOTE(review): the statement that wraps p once it passes the last block is
// not visible in this excerpt.
2349 for (
size_t i = 0; i < numBlocks; ++i) {
2350 size_t p = i + procIndex;
2351 if (p > (numBlocks - 1)) {
2355 if (procsTo_[p] != myRank) {
2357 std::ostringstream os;
2358 os << *prefix <<
"fast path: post send: {target: "
2359 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
2360 std::cerr << os.str();
2362 exports_view_type tmpSend = subview_offset(
2363 exports, startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
// Dispatch on the configured send type.
2365 if (sendType == Details::DISTRIBUTOR_SEND) {
2367 as<int> (tmpSend.size ()),
2368 procsTo_[p], tag, *comm_);
// Nonblocking send: the request is appended to requests_. tmpSendBuf covers
// the same range of `exports` as tmpSend.
2370 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2371 exports_view_type tmpSendBuf =
2372 subview_offset (exports, startsTo_[p] * numPackets,
2373 lengthsTo_[p] * numPackets);
2374 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2377 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2378 readySend<int> (tmpSend,
2379 as<int> (tmpSend.size ()),
2380 procsTo_[p], tag, *comm_);
2382 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2383 ssend<int> (tmpSend,
2384 as<int> (tmpSend.size ()),
2385 procsTo_[p], tag, *comm_);
// Any other send type is reported as an internal error.
2387 TEUCHOS_TEST_FOR_EXCEPTION(
2390 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2391 "Invalid send type. We should never get here. "
2392 "Please report this bug to the Tpetra developers.");
2402 std::ostringstream os;
2403 os << *prefix <<
"fast path: self-send" << endl;
2404 std::cerr << os.str();
// Self message: copy from `exports` into `imports` at selfReceiveOffset.
// NOTE(review): selfNum is assigned in lines not visible in this excerpt.
2413 deep_copy_offset(imports, exports, selfReceiveOffset,
2414 startsTo_[selfNum]*numPackets,
2415 lengthsTo_[selfNum]*numPackets);
2418 std::ostringstream os;
2419 os << *prefix <<
"fast path: done" << endl;
2420 std::cerr << os.str();
// Slow path: gather each target's packets into the scratch buffer sendArray
// before sending.
2425 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2426 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3KV_sends_slow_);
2430 std::ostringstream os;
2431 os << *prefix <<
"slow path: posting sends" << endl;
2432 std::cerr << os.str();
// Scratch buffer with the same value type, layout, device and memory traits
// as ExpView, sized maxSendLength_ * numPackets.
2434 typedef typename ExpView::non_const_value_type Packet;
2435 typedef typename ExpView::array_layout Layout;
2436 typedef typename ExpView::device_type Device;
2437 typedef typename ExpView::memory_traits Mem;
2438 Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray",
2439 maxSendLength_ * numPackets);
// Nonblocking sends are rejected up front, so the DISTRIBUTOR_ISEND branch
// inside the loop below is never taken.
2443 TEUCHOS_TEST_FOR_EXCEPTION(
2444 sendType == Details::DISTRIBUTOR_ISEND,
2446 "Tpetra::Distributor::doPosts(3 args, Kokkos): The \"send buffer\" code path "
2447 "doesn't currently work with nonblocking sends.");
2449 for (
size_t i = 0; i < numBlocks; ++i) {
2450 size_t p = i + procIndex;
2451 if (p > (numBlocks - 1)) {
2455 if (procsTo_[p] != myRank) {
2457 std::ostringstream os;
2458 os << *prefix <<
"slow path: post send: {target: "
2459 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
2460 std::cerr << os.str();
// Pack: for each of the lengthsTo_[p] LIDs, copy numPackets entries from
// `exports` at indicesTo_[j]*numPackets to the next position of sendArray.
2463 size_t sendArrayOffset = 0;
2464 size_t j = startsTo_[p];
2465 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2466 deep_copy_offset(sendArray, exports, sendArrayOffset,
2467 indicesTo_[j]*numPackets, numPackets);
2468 sendArrayOffset += numPackets;
// Send the packed prefix of sendArray.
// NOTE(review): the declaration line of tmpSend is not visible here.
2471 subview_offset(sendArray,
size_t(0), lengthsTo_[p]*numPackets);
2473 if (sendType == Details::DISTRIBUTOR_SEND) {
2475 as<int> (tmpSend.size ()),
2476 procsTo_[p], tag, *comm_);
2478 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2479 exports_view_type tmpSendBuf =
2480 subview_offset (sendArray,
size_t(0), lengthsTo_[p] * numPackets);
2481 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2484 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2485 readySend<int> (tmpSend,
2486 as<int> (tmpSend.size ()),
2487 procsTo_[p], tag, *comm_);
2489 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2490 ssend<int> (tmpSend,
2491 as<int> (tmpSend.size ()),
2492 procsTo_[p], tag, *comm_);
2495 TEUCHOS_TEST_FOR_EXCEPTION(
2498 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2499 "Invalid send type. We should never get here. "
2500 "Please report this bug to the Tpetra developers.");
// NOTE(review): presumably the branch for the self message — remembers where
// its entries start in indicesTo_.
2505 selfIndex = startsTo_[p];
2511 std::ostringstream os;
2512 os << *prefix <<
"slow path: self-send" << endl;
2513 std::cerr << os.str();
// Self message: copy one LID (numPackets entries) at a time into `imports`.
// NOTE(review): the increment of selfIndex is not visible in this excerpt.
2515 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2516 deep_copy_offset(imports, exports, selfReceiveOffset,
2517 indicesTo_[selfIndex]*numPackets, numPackets);
2519 selfReceiveOffset += numPackets;
2523 std::ostringstream os;
2524 os << *prefix <<
"slow path: done" << endl;
2525 std::cerr << os.str();
2530 std::ostringstream os;
2531 os << *prefix <<
"Done" << endl;
2532 std::cerr << os.str();
// Post the forward-plan receives and sends for Kokkos::View buffers when the
// number of packets differs per LID ("4-arg" overload).
//
// Enabled only when both ExpView and ImpView are Kokkos Views.
//
// exports                - outgoing packets.
// numExportPacketsPerLID - packet count of each exported LID; summed below to
//                          obtain per-send sizes and offsets into `exports`.
// imports                - incoming buffer.
// numImportPacketsPerLID - packet count of each imported LID; summed below to
//                          obtain per-receive sizes.
//
// Outline: post one ireceive per remote source that sends at least one
// packet, then send to each target either directly from subviews of `exports`
// (fast path, indicesTo_ empty) or through the packed scratch buffer
// `sendArray` (slow path). Data addressed to this rank is copied with
// deep_copy_offset instead of being sent.
2536 template <
class ExpView,
class ImpView>
2537 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2539 doPosts (
const ExpView &exports,
2540 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2541 const ImpView &imports,
2542 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Teuchos communication wrappers and Kokkos::Compat view helpers used below.
2544 using Teuchos::Array;
2546 using Teuchos::ireceive;
2547 using Teuchos::isend;
2548 using Teuchos::readySend;
2549 using Teuchos::send;
2550 using Teuchos::ssend;
2551 using Teuchos::TypeNameTraits;
2553 using Kokkos::Compat::create_const_view;
2554 using Kokkos::Compat::create_view;
2555 using Kokkos::Compat::subview_offset;
2556 using Kokkos::Compat::deep_copy_offset;
2557 typedef Array<size_t>::size_type size_type;
2558 typedef ExpView exports_view_type;
2559 typedef ImpView imports_view_type;
// Compile-time rejection of CudaUVMSpace for either buffer.
2561 #ifdef KOKKOS_ENABLE_CUDA
2562 static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2563 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2564 "Please do not use Tpetra::Distributor with UVM "
2565 "allocations. See GitHub issue #1088.");
// Trace output goes to std::cerr with a prefix built by createPrefix().
// NOTE(review): the condition guarding the trace blocks is not visible here.
2568 std::unique_ptr<std::string> prefix;
2570 prefix = createPrefix(
"doPosts(4-arg, Kokkos)");
2571 std::ostringstream os;
2572 os << *prefix <<
"Start" << endl;
2573 std::cerr << os.str();
// Optional timing instrumentation (timer_doPosts4KV_).
2576 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2577 Teuchos::TimeMonitor timeMon (*timer_doPosts4KV_);
// NOTE(review): `sendType` is used below, but its definition is not visible
// in this excerpt.
2583 const bool doBarrier = barrierBetween_;
// Ready sends are only accepted together with the barrier option.
2607 TEUCHOS_TEST_FOR_EXCEPTION(
2608 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2609 std::logic_error,
"Tpetra::Distributor::doPosts(4 args, Kokkos): Ready-send "
2610 "version requires a barrier between posting receives and posting ready "
2611 "sends. This should have been checked before. "
2612 "Please report this bug to the Tpetra developers.");
2614 const int myProcID = comm_->getRank ();
// Offset in `imports` at which this rank's own (self-message) data is copied.
2615 size_t selfReceiveOffset = 0;
// Debug build: `imports` must hold the sum of all per-LID import counts.
// NOTE(review): this overload keys its debug checks on HAVE_TEUCHOS_DEBUG,
// whereas the 3-argument doPosts uses HAVE_TPETRA_DEBUG — confirm intended.
2617 #ifdef HAVE_TEUCHOS_DEBUG
2619 size_t totalNumImportPackets = 0;
2620 for (size_type ii = 0; ii < numImportPacketsPerLID.size (); ++ii) {
2621 totalNumImportPackets += numImportPacketsPerLID[ii];
2623 TEUCHOS_TEST_FOR_EXCEPTION(
2624 imports.extent (0) < totalNumImportPackets, std::runtime_error,
2625 "Tpetra::Distributor::doPosts(4 args, Kokkos): The 'imports' array must have "
2626 "enough entries to hold the expected number of import packets. "
2627 "imports.extent(0) = " << imports.extent (0) <<
" < "
2628 "totalNumImportPackets = " << totalNumImportPackets <<
".");
// Message tag for this code path; pathTag is 1 here and 0 in the
// 3-argument doPosts.
2636 const int pathTag = 1;
2637 const int tag = this->getTag (pathTag);
// Debug build: no requests may be pending when posting starts.
2639 #ifdef HAVE_TEUCHOS_DEBUG
2640 TEUCHOS_TEST_FOR_EXCEPTION
2641 (requests_.size () != 0, std::logic_error,
"Tpetra::Distributor::"
2642 "doPosts(4 args, Kokkos): Process " << myProcID <<
": requests_.size () = "
2643 << requests_.size () <<
" != 0.");
2646 std::ostringstream os;
2647 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2648 <<
" path, tag=" << tag << endl;
2649 std::cerr << os.str();
// Receive entries to walk: numReceives_ plus one if there is a self message.
2664 const size_type actualNumReceives = as<size_type> (numReceives_) +
2665 as<size_type> (selfMessage_ ? 1 : 0);
// Start from an empty request list.
2666 requests_.resize (0);
// ---- Stage 1: post receives (timed separately when timings are enabled).
2674 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2675 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4KV_recvs_);
// curBufferOffset walks `imports` in packets; curLIDoffset walks
// numImportPacketsPerLID in LIDs.
2678 size_t curBufferOffset = 0;
2679 size_t curLIDoffset = 0;
2680 for (size_type i = 0; i < actualNumReceives; ++i) {
// Sum the packet counts of the lengthsFrom_[i] LIDs coming from source i.
2681 size_t totalPacketsFrom_i = 0;
2682 for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
2683 totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
2685 curLIDoffset += lengthsFrom_[i];
// Post a nonblocking receive only for a remote source that sends something.
2686 if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
2695 imports_view_type recvBuf =
2696 subview_offset (imports, curBufferOffset, totalPacketsFrom_i);
2697 requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
// NOTE(review): presumably the else-branch — records where the self-message
// data will be copied later.
2701 selfReceiveOffset = curBufferOffset;
2703 curBufferOffset += totalPacketsFrom_i;
// ---- Stage 2: barrier (timed separately).
// NOTE(review): the barrier call and its guard are not visible in this
// excerpt.
2708 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2709 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4KV_barrier_);
// ---- Stage 3: post sends (timed separately).
2719 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2720 Teuchos::TimeMonitor timeMonSends (*timer_doPosts4KV_sends_);
// For each send pp: packetsPerSend[pp] is the sum of the per-LID counts over
// [startsTo_[pp], startsTo_[pp]+lengthsTo_[pp]), sendPacketOffsets[pp] is the
// running total before it, and maxNumPackets is the largest single send.
// NOTE(review): both arrays have numSends_ entries, while the loops below
// index them with p up to numBlocks-1 = numSends_ + selfMessage_ - 1 and with
// selfNum — confirm this cannot index past the end when selfMessage_ is set.
2725 Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
2726 size_t maxNumPackets = 0;
2727 size_t curPKToffset = 0;
2728 for (
size_t pp=0; pp<numSends_; ++pp) {
2729 sendPacketOffsets[pp] = curPKToffset;
2730 size_t numPackets = 0;
2731 for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
2732 numPackets += numExportPacketsPerLID[j];
2734 if (numPackets > maxNumPackets) maxNumPackets = numPackets;
2735 packetsPerSend[pp] = numPackets;
2736 curPKToffset += numPackets;
// numBlocks counts the send targets including the self message.
2741 size_t numBlocks = numSends_+ selfMessage_;
// Find the first target whose rank is not below myProcID.
// NOTE(review): the loop body and the handling of procIndex == numBlocks
// are not visible in this excerpt.
2742 size_t procIndex = 0;
2743 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
2746 if (procIndex == numBlocks) {
2751 size_t selfIndex = 0;
// Fast path: indicesTo_ is empty, so the data for target p is the range of
// `exports` starting at sendPacketOffsets[p] with packetsPerSend[p] entries.
2752 if (indicesTo_.empty()) {
2754 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2755 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_fast_);
2759 std::ostringstream os;
2760 os << *prefix <<
"fast path: posting sends" << endl;
2761 std::cerr << os.str();
// Visit the targets starting at procIndex.
// NOTE(review): the statement that wraps p once it passes the last block is
// not visible in this excerpt.
2766 for (
size_t i = 0; i < numBlocks; ++i) {
2767 size_t p = i + procIndex;
2768 if (p > (numBlocks - 1)) {
// Remote targets with nothing to send are skipped.
2772 if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
2773 exports_view_type tmpSend =
2774 subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]);
// Dispatch on the configured send type.
2776 if (sendType == Details::DISTRIBUTOR_SEND) {
2778 as<int> (tmpSend.size ()),
2779 procsTo_[p], tag, *comm_);
2781 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2782 readySend<int> (tmpSend,
2783 as<int> (tmpSend.size ()),
2784 procsTo_[p], tag, *comm_);
// Nonblocking send: the request is appended to requests_. tmpSendBuf covers
// the same range of `exports` as tmpSend.
2786 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2787 exports_view_type tmpSendBuf =
2788 subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]);
2789 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2792 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2793 ssend<int> (tmpSend,
2794 as<int> (tmpSend.size ()),
2795 procsTo_[p], tag, *comm_);
// Any other send type is reported as an internal error.
2798 TEUCHOS_TEST_FOR_EXCEPTION(
2799 true, std::logic_error,
2800 "Tpetra::Distributor::doPosts(4 args, Kokkos): "
2801 "Invalid send type. We should never get here. "
2802 "Please report this bug to the Tpetra developers.");
// Self message: copy from `exports` into `imports` at selfReceiveOffset.
// NOTE(review): selfNum is assigned in lines not visible in this excerpt.
2811 deep_copy_offset(imports, exports, selfReceiveOffset,
2812 sendPacketOffsets[selfNum], packetsPerSend[selfNum]);
2815 std::ostringstream os;
2816 os << *prefix <<
"fast path: done" << endl;
2817 std::cerr << os.str();
// Slow path: gather each target's packets into the scratch buffer sendArray
// before sending.
2822 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2823 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_slow_);
2827 std::ostringstream os;
2828 os << *prefix <<
"slow path: posting sends" << endl;
2829 std::cerr << os.str();
// Scratch buffer with the same value type, layout, device and memory traits
// as ExpView, sized for the largest single send.
2832 typedef typename ExpView::non_const_value_type Packet;
2833 typedef typename ExpView::array_layout Layout;
2834 typedef typename ExpView::device_type Device;
2835 typedef typename ExpView::memory_traits Mem;
2836 Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray", maxNumPackets);
// Nonblocking sends are rejected up front, so the DISTRIBUTOR_ISEND branch
// inside the loop below is never taken.
2838 TEUCHOS_TEST_FOR_EXCEPTION(
2839 sendType == Details::DISTRIBUTOR_ISEND,
2841 "Tpetra::Distributor::doPosts(4-arg, Kokkos): "
2842 "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
// indicesOffsets[j] = number of packets belonging to LIDs before j, i.e. the
// offset used for LID j when copying out of `exports` below.
// NOTE(review): the declaration of ioffset is not visible in this excerpt;
// the loop index is an int compared against size().
2844 Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
2846 for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
2847 indicesOffsets[j] = ioffset;
2848 ioffset += numExportPacketsPerLID[j];
2851 for (
size_t i = 0; i < numBlocks; ++i) {
2852 size_t p = i + procIndex;
2853 if (p > (numBlocks - 1)) {
2857 if (procsTo_[p] != myProcID) {
// Pack the lengthsTo_[p] LIDs for target p back to back into sendArray and
// count the packets packed.
2858 size_t sendArrayOffset = 0;
2859 size_t j = startsTo_[p];
2860 size_t numPacketsTo_p = 0;
2861 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2862 numPacketsTo_p += numExportPacketsPerLID[j];
2863 deep_copy_offset(sendArray, exports, sendArrayOffset,
2864 indicesOffsets[j], numExportPacketsPerLID[j]);
2865 sendArrayOffset += numExportPacketsPerLID[j];
// Nothing is sent to a target whose packed size is zero.
// NOTE(review): the declaration line of tmpSend is not visible here.
2867 if (numPacketsTo_p > 0) {
2869 subview_offset(sendArray,
size_t(0), numPacketsTo_p);
2871 if (sendType == Details::DISTRIBUTOR_RSEND) {
2872 readySend<int> (tmpSend,
2873 as<int> (tmpSend.size ()),
2874 procsTo_[p], tag, *comm_);
2876 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2877 exports_view_type tmpSendBuf =
2878 subview_offset (sendArray,
size_t(0), numPacketsTo_p);
2879 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2882 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2883 ssend<int> (tmpSend,
2884 as<int> (tmpSend.size ()),
2885 procsTo_[p], tag, *comm_);
// NOTE(review): presumably the fallback branch (plain send); the call's
// opening line is not visible in this excerpt.
2889 as<int> (tmpSend.size ()),
2890 procsTo_[p], tag, *comm_);
// NOTE(review): presumably the branch for the self message — remembers where
// its LIDs start.
2896 selfIndex = startsTo_[p];
// Self message: copy one LID at a time into `imports`.
// NOTE(review): the increment of selfIndex is not visible in this excerpt.
2901 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2902 deep_copy_offset(imports, exports, selfReceiveOffset,
2903 indicesOffsets[selfIndex],
2904 numExportPacketsPerLID[selfIndex]);
2905 selfReceiveOffset += numExportPacketsPerLID[selfIndex];
2910 std::ostringstream os;
2911 os << *prefix <<
"slow path: done" << endl;
2912 std::cerr << os.str();
// Execute the reverse plan for Kokkos::View buffers with a fixed number of
// packets per LID ("3-arg" overload).
//
// Enabled only when both ExpView and ImpView are Kokkos Views. Forwards
// exports, numPackets and imports to the 3-argument doReversePosts.
// NOTE(review): the numPackets parameter line and the wait call implied by
// the method name are not visible in this excerpt — confirm.
2917 template <
class ExpView,
class ImpView>
2918 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2920 doReversePostsAndWaits (
const ExpView& exports,
2922 const ImpView& imports)
2924 doReversePosts (exports, numPackets, imports);
// Execute the reverse plan for Kokkos::View buffers when the number of
// packets differs per LID ("4-arg" overload).
//
// Enabled only when both ExpView and ImpView are Kokkos Views.
// Throws std::runtime_error if requests_ is not empty on entry, i.e. earlier
// nonblocking messages are still pending. The communication itself is
// delegated to the 4-argument doReversePosts.
// NOTE(review): the wait call implied by the method name is not visible in
// this excerpt — confirm it follows doReversePosts.
2928 template <
class ExpView,
class ImpView>
2929 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2931 doReversePostsAndWaits (
const ExpView& exports,
2932 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2933 const ImpView& imports,
2934 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Refuse to start a new exchange while earlier posts are outstanding.
2936 TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
2937 "Tpetra::Distributor::doReversePostsAndWaits(4 args): There are "
2938 << requests_.size() <<
" outstanding nonblocking messages pending. It "
2939 "is incorrect to call this method with posts outstanding.");
2941 doReversePosts (exports, numExportPacketsPerLID, imports,
2942 numImportPacketsPerLID);
// Post the reverse-plan communication for Kokkos::View buffers with a fixed
// number of packets per LID ("3-arg" overload).
//
// Enabled only when both ExpView and ImpView are Kokkos Views.
// Throws std::runtime_error unless indicesTo_ is empty. The reverse
// Distributor is created on first use and its doPosts does the work.
// NOTE(review): the numPackets parameter line is not visible in this excerpt.
2946 template <
class ExpView,
class ImpView>
2947 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2949 doReversePosts (
const ExpView &exports,
2951 const ImpView &imports)
// Reverse mode is refused when indicesTo_ is non-empty.
2954 TEUCHOS_TEST_FOR_EXCEPTION(
2955 ! indicesTo_.empty (), std::runtime_error,
2956 "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2957 "reverse communication when original data are blocked by process.");
// Create the reverse plan if it does not exist yet.
2958 if (reverseDistributor_.is_null ()) {
2959 createReverseDistributor ();
// A reverse post is a forward post on the reverse plan.
2961 reverseDistributor_->doPosts (exports, numPackets, imports);
2964 template <
class ExpView,
class ImpView>
2965 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2967 doReversePosts (
const ExpView &exports,
2968 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2969 const ImpView &imports,
2970 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2973 TEUCHOS_TEST_FOR_EXCEPTION(
2974 ! indicesTo_.empty (), std::runtime_error,
2975 "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2976 "reverse communication when original data are blocked by process.");
2977 if (reverseDistributor_.is_null ()) {
2978 createReverseDistributor ();
2980 reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
2981 imports, numImportPacketsPerLID);
// Given the IDs this process will receive (importGIDs) and the ranks they
// come from (importProcIDs), compute what this process has to send:
// exportGIDs and the ranks to send them to (exportProcIDs).
//
// Method: pack one (GID, myRank) pair per import, build a temporary plan with
// tempPlan.createFromSends(importProcIDs), ship the pairs through it with
// doPostsAndWaits, and unpack the received pairs into the output arrays.
//
// Throws std::invalid_argument if importGIDs and importProcIDs differ in
// size, and std::logic_error for the internal consistency checks below.
// NOTE(review): the declaration of tempPlan and the conditions guarding the
// trace output are not visible in this excerpt.
2984 template <
class OrdinalType>
2986 computeSends(
const Teuchos::ArrayView<const OrdinalType>& importGIDs,
2987 const Teuchos::ArrayView<const int>& importProcIDs,
2988 Teuchos::Array<OrdinalType>& exportGIDs,
2989 Teuchos::Array<int>& exportProcIDs)
2998 using Teuchos::Array;
2999 using Teuchos::ArrayView;
3001 using size_type =
typename ArrayView<const OrdinalType>::size_type;
// Common pieces of the error messages below.
3002 const char errPrefix[] =
"Tpetra::Distributor::computeSends: ";
3003 const char suffix[] =
3004 " Please report this bug to the Tpetra developers.";
3006 const int myRank = comm_->getRank ();
3007 std::unique_ptr<std::string> prefix;
3009 prefix = createPrefix(
"computeSends");
3010 std::ostringstream os;
3011 os << *prefix <<
"Start" << endl;
3012 std::cerr << os.str();
// The two input lists are parallel arrays and must match in length.
3015 TEUCHOS_TEST_FOR_EXCEPTION
3016 (importGIDs.size () != importProcIDs.size (),
3017 std::invalid_argument, errPrefix <<
"On Process " << myRank
3018 <<
": importProcIDs.size()=" << importProcIDs.size()
3019 <<
" != importGIDs.size()=" << importGIDs.size() <<
".");
// Pack two size_t values per import: the GID, then this process's rank.
3021 const size_type numImports = importProcIDs.size();
3022 Array<size_t> importObjs(2*numImports);
3024 for (size_type i = 0; i < numImports; ++i) {
3025 importObjs[2*i] =
static_cast<size_t>(importGIDs[i]);
3026 importObjs[2*i+1] =
static_cast<size_t>(myRank);
3034 std::ostringstream os;
3035 os << *prefix <<
"Call tempPlan.createFromSends" << endl;
3036 std::cerr << os.str();
// Build the temporary plan; its return value is used as the export count.
3040 const size_t numExportsAsSizeT =
3041 tempPlan.createFromSends(importProcIDs);
3042 const size_type numExports =
3043 static_cast<size_type
>(numExportsAsSizeT);
// Detect overflow of the size_t -> size_type conversion.
3044 TEUCHOS_TEST_FOR_EXCEPTION
3045 (numExports < 0, std::logic_error, errPrefix <<
3046 "tempPlan.createFromSends() returned numExports="
3047 << numExportsAsSizeT <<
" as a size_t, which overflows to "
3048 << numExports <<
" when cast to " <<
3049 Teuchos::TypeNameTraits<size_type>::name () <<
"." << suffix);
// The plan's total receive length must equal the export count.
3050 TEUCHOS_TEST_FOR_EXCEPTION
3051 (size_type(tempPlan.getTotalReceiveLength()) != numExports,
3052 std::logic_error, errPrefix <<
"tempPlan.getTotalReceiveLength()="
3053 << tempPlan.getTotalReceiveLength () <<
" != numExports="
3054 << numExports <<
"." << suffix);
// Size the outputs (only touched when there is something to export).
3056 if (numExports > 0) {
3057 exportGIDs.resize(numExports);
3058 exportProcIDs.resize(numExports);
// GIDs travel as size_t, so OrdinalType must not be wider than size_t.
3069 static_assert(
sizeof(
size_t) >=
sizeof(OrdinalType),
3070 "Tpetra::Distributor::computeSends: "
3071 "sizeof(size_t) < sizeof(OrdinalType).");
3073 TEUCHOS_TEST_FOR_EXCEPTION
3074 (tempPlan.getTotalReceiveLength () <
size_t(numExports),
3076 errPrefix <<
"tempPlan.getTotalReceiveLength()="
3077 << tempPlan.getTotalReceiveLength() <<
" < numExports="
3078 << numExports <<
"." << suffix);
// Receive buffer: two size_t values per received pair.
3080 Array<size_t> exportObjs (tempPlan.getTotalReceiveLength () * 2);
3082 std::ostringstream os;
3083 os << *prefix <<
"Call tempPlan.doPostsAndWaits" << endl;
3084 std::cerr << os.str();
// Exchange the pairs; numPackets is 2 because each entry is (GID, rank).
3086 tempPlan.doPostsAndWaits<
size_t> (importObjs (), 2, exportObjs ());
// Unpack the received pairs into the output arrays.
3089 for (size_type i = 0; i < numExports; ++i) {
3090 exportGIDs[i] =
static_cast<OrdinalType
> (exportObjs[2*i]);
3091 exportProcIDs[i] =
static_cast<int> (exportObjs[2*i+1]);
3095 std::ostringstream os;
3096 os << *prefix <<
"Done" << endl;
3097 std::cerr << os.str();
// Set up this Distributor from the IDs to receive (remoteGIDs) and the ranks
// they come from (remoteProcIDs). On return, exportGIDs and exportProcIDs
// hold the output of computeSends, and howInitialized_ records that the plan
// was built by createFromRecvs.
//
// Throws std::runtime_error if remoteGIDs and remoteProcIDs differ in size.
// NOTE(review): the conditions selecting between the two size checks, the
// declaration of errProc, and the computation of numProcsSendingToMe are not
// visible in this excerpt.
3101 template <
class OrdinalType>
3103 createFromRecvs (
const Teuchos::ArrayView<const OrdinalType> &remoteGIDs,
3104 const Teuchos::ArrayView<const int> &remoteProcIDs,
3105 Teuchos::Array<OrdinalType> &exportGIDs,
3106 Teuchos::Array<int> &exportProcIDs)
3109 const char errPrefix[] =
"Tpetra::Distributor::createFromRecvs: ";
3110 const int myRank = comm_->getRank();
3112 std::unique_ptr<std::string> prefix;
3114 prefix = createPrefix(
"createFromRecvs");
3115 std::ostringstream os;
3116 os << *prefix <<
"Start" << endl;
3117 std::cerr << os.str();
// Collective size check: each process contributes its rank if its two input
// lists differ in size and -1 otherwise; the max-reduction yields the
// highest offending rank, or -1 if there is none.
3122 using Teuchos::outArg;
3123 using Teuchos::REDUCE_MAX;
3124 using Teuchos::reduceAll;
3128 (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1;
3129 int maxErrProc = -1;
3130 reduceAll(*comm_, REDUCE_MAX, errProc, outArg(maxErrProc));
3131 TEUCHOS_TEST_FOR_EXCEPTION
3132 (maxErrProc != -1, std::runtime_error, errPrefix <<
"Lists "
3133 "of remote IDs and remote process IDs must have the same "
3134 "size on all participating processes. Maximum process ID "
3135 "with error: " << maxErrProc <<
".");
// Local-only version of the same size check.
3140 TEUCHOS_TEST_FOR_EXCEPTION
3141 (remoteGIDs.size() != remoteProcIDs.size(), std::runtime_error,
3142 errPrefix <<
"On Process " << myRank <<
": "
3143 "remoteGIDs.size()=" << remoteGIDs.size() <<
3144 " != remoteProcIDs.size()=" << remoteProcIDs.size() <<
".");
// Determine what this process must send, and to whom.
3147 computeSends(remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs);
3156 std::ostringstream os;
3157 os << *prefix <<
"numProcsSendingToMe: "
3158 << numProcsSendingToMe <<
", remoteProcIDs.size(): "
3159 << remoteProcIDs.size () <<
", selfMessage_: "
3160 << (selfMessage_ ?
"true" :
"false") <<
"" << endl;
3161 std::cerr << os.str();
// Record how this plan was initialized.
3164 howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS;
3167 std::ostringstream os;
3168 os << *prefix <<
"Done" << endl;
3169 std::cerr << os.str();
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Stand-alone utility functions and macros.
static bool debug()
Whether Tpetra is in debug mode.
Sets up and executes a communication plan for a Tpetra DistObject.
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
size_t getMaxSendLength() const
Maximum number of values this process will send to another single process.
Teuchos::RCP< Distributor > getReverse(bool create=true) const
A reverse communication plan Distributor.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
Teuchos::ArrayView< const int > getProcsTo() const
Ranks of the processes to which this process will send values.
void doReversePosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a reverse plan, but do not execute the waits yet.
size_t getNumReceives() const
The number of processes from which we will receive data.
void doPosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a forward plan, but do not execute the waits yet.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &plist)
Set Distributor parameters.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
virtual ~Distributor()=default
Destructor (virtual for memory safety).
bool hasSelfMessage() const
Whether the calling process will send or receive messages to itself.
void swap(Distributor &rhs)
Swap the contents of rhs with those of *this.
Teuchos::ArrayView< const size_t > getLengthsTo() const
Number of values this process will send to each process.
Teuchos::ArrayView< const int > getProcsFrom() const
Ranks of the processes sending values to this process.
Distributor(const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Construct using the specified communicator and default parameters.
std::string description() const
Return a one-line description of this object.
size_t createFromSends(const Teuchos::ArrayView< const int > &exportProcIDs)
Set up Distributor using list of process ranks to which this process will send.
void createFromSendsAndRecvs(const Teuchos::ArrayView< const int > &exportProcIDs, const Teuchos::ArrayView< const int > &remoteProcIDs)
Set up Distributor using list of process ranks to which to send, and list of process ranks from which to receive.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
List of valid Distributor parameters.
Teuchos::ArrayView< const size_t > getLengthsFrom() const
Number of values this process will receive from each process.
Details::EDistributorHowInitialized howInitialized() const
Return an enum indicating whether and how a Distributor was initialized.
size_t getNumSends() const
The number of processes to which we will send data.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
Implementation details of Tpetra.
std::string DistributorSendTypeEnumToString(EDistributorSendType sendType)
Convert an EDistributorSendType enum value to a string.
EDistributorSendType
The type of MPI send that Distributor should use.
EDistributorHowInitialized
Enum indicating how and whether a Distributor was initialized.
std::string DistributorHowInitializedEnumToString(EDistributorHowInitialized how)
Convert an EDistributorHowInitialized enum value to a string.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::Array< std::string > distributorSendTypes()
Valid values for Distributor's "Send type" parameter.