Tpetra parallel linear algebra  Version of the Day
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DISTOBJECT_DEF_HPP
41 #define TPETRA_DISTOBJECT_DEF_HPP
42 
50 
51 #include "Tpetra_Distributor.hpp"
54 #include "Tpetra_Details_checkGlobalError.hpp"
56 #include "Tpetra_Util.hpp" // Details::createPrefix
57 #include "Teuchos_CommHelpers.hpp"
58 #include "Teuchos_TypeNameTraits.hpp"
59 #include <typeinfo>
60 #include <memory>
61 #include <sstream>
62 
63 namespace Tpetra {
64 
65  namespace { // (anonymous)
66  template<class DeviceType, class IndexType = size_t>
67  struct SumFunctor {
68  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
69  viewToSum_ (viewToSum) {}
70  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
71  lclSum += viewToSum_(i);
72  }
73  Kokkos::View<const size_t*, DeviceType> viewToSum_;
74  };
75 
76  template<class DeviceType, class IndexType = size_t>
77  size_t
78  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
79  {
80  using Kokkos::parallel_reduce;
81  typedef DeviceType DT;
82  typedef typename DT::execution_space DES;
83  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
84 
85  const IndexType numOut = numImportPacketsPerLID.extent (0);
86  size_t totalImportPackets = 0;
87  parallel_reduce ("Count import packets",
88  range_type (0, numOut),
89  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
90  totalImportPackets);
91  return totalImportPackets;
92  }
93  } // namespace (anonymous)
94 
95 
96  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
98  DistObject (const Teuchos::RCP<const map_type>& map) :
99  map_ (map)
100  {
101 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
102  using Teuchos::RCP;
103  using Teuchos::Time;
104  using Teuchos::TimeMonitor;
105 
106  RCP<Time> doXferTimer =
107  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
108  if (doXferTimer.is_null ()) {
109  doXferTimer =
110  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
111  }
112  doXferTimer_ = doXferTimer;
113 
114  RCP<Time> copyAndPermuteTimer =
115  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
116  if (copyAndPermuteTimer.is_null ()) {
117  copyAndPermuteTimer =
118  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
119  }
120  copyAndPermuteTimer_ = copyAndPermuteTimer;
121 
122  RCP<Time> packAndPrepareTimer =
123  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
124  if (packAndPrepareTimer.is_null ()) {
125  packAndPrepareTimer =
126  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
127  }
128  packAndPrepareTimer_ = packAndPrepareTimer;
129 
130  RCP<Time> doPostsAndWaitsTimer =
131  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
132  if (doPostsAndWaitsTimer.is_null ()) {
133  doPostsAndWaitsTimer =
134  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
135  }
136  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
137 
138  RCP<Time> unpackAndCombineTimer =
139  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
140  if (unpackAndCombineTimer.is_null ()) {
141  unpackAndCombineTimer =
142  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
143  }
144  unpackAndCombineTimer_ = unpackAndCombineTimer;
145 #endif // HAVE_TPETRA_TRANSFER_TIMERS
146  }
147 
148  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
149  std::string
151  description () const
152  {
153  using Teuchos::TypeNameTraits;
154 
155  std::ostringstream os;
156  os << "\"Tpetra::DistObject\": {"
157  << "Packet: " << TypeNameTraits<packet_type>::name ()
158  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
159  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
160  << ", Node: " << TypeNameTraits<Node>::name ();
161  if (this->getObjectLabel () != "") {
162  os << "Label: \"" << this->getObjectLabel () << "\"";
163  }
164  os << "}";
165  return os.str ();
166  }
167 
168  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
169  void
171  describe (Teuchos::FancyOStream &out,
172  const Teuchos::EVerbosityLevel verbLevel) const
173  {
174  using Teuchos::rcpFromRef;
175  using Teuchos::TypeNameTraits;
176  using std::endl;
177  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
178  Teuchos::VERB_LOW : verbLevel;
179  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
180  const int myRank = comm.is_null () ? 0 : comm->getRank ();
181  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
182 
183  if (vl != Teuchos::VERB_NONE) {
184  Teuchos::OSTab tab0 (out);
185  if (myRank == 0) {
186  out << "\"Tpetra::DistObject\":" << endl;
187  }
188  Teuchos::OSTab tab1 (out);
189  if (myRank == 0) {
190  out << "Template parameters:" << endl;
191  {
192  Teuchos::OSTab tab2 (out);
193  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
194  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
195  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
196  << "Node: " << TypeNameTraits<node_type>::name () << endl;
197  }
198  if (this->getObjectLabel () != "") {
199  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
200  }
201  } // if myRank == 0
202 
203  // Describe the Map.
204  {
205  if (myRank == 0) {
206  out << "Map:" << endl;
207  }
208  Teuchos::OSTab tab2 (out);
209  map_->describe (out, vl);
210  }
211 
212  // At verbosity > VERB_LOW, each process prints something.
213  if (vl > Teuchos::VERB_LOW) {
214  for (int p = 0; p < numProcs; ++p) {
215  if (myRank == p) {
216  out << "Process " << myRank << ":" << endl;
217  Teuchos::OSTab tab2 (out);
218  out << "Export buffer size (in packets): "
219  << exports_.extent (0)
220  << endl
221  << "Import buffer size (in packets): "
222  << imports_.extent (0)
223  << endl;
224  }
225  if (! comm.is_null ()) {
226  comm->barrier (); // give output time to finish
227  comm->barrier ();
228  comm->barrier ();
229  }
230  } // for each process rank p
231  } // if vl > VERB_LOW
232  } // if vl != VERB_NONE
233  }
234 
235  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
236  void
238  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
239  {
240  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
241  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
242  }
243 
244  /* These are provided in base DistObject template
245  template<class DistObjectType>
246  void
247  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
248  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
249  typename DistObjectType::global_ordinal_type,
250  typename DistObjectType::node_type> >& newMap)
251  {
252  input->removeEmptyProcessesInPlace (newMap);
253  if (newMap.is_null ()) { // my process is excluded
254  input = Teuchos::null;
255  }
256  }
257 
258  template<class DistObjectType>
259  void
260  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
261  {
262  using Teuchos::RCP;
263  typedef typename DistObjectType::local_ordinal_type LO;
264  typedef typename DistObjectType::global_ordinal_type GO;
265  typedef typename DistObjectType::node_type NT;
266  typedef Map<LO, GO, NT> map_type;
267 
268  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
269  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
270  }
271  */
272 
273  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
274  void
276  doImport (const SrcDistObject& source,
278  const CombineMode CM,
279  const bool restrictedMode)
280  {
281  using Details::Behavior;
282  using std::endl;
283  const char modeString[] = "doImport (forward mode)";
284 
285  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
286  // output to std::cerr on every MPI process. This is unwise for
287  // runs with large numbers of MPI processes.
288  const bool verbose = Behavior::verbose("DistObject");
289  std::unique_ptr<std::string> prefix;
290  if (verbose) {
291  prefix = this->createPrefix("DistObject", modeString);
292  std::ostringstream os;
293  os << *prefix << "Start" << endl;
294  std::cerr << os.str ();
295  }
296  this->beginImport(source, importer, CM, restrictedMode);
297  this->endImport(source, importer, CM, restrictedMode);
298  if (verbose) {
299  std::ostringstream os;
300  os << *prefix << "Done" << endl;
301  std::cerr << os.str ();
302  }
303  }
304 
305  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
306  void
308  doExport (const SrcDistObject& source,
310  const CombineMode CM,
311  const bool restrictedMode)
312  {
313  using Details::Behavior;
314  using std::endl;
315  const char modeString[] = "doExport (forward mode)";
316 
317  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
318  // output to std::cerr on every MPI process. This is unwise for
319  // runs with large numbers of MPI processes.
320  const bool verbose = Behavior::verbose("DistObject");
321  std::unique_ptr<std::string> prefix;
322  if (verbose) {
323  prefix = this->createPrefix("DistObject", modeString);
324  std::ostringstream os;
325  os << *prefix << "Start" << endl;
326  std::cerr << os.str ();
327  }
328  this->beginExport(source, exporter, CM, restrictedMode);
329  this->endExport(source, exporter, CM, restrictedMode);
330  if (verbose) {
331  std::ostringstream os;
332  os << *prefix << "Done" << endl;
333  std::cerr << os.str ();
334  }
335  }
336 
337  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
338  void
340  doImport (const SrcDistObject& source,
342  const CombineMode CM,
343  const bool restrictedMode)
344  {
345  using Details::Behavior;
346  using std::endl;
347  const char modeString[] = "doImport (reverse mode)";
348 
349  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
350  // output to std::cerr on every MPI process. This is unwise for
351  // runs with large numbers of MPI processes.
352  const bool verbose = Behavior::verbose("DistObject");
353  std::unique_ptr<std::string> prefix;
354  if (verbose) {
355  prefix = this->createPrefix("DistObject", modeString);
356  std::ostringstream os;
357  os << *prefix << "Start" << endl;
358  std::cerr << os.str ();
359  }
360  this->beginImport(source, exporter, CM, restrictedMode);
361  this->endImport(source, exporter, CM, restrictedMode);
362  if (verbose) {
363  std::ostringstream os;
364  os << *prefix << "Done" << endl;
365  std::cerr << os.str ();
366  }
367  }
368 
369  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
370  void
372  doExport (const SrcDistObject& source,
374  const CombineMode CM,
375  const bool restrictedMode)
376  {
377  using Details::Behavior;
378  using std::endl;
379  const char modeString[] = "doExport (reverse mode)";
380 
381  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
382  // output to std::cerr on every MPI process. This is unwise for
383  // runs with large numbers of MPI processes.
384  const bool verbose = Behavior::verbose("DistObject");
385  std::unique_ptr<std::string> prefix;
386  if (verbose) {
387  prefix = this->createPrefix("DistObject", modeString);
388  std::ostringstream os;
389  os << *prefix << "Start" << endl;
390  std::cerr << os.str ();
391  }
392  this->beginExport(source, importer, CM, restrictedMode);
393  this->endExport(source, importer, CM, restrictedMode);
394  if (verbose) {
395  std::ostringstream os;
396  os << *prefix << "Done" << endl;
397  std::cerr << os.str ();
398  }
399  }
400 
401  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
402  void
404  beginImport(const SrcDistObject& source,
406  const CombineMode CM,
407  const bool restrictedMode)
408  {
409  using Details::Behavior;
410  using std::endl;
411  const char modeString[] = "doImport (forward mode)";
412 
413  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
414  // output to std::cerr on every MPI process. This is unwise for
415  // runs with large numbers of MPI processes.
416  const bool verbose = Behavior::verbose("DistObject");
417  std::unique_ptr<std::string> prefix;
418  if (verbose) {
419  prefix = this->createPrefix("DistObject", modeString);
420  std::ostringstream os;
421  os << *prefix << "Start" << endl;
422  std::cerr << os.str ();
423  }
424  this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
425  if (verbose) {
426  std::ostringstream os;
427  os << *prefix << "Done" << endl;
428  std::cerr << os.str ();
429  }
430  }
431 
432  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
433  void
434  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
435  beginExport(const SrcDistObject& source,
436  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
437  const CombineMode CM,
438  const bool restrictedMode)
439  {
440  using Details::Behavior;
441  using std::endl;
442  const char modeString[] = "doExport (forward mode)";
443 
444  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
445  // output to std::cerr on every MPI process. This is unwise for
446  // runs with large numbers of MPI processes.
447  const bool verbose = Behavior::verbose("DistObject");
448  std::unique_ptr<std::string> prefix;
449  if (verbose) {
450  prefix = this->createPrefix("DistObject", modeString);
451  std::ostringstream os;
452  os << *prefix << "Start" << endl;
453  std::cerr << os.str ();
454  }
455  this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
456  if (verbose) {
457  std::ostringstream os;
458  os << *prefix << "Done" << endl;
459  std::cerr << os.str ();
460  }
461  }
462 
463  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
464  void
465  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
466  beginImport(const SrcDistObject& source,
467  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
468  const CombineMode CM,
469  const bool restrictedMode)
470  {
471  using Details::Behavior;
472  using std::endl;
473  const char modeString[] = "doImport (reverse mode)";
474 
475  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
476  // output to std::cerr on every MPI process. This is unwise for
477  // runs with large numbers of MPI processes.
478  const bool verbose = Behavior::verbose("DistObject");
479  std::unique_ptr<std::string> prefix;
480  if (verbose) {
481  prefix = this->createPrefix("DistObject", modeString);
482  std::ostringstream os;
483  os << *prefix << "Start" << endl;
484  std::cerr << os.str ();
485  }
486  this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
487  if (verbose) {
488  std::ostringstream os;
489  os << *prefix << "Done" << endl;
490  std::cerr << os.str ();
491  }
492  }
493 
494  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
495  void
496  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
497  beginExport(const SrcDistObject& source,
498  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
499  const CombineMode CM,
500  const bool restrictedMode)
501  {
502  using Details::Behavior;
503  using std::endl;
504  const char modeString[] = "doExport (reverse mode)";
505 
506  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
507  // output to std::cerr on every MPI process. This is unwise for
508  // runs with large numbers of MPI processes.
509  const bool verbose = Behavior::verbose("DistObject");
510  std::unique_ptr<std::string> prefix;
511  if (verbose) {
512  prefix = this->createPrefix("DistObject", modeString);
513  std::ostringstream os;
514  os << *prefix << "Start" << endl;
515  std::cerr << os.str ();
516  }
517  this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
518  if (verbose) {
519  std::ostringstream os;
520  os << *prefix << "Done" << endl;
521  std::cerr << os.str ();
522  }
523  }
524 
525  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
526  void
527  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
528  endImport(const SrcDistObject& source,
529  const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
530  const CombineMode CM,
531  const bool restrictedMode)
532  {
533  using Details::Behavior;
534  using std::endl;
535  const char modeString[] = "doImport (forward mode)";
536 
537  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
538  // output to std::cerr on every MPI process. This is unwise for
539  // runs with large numbers of MPI processes.
540  const bool verbose = Behavior::verbose("DistObject");
541  std::unique_ptr<std::string> prefix;
542  if (verbose) {
543  prefix = this->createPrefix("DistObject", modeString);
544  std::ostringstream os;
545  os << *prefix << "Start" << endl;
546  std::cerr << os.str ();
547  }
548  this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
549  if (verbose) {
550  std::ostringstream os;
551  os << *prefix << "Done" << endl;
552  std::cerr << os.str ();
553  }
554  }
555 
556  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
557  void
558  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
559  endExport(const SrcDistObject& source,
560  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
561  const CombineMode CM,
562  const bool restrictedMode)
563  {
564  using Details::Behavior;
565  using std::endl;
566  const char modeString[] = "doExport (forward mode)";
567 
568  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
569  // output to std::cerr on every MPI process. This is unwise for
570  // runs with large numbers of MPI processes.
571  const bool verbose = Behavior::verbose("DistObject");
572  std::unique_ptr<std::string> prefix;
573  if (verbose) {
574  prefix = this->createPrefix("DistObject", modeString);
575  std::ostringstream os;
576  os << *prefix << "Start" << endl;
577  std::cerr << os.str ();
578  }
579  this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
580  if (verbose) {
581  std::ostringstream os;
582  os << *prefix << "Done" << endl;
583  std::cerr << os.str ();
584  }
585  }
586 
587  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
588  void
589  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
590  endImport(const SrcDistObject& source,
591  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
592  const CombineMode CM,
593  const bool restrictedMode)
594  {
595  using Details::Behavior;
596  using std::endl;
597  const char modeString[] = "doImport (reverse mode)";
598 
599  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
600  // output to std::cerr on every MPI process. This is unwise for
601  // runs with large numbers of MPI processes.
602  const bool verbose = Behavior::verbose("DistObject");
603  std::unique_ptr<std::string> prefix;
604  if (verbose) {
605  prefix = this->createPrefix("DistObject", modeString);
606  std::ostringstream os;
607  os << *prefix << "Start" << endl;
608  std::cerr << os.str ();
609  }
610  this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
611  if (verbose) {
612  std::ostringstream os;
613  os << *prefix << "Done" << endl;
614  std::cerr << os.str ();
615  }
616  }
617 
618  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
619  void
620  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
621  endExport(const SrcDistObject& source,
622  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
623  const CombineMode CM,
624  const bool restrictedMode)
625  {
626  using Details::Behavior;
627  using std::endl;
628  const char modeString[] = "doExport (reverse mode)";
629 
630  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
631  // output to std::cerr on every MPI process. This is unwise for
632  // runs with large numbers of MPI processes.
633  const bool verbose = Behavior::verbose("DistObject");
634  std::unique_ptr<std::string> prefix;
635  if (verbose) {
636  prefix = this->createPrefix("DistObject", modeString);
637  std::ostringstream os;
638  os << *prefix << "Start" << endl;
639  std::cerr << os.str ();
640  }
641  this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
642  if (verbose) {
643  std::ostringstream os;
644  os << *prefix << "Done" << endl;
645  std::cerr << os.str ();
646  }
647  }
648 
649  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
650  bool
651  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
652  isDistributed () const {
653  return map_->isDistributed ();
654  }
655 
// Member function returning size_t; this default always returns 0.
//
// NOTE(review): listing lines 658-659 (the class qualifier and the
// function's name/signature) are absent from this extract.
// Presumably this is DistObject<...>::constantNumberOfPackets() const
// -- confirm against the original header before relying on that.
656  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
657  size_t
660  return 0; // default implementation; subclasses may override
661  }
662 
// Run a complete transfer: call beginTransfer and then endTransfer,
// passing the same six arguments (src, transfer, modeString, revOp,
// CM, restrictedMode) to both.
//
// NOTE(review): listing lines 665-666 (the class qualifier, the
// function name and the declaration of the first parameter `src`)
// are absent from this extract.  Presumably this is
// DistObject<...>::doTransfer (const SrcDistObject& src, ...) --
// confirm against the original header.
663  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
664  void
667  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
668  const char modeString[],
669  const ReverseOption revOp,
670  const CombineMode CM,
671  bool restrictedMode)
672  {
673  beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
674  endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
675  }
676 
// Ask reallocDualViewIfNeeded to reallocate the imports_ buffer to
// newSize entries, and return what that helper returns.
//
// Parameters:
//   newSize - requested size, passed on to reallocDualViewIfNeeded.
//   verbose - if true, write a message to std::cerr before and after
//             the call.
//   prefix  - dereferenced (so it must be non-null) only when verbose
//             is true; prepended to each message.
//   The last two parameters (a bool and a CombineMode) are unnamed
//   and unused in this implementation.
//
// Returns the bool produced by reallocDualViewIfNeeded.
//
// NOTE(review): listing lines 679 and 692 are absent from this
// extract; presumably the DistObject<...>:: qualifier and a
// using-declaration for reallocDualViewIfNeeded -- confirm against
// the original file.
677  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
678  bool
680  reallocImportsIfNeeded (const size_t newSize,
681  const bool verbose,
682  const std::string* prefix,
683  const bool /*remoteLIDsContiguous*/,
684  const CombineMode /*CM*/)
685  {
686  if (verbose) {
      // Build the whole message first, then write it with one insertion.
687  std::ostringstream os;
688  os << *prefix << "Realloc (if needed) imports_ from "
689  << imports_.extent (0) << " to " << newSize << std::endl;
690  std::cerr << os.str ();
691  }
693  const bool reallocated =
694  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
695  if (verbose) {
696  std::ostringstream os;
697  os << *prefix << "Finished realloc'ing imports_" << std::endl;
698  std::cerr << os.str ();
699  }
700  return reallocated;
701  }
702 
// Ask reallocDualViewIfNeeded to resize numExportPacketsPerLID_ to
// numExportLIDs entries and numImportPacketsPerLID_ to numImportLIDs
// entries.
//
// Returns true if either call to reallocDualViewIfNeeded returned
// true, false otherwise.
//
// If Behavior::verbose("DistObject") is true, writes the two input
// sizes and the status of both DualViews (via dualViewStatusToString)
// to std::cerr before the calls, and the status again afterwards.
//
// NOTE(review): listing lines 705 and 710-711 are absent from this
// extract; presumably the DistObject<...>:: qualifier and
// using-declarations for dualViewStatusToString and
// reallocDualViewIfNeeded -- confirm against the original file.
703  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
704  bool
706  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
707  const size_t numImportLIDs)
708  {
709  using Details::Behavior;
712  using std::endl;
713  // If an array is already allocated, and if it is at least
714  // tooBigFactor times bigger than it needs to be, free it and
715  // reallocate to the size we need, in order to save space.
716  // Otherwise, take subviews to reduce allocation size.
717  constexpr size_t tooBigFactor = 10;
718 
719  const bool verbose = Behavior::verbose("DistObject");
      // prefix stays null unless verbose is true; it is only
      // dereferenced inside the verbose branches below.
720  std::unique_ptr<std::string> prefix;
721  if (verbose) {
722  prefix = this->createPrefix("DistObject",
723  "reallocArraysForNumPacketsPerLid");
724  std::ostringstream os;
725  os << *prefix
726  << "numExportLIDs: " << numExportLIDs
727  << ", numImportLIDs: " << numImportLIDs
728  << endl;
729  os << *prefix << "DualView status before:" << endl
730  << *prefix
731  << dualViewStatusToString (this->numExportPacketsPerLID_,
732  "numExportPacketsPerLID_")
733  << endl
734  << *prefix
735  << dualViewStatusToString (this->numImportPacketsPerLID_,
736  "numImportPacketsPerLID_")
737  << endl;
738  std::cerr << os.str ();
739  }
740 
741  // Reallocate numExportPacketsPerLID_ if needed.
742  const bool firstReallocated =
743  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
744  numExportLIDs,
745  "numExportPacketsPerLID",
746  tooBigFactor,
747  true); // need fence before, if realloc'ing
748 
749  // If we reallocated above, then we fenced after that
750  // reallocation. This means that we don't need to fence again,
751  // before the next reallocation.
752  const bool needFenceBeforeNextAlloc = ! firstReallocated;
753  const bool secondReallocated =
754  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
755  numImportLIDs,
756  "numImportPacketsPerLID",
757  tooBigFactor,
758  needFenceBeforeNextAlloc);
759 
760  if (verbose) {
761  std::ostringstream os;
762  os << *prefix << "DualView status after:" << endl
763  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
764  "numExportPacketsPerLID_")
765  << endl
766  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
767  "numImportPacketsPerLID_")
768  << endl;
769  std::cerr << os.str ();
770  }
771 
772  return firstReallocated || secondReallocated;
773  }
774 
775  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
776  void
779  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
780  const char modeString[],
781  const ReverseOption revOp,
782  const CombineMode CM,
783  bool restrictedMode)
784  {
785  using Details::Behavior;
789  using Kokkos::Compat::getArrayView;
790  using Kokkos::Compat::getConstArrayView;
791  using Kokkos::Compat::getKokkosViewDeepCopy;
792  using Kokkos::Compat::create_const_view;
793  using std::endl;
796  const char funcName[] = "Tpetra::DistObject::doTransfer";
797 
798  ProfilingRegion region_doTransfer(funcName);
799  const bool verbose = Behavior::verbose("DistObject");
800  std::shared_ptr<std::string> prefix;
801  if (verbose) {
802  std::ostringstream os;
803  prefix = this->createPrefix("DistObject", "doTransfer");
804  os << *prefix << "Source type: " << Teuchos::typeName(src)
805  << ", Target type: " << Teuchos::typeName(*this) << endl;
806  std::cerr << os.str();
807  }
808 
809  // "Restricted Mode" does two things:
810  // 1) Skips copyAndPermute
811  // 2) Allows the "target" Map of the transfer to be a subset of
812  // the Map of *this, in a "locallyFitted" sense.
813  //
814  // This cannot be used if #2 is not true, OR there are permutes.
815  // Source Maps still need to match
816 
817  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
818  // checks. These may communicate more.
819  const bool debug = Behavior::debug("DistObject");
820  if (debug) {
821  if (! restrictedMode && revOp == DoForward) {
822  const bool myMapSameAsTransferTgtMap =
823  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
824  TEUCHOS_TEST_FOR_EXCEPTION
825  (! myMapSameAsTransferTgtMap, std::invalid_argument,
826  "Tpetra::DistObject::" << modeString << ": For forward-mode "
827  "communication, the target DistObject's Map must be the same "
828  "(in the sense of Tpetra::Map::isSameAs) as the input "
829  "Export/Import object's target Map.");
830  }
831  else if (! restrictedMode && revOp == DoReverse) {
832  const bool myMapSameAsTransferSrcMap =
833  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
834  TEUCHOS_TEST_FOR_EXCEPTION
835  (! myMapSameAsTransferSrcMap, std::invalid_argument,
836  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
837  "communication, the target DistObject's Map must be the same "
838  "(in the sense of Tpetra::Map::isSameAs) as the input "
839  "Export/Import object's source Map.");
840  }
841  else if (restrictedMode && revOp == DoForward) {
842  const bool myMapLocallyFittedTransferTgtMap =
843  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
844  TEUCHOS_TEST_FOR_EXCEPTION
845  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
846  "Tpetra::DistObject::" << modeString << ": For forward-mode "
847  "communication using restricted mode, Export/Import object's "
848  "target Map must be locally fitted (in the sense of "
849  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
850  }
851  else { // if (restrictedMode && revOp == DoReverse)
852  const bool myMapLocallyFittedTransferSrcMap =
853  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
854  TEUCHOS_TEST_FOR_EXCEPTION
855  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
856  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
857  "communication using restricted mode, Export/Import object's "
858  "source Map must be locally fitted (in the sense of "
859  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
860  }
861 
862  // SrcDistObject need not even _have_ Maps. However, if the
863  // source object is a DistObject, it has a Map, and we may
864  // compare that Map with the Transfer's Maps.
865  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
866  if (srcDistObj != nullptr) {
867  if (revOp == DoForward) {
868  const bool srcMapSameAsImportSrcMap =
869  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
870  TEUCHOS_TEST_FOR_EXCEPTION
871  (! srcMapSameAsImportSrcMap, std::invalid_argument,
872  "Tpetra::DistObject::" << modeString << ": For forward-mode "
873  "communication, the source DistObject's Map must be the same "
874  "as the input Export/Import object's source Map.");
875  }
876  else { // revOp == DoReverse
877  const bool srcMapSameAsImportTgtMap =
878  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
879  TEUCHOS_TEST_FOR_EXCEPTION
880  (! srcMapSameAsImportTgtMap, std::invalid_argument,
881  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
882  "communication, the source DistObject's Map must be the same "
883  "as the input Export/Import object's target Map.");
884  }
885  }
886  }
887 
888  const size_t numSameIDs = transfer.getNumSameIDs ();
889  Distributor& distor = transfer.getDistributor ();
890  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
891 
892  TEUCHOS_TEST_FOR_EXCEPTION
893  (debug && restrictedMode &&
894  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
895  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
896  std::invalid_argument,
897  "Tpetra::DistObject::" << modeString << ": Transfer object "
898  "cannot have permutes in restricted mode.");
899 
900  // Do we need all communication buffers to live on host?
901  const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
902  if (verbose) {
903  std::ostringstream os;
904  os << *prefix << "doTransfer: Use new interface; "
905  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
906  std::cerr << os.str ();
907  }
908 
909  using const_lo_dv_type =
910  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
911  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
912  transfer.getPermuteToLIDs_dv () :
913  transfer.getPermuteFromLIDs_dv ();
914  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
915  transfer.getPermuteFromLIDs_dv () :
916  transfer.getPermuteToLIDs_dv ();
917  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
918  transfer.getRemoteLIDs_dv () :
919  transfer.getExportLIDs_dv ();
920  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
921  transfer.getExportLIDs_dv () :
922  transfer.getRemoteLIDs_dv ();
923  const bool canTryAliasing = (revOp == DoForward) ?
924  transfer.areRemoteLIDsContiguous() :
925  transfer.areExportLIDsContiguous();
926  // const bool canTryAliasing = false;
927 
928  ProfilingRegion region_dTN(funcName);
929 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
930  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
931  // of Kokkos profiling.
932  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
933 #endif // HAVE_TPETRA_TRANSFER_TIMERS
934 
935  if (verbose) {
936  std::ostringstream os;
937  os << *prefix << "Input arguments:" << endl
938  << *prefix << " combineMode: " << combineModeToString (CM) << endl
939  << *prefix << " numSameIDs: " << numSameIDs << endl
940  << *prefix << " "
941  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
942  << *prefix << " "
943  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
944  << *prefix << " "
945  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
946  << *prefix << " "
947  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
948  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
949  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
950  std::cerr << os.str ();
951  }
952 
953  {
954  ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
955  if (verbose) {
956  std::ostringstream os;
957  os << *prefix << "1. checkSizes" << endl;
958  std::cerr << os.str ();
959  }
960  const bool checkSizesResult = this->checkSizes (src);
961  TEUCHOS_TEST_FOR_EXCEPTION
962  (! checkSizesResult, std::invalid_argument,
963  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
964  "destination object is not a legal target for redistribution from the "
965  "source object. This probably means that they do not have the same "
966  "dimensions. For example, MultiVectors must have the same number of "
967  "rows and columns.");
968  }
969 
970  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
971  // that if CM == INSERT || CM == REPLACE, the target object could
972  // be write only. We don't optimize for that here.
973 
974  if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
975  // There is at least one GID to copy or permute.
976  if (verbose) {
977  std::ostringstream os;
978  os << *prefix << "2. copyAndPermute" << endl;
979  std::cerr << os.str ();
980  }
981  ProfilingRegion region_cp
982  ("Tpetra::DistObject::doTransferNew::copyAndPermute");
983 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
984  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
985  // of Kokkos profiling.
986  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
987 #endif // HAVE_TPETRA_TRANSFER_TIMERS
988 
989  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
990  // There is at least one GID to copy or permute.
991  if (verbose) {
992  std::ostringstream os;
993  os << *prefix << "2. copyAndPermute" << endl;
994  std::cerr << os.str ();
995  }
996  this->copyAndPermute (src, numSameIDs, permuteToLIDs,
997  permuteFromLIDs, CM);
998  if (verbose) {
999  std::ostringstream os;
1000  os << *prefix << "After copyAndPermute:" << endl
1001  << *prefix << " "
1002  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1003  << endl
1004  << *prefix << " "
1005  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1006  << endl;
1007  std::cerr << os.str ();
1008  }
1009  }
1010  }
1011 
1012  // The method may return zero even if the implementation actually
1013  // does have a constant number of packets per LID. However, if it
1014  // returns nonzero, we may use this information to avoid
1015  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1016  // will set this to its final value.
1017  //
1018  // We only need this if CM != ZERO, but it has to be lifted out of
1019  // that scope because there are multiple tests for CM != ZERO.
1020  size_t constantNumPackets = this->constantNumberOfPackets ();
1021  if (verbose) {
1022  std::ostringstream os;
1023  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1024  std::cerr << os.str ();
1025  }
1026 
1027  // We only need to pack communication buffers if the combine mode
1028  // is not ZERO. A "ZERO combine mode" means that the results are
1029  // the same as if we had received all zeros, and added them to the
1030  // existing values. That means we don't need to communicate.
1031  if (CM != ZERO) {
1032  if (constantNumPackets == 0) {
1033  if (verbose) {
1034  std::ostringstream os;
1035  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1036  << endl;
1037  std::cerr << os.str ();
1038  }
1039  // This only reallocates if necessary, that is, if the sizes
1040  // don't match.
1041  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1042  remoteLIDs.extent (0));
1043  }
1044 
1045  if (verbose) {
1046  std::ostringstream os;
1047  os << *prefix << "4. packAndPrepare: before, "
1048  << dualViewStatusToString (this->exports_, "exports_")
1049  << endl;
1050  std::cerr << os.str ();
1051  }
1052 
1053  doPackAndPrepare(src, exportLIDs, constantNumPackets);
1054  if (commOnHost) {
1055  this->exports_.sync_host();
1056  }
1057  else {
1058  this->exports_.sync_device();
1059  }
1060 
1061  if (verbose) {
1062  std::ostringstream os;
1063  os << *prefix << "5.1. After packAndPrepare, "
1064  << dualViewStatusToString (this->exports_, "exports_")
1065  << endl;
1066  std::cerr << os.str ();
1067  }
1068  } // if (CM != ZERO)
1069 
1070  // We only need to send data if the combine mode is not ZERO.
1071  if (CM != ZERO) {
1072  if (constantNumPackets != 0) {
1073  // There are a constant number of packets per element. We
1074  // already know (from the number of "remote" (incoming)
1075  // elements) how many incoming elements we expect, so we can
1076  // resize the buffer accordingly.
1077  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1078  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1079  }
1080 
1081  // Do we need to do communication (via doPostsAndWaits)?
1082  bool needCommunication = true;
1083 
1084  // This may be NULL. It will be used below.
1085  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1086 
1087  if (revOp == DoReverse && ! this->isDistributed ()) {
1088  needCommunication = false;
1089  }
1090  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1091  // is distributed requires a cast to DistObject. If it's not a
1092  // DistObject, then I'm not quite sure what to do. Perhaps it
1093  // would be more appropriate for SrcDistObject to have an
1094  // isDistributed() method. For now, I'll just assume that we
1095  // need to do communication unless the cast succeeds and the
1096  // source is not distributed.
1097  else if (revOp == DoForward && srcDistObj != NULL &&
1098  ! srcDistObj->isDistributed ()) {
1099  needCommunication = false;
1100  }
1101 
1102  if (! needCommunication) {
1103  if (verbose) {
1104  std::ostringstream os;
1105  os << *prefix << "Comm not needed; skipping" << endl;
1106  std::cerr << os.str ();
1107  }
1108  }
1109  else {
1110  ProfilingRegion region_dpw
1111  ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1112 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1113  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1114  // favor of Kokkos profiling.
1115  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1116 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1117 
1118  if (verbose) {
1119  std::ostringstream os;
1120  os << *prefix << "7.0. "
1121  << (revOp == DoReverse ? "Reverse" : "Forward")
1122  << " mode" << endl;
1123  std::cerr << os.str ();
1124  }
1125 
1126  doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1127  } // if (needCommunication)
1128  } // if (CM != ZERO)
1129  }
1130 
// endTransfer: completion phase of a transfer into *this.
//
// What the visible code does:
//  - In debug mode (Behavior::debug("DistObject")), checks this object's Map
//    against the transfer's source/target Map (isSameAs, or isLocallyFitted in
//    restricted mode) and, when src can be cast to this_type, checks src's Map
//    as well. A failed check throws std::invalid_argument.
//  - If CM != ZERO and constantNumberOfPackets() is nonzero, calls
//    reallocImportsIfNeeded for remoteLIDs.extent(0) * constantNumPackets.
//  - If CM != ZERO and communication is judged necessary, calls
//    distributorActor_.doWaits(distributorPlan) and then
//    doUnpackAndCombine(remoteLIDs, constantNumPackets, CM).
//
// Parameters:
//  src            - source object of the transfer.
//  transfer       - supplies the Maps, the LID lists and the Distributor.
//  modeString     - inserted into the exception messages.
//  revOp          - DoForward or DoReverse; selects which plan and which LID
//                   lists play the "remote" and "export" roles.
//  CM             - combine mode; ZERO skips the communication section.
//  restrictedMode - selects the isLocallyFitted checks and forbids permutes
//                   (the latter is only enforced in debug mode).
//
// NOTE(review): this listing omits some source lines (the class qualifier
// before the function name and several using-declarations), so names such as
// ProfilingRegion are presumably brought into scope by lines not shown here.
1131  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1132  void
1134  endTransfer(const SrcDistObject& src,
1135  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1136  const char modeString[],
1137  const ReverseOption revOp,
1138  const CombineMode CM,
1139  bool restrictedMode)
1140  {
1141  using Details::Behavior;
1145  using Kokkos::Compat::getArrayView;
1146  using Kokkos::Compat::getConstArrayView;
1147  using Kokkos::Compat::getKokkosViewDeepCopy;
1148  using Kokkos::Compat::create_const_view;
1149  using std::endl;
    // NOTE(review): the profiling region and the verbose prefix below are
    // labelled "doTransfer" although this function is endTransfer -- confirm
    // that sharing the label is intended.
1152  const char funcName[] = "Tpetra::DistObject::doTransfer";
1153 
1154  ProfilingRegion region_doTransfer(funcName);
1155  const bool verbose = Behavior::verbose("DistObject");
    // prefix is only created when verbose is true; every later dereference of
    // it is guarded by "if (verbose)".
1156  std::shared_ptr<std::string> prefix;
1157  if (verbose) {
1158  std::ostringstream os;
1159  prefix = this->createPrefix("DistObject", "doTransfer");
1160  os << *prefix << "Source type: " << Teuchos::typeName(src)
1161  << ", Target type: " << Teuchos::typeName(*this) << endl;
1162  std::cerr << os.str();
1163  }
1164 
1165  // "Restricted Mode" does two things:
1166  // 1) Skips copyAndPermute
1167  // 2) Allows the "target" Map of the transfer to be a subset of
1168  // the Map of *this, in a "locallyFitted" sense.
1169  //
1170  // This cannot be used if #2 is not true, OR there are permutes.
1171  // Source Maps still need to match
1172 
1173  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1174  // checks. These may communicate more.
1175  const bool debug = Behavior::debug("DistObject");
1176  if (debug) {
    // Four cases, keyed on (restrictedMode, revOp): exactly one Map check on
    // *this runs.
1177  if (! restrictedMode && revOp == DoForward) {
1178  const bool myMapSameAsTransferTgtMap =
1179  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1180  TEUCHOS_TEST_FOR_EXCEPTION
1181  (! myMapSameAsTransferTgtMap, std::invalid_argument,
1182  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1183  "communication, the target DistObject's Map must be the same "
1184  "(in the sense of Tpetra::Map::isSameAs) as the input "
1185  "Export/Import object's target Map.");
1186  }
1187  else if (! restrictedMode && revOp == DoReverse) {
1188  const bool myMapSameAsTransferSrcMap =
1189  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1190  TEUCHOS_TEST_FOR_EXCEPTION
1191  (! myMapSameAsTransferSrcMap, std::invalid_argument,
1192  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1193  "communication, the target DistObject's Map must be the same "
1194  "(in the sense of Tpetra::Map::isSameAs) as the input "
1195  "Export/Import object's source Map.");
1196  }
1197  else if (restrictedMode && revOp == DoForward) {
1198  const bool myMapLocallyFittedTransferTgtMap =
1199  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1200  TEUCHOS_TEST_FOR_EXCEPTION
1201  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1202  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1203  "communication using restricted mode, Export/Import object's "
1204  "target Map must be locally fitted (in the sense of "
1205  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1206  }
1207  else { // if (restrictedMode && revOp == DoReverse)
1208  const bool myMapLocallyFittedTransferSrcMap =
1209  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1210  TEUCHOS_TEST_FOR_EXCEPTION
1211  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1212  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1213  "communication using restricted mode, Export/Import object's "
1214  "source Map must be locally fitted (in the sense of "
1215  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1216  }
1217 
1218  // SrcDistObject need not even _have_ Maps. However, if the
1219  // source object is a DistObject, it has a Map, and we may
1220  // compare that Map with the Transfer's Maps.
1221  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1222  if (srcDistObj != nullptr) {
1223  if (revOp == DoForward) {
1224  const bool srcMapSameAsImportSrcMap =
1225  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1226  TEUCHOS_TEST_FOR_EXCEPTION
1227  (! srcMapSameAsImportSrcMap, std::invalid_argument,
1228  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1229  "communication, the source DistObject's Map must be the same "
1230  "as the input Export/Import object's source Map.");
1231  }
1232  else { // revOp == DoReverse
1233  const bool srcMapSameAsImportTgtMap =
1234  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1235  TEUCHOS_TEST_FOR_EXCEPTION
1236  (! srcMapSameAsImportTgtMap, std::invalid_argument,
1237  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1238  "communication, the source DistObject's Map must be the same "
1239  "as the input Export/Import object's target Map.");
1240  }
1241  }
1242  }
1243 
    // Forward mode uses the Distributor's own plan; reverse mode uses the plan
    // returned by getReversePlan().
1244  Distributor& distor = transfer.getDistributor ();
1245  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1246 
    // The "debug &&" in the condition means this permute check is skipped
    // entirely outside debug mode.
1247  TEUCHOS_TEST_FOR_EXCEPTION
1248  (debug && restrictedMode &&
1249  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1250  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1251  std::invalid_argument,
1252  "Tpetra::DistObject::" << modeString << ": Transfer object "
1253  "cannot have permutes in restricted mode.");
1254 
1255  // Do we need all communication buffers to live on host?
1256  const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
1257  if (verbose) {
1258  std::ostringstream os;
1259  os << *prefix << "doTransfer: Use new interface; "
1260  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1261  std::cerr << os.str ();
1262  }
1263 
    // In reverse mode each "to"/"from" and "remote"/"export" pair swaps roles.
    // NOTE(review): permuteToLIDs, permuteFromLIDs and exportLIDs are not
    // referenced again in this function; only remoteLIDs and canTryAliasing
    // are used below.
1264  using const_lo_dv_type =
1265  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1266  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1267  transfer.getPermuteToLIDs_dv () :
1268  transfer.getPermuteFromLIDs_dv ();
1269  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1270  transfer.getPermuteFromLIDs_dv () :
1271  transfer.getPermuteToLIDs_dv ();
1272  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1273  transfer.getRemoteLIDs_dv () :
1274  transfer.getExportLIDs_dv ();
1275  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1276  transfer.getExportLIDs_dv () :
1277  transfer.getRemoteLIDs_dv ();
1278  const bool canTryAliasing = (revOp == DoForward) ?
1279  transfer.areRemoteLIDsContiguous() :
1280  transfer.areExportLIDsContiguous();
1281 
1282  size_t constantNumPackets = this->constantNumberOfPackets ();
1283 
1284  // We only need to send data if the combine mode is not ZERO.
1285  if (CM != ZERO) {
1286  if (constantNumPackets != 0) {
1287  // There are a constant number of packets per element. We
1288  // already know (from the number of "remote" (incoming)
1289  // elements) how many incoming elements we expect, so we can
1290  // resize the buffer accordingly.
1291  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1292  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1293  }
1294 
1295  // Do we need to do communication (via doPostsAndWaits)?
1296  bool needCommunication = true;
1297 
1298  // This may be NULL. It will be used below.
1299  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1300 
1301  if (revOp == DoReverse && ! this->isDistributed ()) {
1302  needCommunication = false;
1303  }
1304  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1305  // is distributed requires a cast to DistObject. If it's not a
1306  // DistObject, then I'm not quite sure what to do. Perhaps it
1307  // would be more appropriate for SrcDistObject to have an
1308  // isDistributed() method. For now, I'll just assume that we
1309  // need to do communication unless the cast succeeds and the
1310  // source is not distributed.
      // NOTE(review): this compares against NULL while the debug block in this
      // function compares the same kind of pointer against nullptr.
1311  else if (revOp == DoForward && srcDistObj != NULL &&
1312  ! srcDistObj->isDistributed ()) {
1313  needCommunication = false;
1314  }
1315 
1316  if (! needCommunication) {
1317  if (verbose) {
1318  std::ostringstream os;
1319  os << *prefix << "Comm not needed; skipping" << endl;
1320  std::cerr << os.str ();
1321  }
1322  }
1323  else {
      // Wait on the distributor actor for this plan, then unpack what arrived.
1324  distributorActor_.doWaits(distributorPlan);
1325 
1326  if (verbose) {
1327  std::ostringstream os;
1328  os << *prefix << "8. unpackAndCombine" << endl;
1329  std::cerr << os.str ();
1330  }
1331  doUnpackAndCombine(remoteLIDs, constantNumPackets, CM);
1332  } // if (needCommunication)
1333  } // if (CM != ZERO)
1334 
    // NOTE(review): two consecutive verbose "Done" messages are printed.
1335  if (verbose) {
1336  std::ostringstream os;
1337  os << *prefix << "9. Done!" << endl;
1338  std::cerr << os.str ();
1339  }
1340 
1341  if (verbose) {
1342  std::ostringstream os;
1343  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1344  std::cerr << os.str ();
1345  }
1346  }
1347 
1348  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1349  void
1350  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1351  doPosts(const Details::DistributorPlan& distributorPlan,
1352  size_t constantNumPackets,
1353  bool commOnHost,
1354  std::shared_ptr<std::string> prefix,
1355  const bool canTryAliasing,
1356  const CombineMode CM)
1357  {
1360  using Kokkos::Compat::create_const_view;
1361  using std::endl;
1362 
1363  const bool verbose = Details::Behavior::verbose("DistObject");
1364 
1365  if (constantNumPackets == 0) { // variable num packets per LID
1366  if (verbose) {
1367  std::ostringstream os;
1368  os << *prefix << "7.1. Variable # packets / LID: first comm "
1369  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1370  << endl;
1371  std::cerr << os.str ();
1372  }
1373  size_t totalImportPackets = 0;
1374  if (commOnHost) {
1375  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1376  this->numExportPacketsPerLID_.sync_host ();
1377  }
1378  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1379  this->numImportPacketsPerLID_.sync_host ();
1380  }
1381  this->numImportPacketsPerLID_.modify_host (); // out arg
1382  auto numExp_h =
1383  create_const_view (this->numExportPacketsPerLID_.view_host ());
1384  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1385 
1386  // MPI communication happens here.
1387  if (verbose) {
1388  std::ostringstream os;
1389  os << *prefix << "Call doPostsAndWaits"
1390  << endl;
1391  std::cerr << os.str ();
1392  }
1393  distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1394 
1395  if (verbose) {
1396  std::ostringstream os;
1397  os << *prefix << "Count totalImportPackets" << std::endl;
1398  std::cerr << os.str ();
1399  }
1400  using the_dev_type = typename decltype (numImp_h)::device_type;
1401  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1402  }
1403  else { // ! commOnHost
1404  this->numExportPacketsPerLID_.sync_device ();
1405  this->numImportPacketsPerLID_.sync_device ();
1406  this->numImportPacketsPerLID_.modify_device (); // out arg
1407  auto numExp_d = create_const_view
1408  (this->numExportPacketsPerLID_.view_device ());
1409  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1410 
1411  // MPI communication happens here.
1412  if (verbose) {
1413  std::ostringstream os;
1414  os << *prefix << "Call doPostsAndWaits"
1415  << endl;
1416  std::cerr << os.str ();
1417  }
1418  distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1419 
1420  if (verbose) {
1421  std::ostringstream os;
1422  os << *prefix << "Count totalImportPackets" << std::endl;
1423  std::cerr << os.str ();
1424  }
1425  using the_dev_type = typename decltype (numImp_d)::device_type;
1426  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1427  }
1428 
1429  if (verbose) {
1430  std::ostringstream os;
1431  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1432  std::cerr << os.str ();
1433  }
1434  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1435  prefix.get (), canTryAliasing, CM);
1436  if (verbose) {
1437  std::ostringstream os;
1438  os << *prefix << "7.3. Second comm" << std::endl;
1439  std::cerr << os.str ();
1440  }
1441 
1442  // mfh 04 Feb 2019: Distributor expects the "num packets per
1443  // LID" arrays on host, so that it can issue MPI sends and
1444  // receives correctly.
1445  this->numExportPacketsPerLID_.sync_host ();
1446  this->numImportPacketsPerLID_.sync_host ();
1447 
1448  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1449  // doReversePostsAndWaits currently want
1450  // numExportPacketsPerLID and numImportPacketsPerLID as
1451  // Teuchos::ArrayView, rather than as Kokkos::View.
1452  //
1453  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1454  // device. The above syncs might.
1455  auto numExportPacketsPerLID_av =
1456  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1457  auto numImportPacketsPerLID_av =
1458  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1459 
1460  // imports_ is for output only, so we don't need to sync it
1461  // before marking it as modified. However, in order to
1462  // prevent spurious debug-mode errors (e.g., "modified on
1463  // both device and host"), we first need to clear its
1464  // "modified" flags.
1465  this->imports_.clear_sync_state ();
1466 
1467  if (verbose) {
1468  std::ostringstream os;
1469  os << *prefix << "Comm on "
1470  << (commOnHost ? "host" : "device")
1471  << "; call doPosts" << endl;
1472  std::cerr << os.str ();
1473  }
1474 
1475  if (commOnHost) {
1476  this->imports_.modify_host ();
1477  distributorActor_.doPosts
1478  (distributorPlan,
1479  create_const_view (this->exports_.view_host ()),
1480  numExportPacketsPerLID_av,
1481  this->imports_.view_host (),
1482  numImportPacketsPerLID_av);
1483  }
1484  else { // pack on device
1485  Kokkos::fence(); // for UVM
1486  this->imports_.modify_device ();
1487  distributorActor_.doPosts
1488  (distributorPlan,
1489  create_const_view (this->exports_.view_device ()),
1490  numExportPacketsPerLID_av,
1491  this->imports_.view_device (),
1492  numImportPacketsPerLID_av);
1493  }
1494  }
1495  else { // constant number of packets per LID
1496  if (verbose) {
1497  std::ostringstream os;
1498  os << *prefix << "7.1. Const # packets per LID: " << endl
1499  << *prefix << " "
1500  << dualViewStatusToString (this->exports_, "exports_")
1501  << endl
1502  << *prefix << " "
1503  << dualViewStatusToString (this->exports_, "imports_")
1504  << endl;
1505  std::cerr << os.str ();
1506  }
1507  // imports_ is for output only, so we don't need to sync it
1508  // before marking it as modified. However, in order to
1509  // prevent spurious debug-mode errors (e.g., "modified on
1510  // both device and host"), we first need to clear its
1511  // "modified" flags.
1512  this->imports_.clear_sync_state ();
1513 
1514  if (verbose) {
1515  std::ostringstream os;
1516  os << *prefix << "7.2. Comm on "
1517  << (commOnHost ? "host" : "device")
1518  << "; call doPosts" << endl;
1519  std::cerr << os.str ();
1520  }
1521  if (commOnHost) {
1522  this->imports_.modify_host ();
1523  distributorActor_.doPosts
1524  (distributorPlan,
1525  create_const_view (this->exports_.view_host ()),
1526  constantNumPackets,
1527  this->imports_.view_host ());
1528  }
1529  else { // pack on device
1530  Kokkos::fence(); // for UVM
1531  this->imports_.modify_device ();
1532  distributorActor_.doPosts
1533  (distributorPlan,
1534  create_const_view (this->exports_.view_device ()),
1535  constantNumPackets,
1536  this->imports_.view_device ());
1537  } // commOnHost
1538  } // constant or variable num packets per LID
1539  }
1540 
1541  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1542  void
1543  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1544  doPackAndPrepare(const SrcDistObject& src,
1545  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1546  size_t& constantNumPackets)
1547  {
1548  using Details::ProfilingRegion;
1549  using std::endl;
1550  const bool debug = Details::Behavior::debug("DistObject");
1551 
1552  ProfilingRegion region_pp
1553  ("Tpetra::DistObject::doTransferNew::packAndPrepare");
1554 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1555  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1556  // favor of Kokkos profiling.
1557  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1558 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1559 
1560  // Ask the source to pack data. Also ask it whether there are
1561  // a constant number of packets per element
1562  // (constantNumPackets is an output argument). If there are,
1563  // constantNumPackets will come back nonzero. Otherwise, the
1564  // source will fill the numExportPacketsPerLID_ array.
1565 
1566  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1567  // Alternately, make packAndPrepare take a "commOnHost"
1568  // argument to tell it where to leave the data?
1569  //
1570  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1571  // the freedom to pack and unpack either on host or device.
1572  // We should prefer sync'ing only on demand. Thus, we can
1573  // answer the above question: packAndPrepare should not
1574  // take a commOnHost argument, and doTransferNew should sync
1575  // where needed, if needed.
1576  if (debug) {
1577  std::ostringstream lclErrStrm;
1578  bool lclSuccess = false;
1579  try {
1580  this->packAndPrepare (src, exportLIDs, this->exports_,
1581  this->numExportPacketsPerLID_,
1582  constantNumPackets);
1583  lclSuccess = true;
1584  }
1585  catch (std::exception& e) {
1586  lclErrStrm << "packAndPrepare threw an exception: "
1587  << endl << e.what();
1588  }
1589  catch (...) {
1590  lclErrStrm << "packAndPrepare threw an exception "
1591  "not a subclass of std::exception.";
1592  }
1593  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1594  "threw an exception in packAndPrepare on "
1595  "one or more processes in the DistObject's communicator.";
1596  auto comm = getMap()->getComm();
1597  Details::checkGlobalError(std::cerr, lclSuccess,
1598  lclErrStrm.str().c_str(),
1599  gblErrMsgHeader, *comm);
1600  }
1601  else {
1602  this->packAndPrepare (src, exportLIDs, this->exports_,
1603  this->numExportPacketsPerLID_,
1604  constantNumPackets);
1605  }
1606  }
1607 
1608  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1609  void
1610  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1611  doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1612  size_t constantNumPackets,
1613  CombineMode CM)
1614  {
1615  using Details::ProfilingRegion;
1616  using std::endl;
1617  const bool debug = Details::Behavior::debug("DistObject");
1618 
1619  ProfilingRegion region_uc
1620  ("Tpetra::DistObject::doTransferNew::unpackAndCombine");
1621 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1622  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1623  // favor of Kokkos profiling.
1624  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1625 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1626 
1627  if (debug) {
1628  std::ostringstream lclErrStrm;
1629  bool lclSuccess = false;
1630  try {
1631  this->unpackAndCombine (remoteLIDs, this->imports_,
1632  this->numImportPacketsPerLID_,
1633  constantNumPackets, CM);
1634  lclSuccess = true;
1635  }
1636  catch (std::exception& e) {
1637  lclErrStrm << "unpackAndCombine threw an exception: "
1638  << endl << e.what();
1639  }
1640  catch (...) {
1641  lclErrStrm << "unpackAndCombine threw an exception "
1642  "not a subclass of std::exception.";
1643  }
1644  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1645  "threw an exception in unpackAndCombine on "
1646  "one or more processes in the DistObject's communicator.";
1647  auto comm = getMap()->getComm();
1648  Details::checkGlobalError(std::cerr, lclSuccess,
1649  lclErrStrm.str().c_str(),
1650  gblErrMsgHeader, *comm);
1651  }
1652  else {
1653  this->unpackAndCombine (remoteLIDs, this->imports_,
1654  this->numImportPacketsPerLID_,
1655  constantNumPackets, CM);
1656  }
1657  }
1658 
// DistObject::copyAndPermute: definition with an empty body, i.e. it does
// nothing with its arguments.
// NOTE(review): this listing omits the lines that close the two
// Kokkos::DualView<...> parameter types, so the parameter list below is shown
// incompletely.
1659  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1660  void
1661  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1662  copyAndPermute
1663  (const SrcDistObject&,
1664  const size_t,
1665  const Kokkos::DualView<
1666  const local_ordinal_type*,
1668  const Kokkos::DualView<
1669  const local_ordinal_type*,
1671  const CombineMode CM)
1672  {}
1673 
// Member-function definition with an empty body; all parameters are unnamed
// and unused.
// NOTE(review): the listing omits the lines carrying the class qualifier and
// the function name, as well as the closing lines of the DualView parameter
// types. Judging by the parameter types (source object, LID list, packet
// buffer, per-LID counts, size_t&) this is presumably
// DistObject::packAndPrepare -- confirm against the original file.
1674  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1675  void
1678  (const SrcDistObject&,
1679  const Kokkos::DualView<
1680  const local_ordinal_type*,
1682  Kokkos::DualView<
1683  packet_type*,
1685  Kokkos::DualView<
1686  size_t*,
1688  size_t&)
1689  {}
1690 
// Member-function definition with an empty body; every parameter is unnamed,
// with its intended name kept in a trailing comment.
// NOTE(review): the listing omits the lines carrying the class qualifier and
// the function name. Judging by the commented parameter names (importLIDs,
// imports, numPacketsPerLID, constantNumPackets, combineMode) this is
// presumably DistObject::unpackAndCombine -- confirm against the original
// file.
1691  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1692  void
1695  (const Kokkos::DualView<
1696  const local_ordinal_type*,
1697  buffer_device_type>& /* importLIDs */,
1698  Kokkos::DualView<
1699  packet_type*,
1700  buffer_device_type> /* imports */,
1701  Kokkos::DualView<
1702  size_t*,
1703  buffer_device_type> /* numPacketsPerLID */,
1704  const size_t /* constantNumPackets */,
1705  const CombineMode /* combineMode */)
1706  {}
1707 
1708 
// print: write a description of this object to the given std::ostream.
// Wraps os in a Teuchos::FancyOStream (via rcpFromRef, so the stream is
// referenced rather than owned) and forwards to
// describe(*out, Teuchos::VERB_DEFAULT).
// NOTE(review): the listing omits the class-qualifier line before the
// function name.
1709  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1710  void
1712  print (std::ostream& os) const
1713  {
1714  using Teuchos::FancyOStream;
1715  using Teuchos::getFancyOStream;
1716  using Teuchos::RCP;
1717  using Teuchos::rcpFromRef;
1718  using std::endl;
1719 
1720  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1721  this->describe (*out, Teuchos::VERB_DEFAULT);
1722  }
1723 
// createPrefix: build the prefix string used in verbose output.
// Takes the communicator from this object's Map (or Teuchos::null when the
// Map is null) and forwards its raw pointer, together with className and
// methodName, to Details::createPrefix. Returns that function's result as a
// std::unique_ptr<std::string>.
// NOTE(review): the listing omits the class-qualifier line before the
// function name.
1724  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1725  std::unique_ptr<std::string>
1727  createPrefix(const char className[],
1728  const char methodName[]) const
1729  {
1730  auto map = this->getMap();
    // A null Map yields a null communicator, so getRawPtr() below is then
    // called on a null handle.
1731  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1732  return Details::createPrefix(
1733  comm.getRawPtr(), className, methodName);
1734  }
1735 
1736  template<class DistObjectType>
1737  void
1738  removeEmptyProcessesInPlace(
1739  Teuchos::RCP<DistObjectType>& input,
1740  const Teuchos::RCP<const Map<
1741  typename DistObjectType::local_ordinal_type,
1742  typename DistObjectType::global_ordinal_type,
1743  typename DistObjectType::node_type>>& newMap)
1744  {
1745  input->removeEmptyProcessesInPlace (newMap);
1746  if (newMap.is_null ()) { // my process is excluded
1747  input = Teuchos::null;
1748  }
1749  }
1750 
1751  template<class DistObjectType>
1752  void
1753  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
1754  {
1755  auto newMap = input->getMap ()->removeEmptyProcesses ();
1756  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
1757  }
1758 
1759 // Explicit instantiation macro for general DistObject.
// Expands to "template class DistObject<SCALAR, LO, GO, NODE>;".
1760 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1761  template class DistObject< SCALAR , LO , GO , NODE >;
1762 
1763 // Explicit instantiation macro for DistObject<char, ...>.
1764 // The "SLGN" stuff above doesn't work for Packet=char.
// Takes only LO, GO and NODE; the Packet type is fixed to char.
1765 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1766  template class DistObject< char , LO , GO , NODE >;
1767 
1768 } // namespace Tpetra
1769 
1770 #endif // TPETRA_DISTOBJECT_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
const Details::DistributorPlan & getPlan() const
Get this Distributor's DistributorPlan.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
typename ::Kokkos::Details::ArithTraits< GlobalOrdinal >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
bool isDistributed() const
Whether this is a globally distributed object.
Stand-alone utility functions and macros.
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.