Tpetra parallel linear algebra  Version of the Day
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
44 
45 #ifndef TPETRA_MAP_DEF_HPP
46 #define TPETRA_MAP_DEF_HPP
47 
48 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
51 #include "Tpetra_Details_FixedHashTable.hpp"
54 #include "Tpetra_Core.hpp"
55 #include "Tpetra_Util.hpp"
56 #include "Teuchos_as.hpp"
57 #include "Teuchos_TypeNameTraits.hpp"
58 #include "Teuchos_CommHelpers.hpp"
59 #include "Tpetra_Details_mpiIsInitialized.hpp"
60 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
62 #include <memory>
63 #include <sstream>
64 #include <stdexcept>
65 #include <typeinfo>
66 
67 namespace { // (anonymous)
68 
69  template<class ExecutionSpace>
70  void
71  checkMapInputArray (const char ctorName[],
72  const void* indexList,
73  const size_t indexListSize,
74  const ExecutionSpace& execSpace,
75  const Teuchos::Comm<int>* const comm)
76  {
78 
79  const bool debug = Behavior::debug("Map");
80  if (debug) {
82  using Teuchos::outArg;
83  using Teuchos::REDUCE_MIN;
84  using Teuchos::reduceAll;
85  using std::endl;
86 
87  const int myRank = comm == nullptr ? 0 : comm->getRank ();
88  const bool verbose = Behavior::verbose("Map");
89  std::ostringstream lclErrStrm;
90  int lclSuccess = 1;
91 
92  if (indexListSize != 0 && indexList == nullptr) {
93  lclSuccess = 0;
94  if (verbose) {
95  lclErrStrm << "Proc " << myRank << ": indexList is null, "
96  "but indexListSize=" << indexListSize << " != 0." << endl;
97  }
98  }
99  else {
100  if (indexListSize != 0 && indexList != nullptr &&
101  ! pointerAccessibleFromExecutionSpace (indexList, execSpace)) {
102  lclSuccess = 0;
103  if (verbose) {
104  using ::Tpetra::Details::memorySpaceName;
105  const std::string memSpaceName = memorySpaceName (indexList);
106  const std::string execSpaceName =
107  Teuchos::TypeNameTraits<ExecutionSpace>::name ();
108  lclErrStrm << "Proc " << myRank << ": Input array is not "
109  "accessible from the required execution space " <<
110  execSpaceName << ". As far as I can tell, array lives "
111  "in memory space " << memSpaceName << "." << endl;
112  }
113  }
114  }
115  int gblSuccess = 0; // output argument
116  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
117  if (gblSuccess != 1) {
118  std::ostringstream gblErrStrm;
119  gblErrStrm << "Tpetra::Map constructor " << ctorName <<
120  " detected a problem with the input array "
121  "(raw array, Teuchos::ArrayView, or Kokkos::View) "
122  "of global indices." << endl;
123  if (verbose) {
124  using ::Tpetra::Details::gathervPrint;
125  gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
126  }
127  TEUCHOS_TEST_FOR_EXCEPTION
128  (true, std::invalid_argument, gblErrStrm.str ());
129  }
130  }
131  }
132 } // namespace (anonymous)
133 
134 namespace Tpetra {
135 
136  template <class LocalOrdinal, class GlobalOrdinal, class Node>
138  Map () :
139  comm_ (new Teuchos::SerialComm<int> ()),
140  indexBase_ (0),
141  numGlobalElements_ (0),
142  numLocalElements_ (0),
143  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
144  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
145  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
146  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
147  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
148  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
149  uniform_ (false), // trivially
150  contiguous_ (false),
151  distributed_ (false), // no communicator yet
152  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
153  {
155  }
156 
157 
158  template <class LocalOrdinal, class GlobalOrdinal, class Node>
160  Map (const global_size_t numGlobalElements,
161  const global_ordinal_type indexBase,
162  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
163  const LocalGlobal lOrG) :
164  comm_ (comm),
165  uniform_ (true),
166  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
167  {
168  using Teuchos::as;
169  using Teuchos::broadcast;
170  using Teuchos::outArg;
171  using Teuchos::reduceAll;
172  using Teuchos::REDUCE_MIN;
173  using Teuchos::REDUCE_MAX;
174  using Teuchos::typeName;
175  using std::endl;
176  using GO = global_ordinal_type;
177  using GST = global_size_t;
178  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
179  const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
180  const char exPfx[] =
181  "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
182 
183  const bool debug = Details::Behavior::debug("Map");
184  const bool verbose = Details::Behavior::verbose("Map");
185  std::unique_ptr<std::string> prefix;
186  if (verbose) {
187  prefix = Details::createPrefix(
188  comm_.getRawPtr(), "Map", funcName);
189  std::ostringstream os;
190  os << *prefix << "Start" << endl;
191  std::cerr << os.str();
192  }
194 
195  // In debug mode only, check whether numGlobalElements and
196  // indexBase are the same over all processes in the communicator.
197  if (debug) {
198  GST proc0NumGlobalElements = numGlobalElements;
199  broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
200  GST minNumGlobalElements = numGlobalElements;
201  GST maxNumGlobalElements = numGlobalElements;
202  reduceAll(*comm, REDUCE_MIN, numGlobalElements,
203  outArg(minNumGlobalElements));
204  reduceAll(*comm, REDUCE_MAX, numGlobalElements,
205  outArg(maxNumGlobalElements));
206  TEUCHOS_TEST_FOR_EXCEPTION
207  (minNumGlobalElements != maxNumGlobalElements ||
208  numGlobalElements != minNumGlobalElements,
209  std::invalid_argument, exPfx << "All processes must "
210  "provide the same number of global elements. Process 0 set "
211  "numGlobalElements="<< proc0NumGlobalElements << ". The "
212  "calling process " << comm->getRank() << " set "
213  "numGlobalElements=" << numGlobalElements << ". The min "
214  "and max values over all processes are "
215  << minNumGlobalElements << " resp. " << maxNumGlobalElements
216  << ".");
217 
218  GO proc0IndexBase = indexBase;
219  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
220  GO minIndexBase = indexBase;
221  GO maxIndexBase = indexBase;
222  reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
223  reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
224  TEUCHOS_TEST_FOR_EXCEPTION
225  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
226  std::invalid_argument, exPfx << "All processes must "
227  "provide the same indexBase argument. Process 0 set "
228  "indexBase=" << proc0IndexBase << ". The calling process "
229  << comm->getRank() << " set indexBase=" << indexBase
230  << ". The min and max values over all processes are "
231  << minIndexBase << " resp. " << maxIndexBase << ".");
232  }
233 
234  // Distribute the elements across the processes in the given
235  // communicator so that global IDs (GIDs) are
236  //
237  // - Nonoverlapping (only one process owns each GID)
238  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
239  // adjacent GIDs differ by more than one)
240  // - As evenly distributed as possible (the numbers of GIDs on two
241  // different processes do not differ by more than one)
242 
243  // All processes have the same numGlobalElements, but we still
244  // need to check that it is valid. numGlobalElements must be
245  // positive and not the "invalid" value (GSTI).
246  //
247  // This comparison looks funny, but it avoids compiler warnings
248  // for comparing unsigned integers (numGlobalElements_in is a
249  // GST, which is unsigned) while still working if we
250  // later decide to make GST signed.
251  TEUCHOS_TEST_FOR_EXCEPTION(
252  (numGlobalElements < 1 && numGlobalElements != 0),
253  std::invalid_argument, exPfx << "numGlobalElements (= "
254  << numGlobalElements << ") must be nonnegative.");
255 
256  TEUCHOS_TEST_FOR_EXCEPTION
257  (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
258  "You provided numGlobalElements = Teuchos::OrdinalTraits<"
259  "Tpetra::global_size_t>::invalid(). This version of the "
260  "constructor requires a valid value of numGlobalElements. "
261  "You probably mistook this constructor for the \"contiguous "
262  "nonuniform\" constructor, which can compute the global "
263  "number of elements for you if you set numGlobalElements to "
264  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
265 
266  size_t numLocalElements = 0; // will set below
267  if (lOrG == GloballyDistributed) {
268  // Compute numLocalElements:
269  //
270  // If numGlobalElements == numProcs * B + remainder,
271  // then Proc r gets B+1 elements if r < remainder,
272  // and B elements if r >= remainder.
273  //
274  // This strategy is valid for any value of numGlobalElements and
275  // numProcs, including the following border cases:
276  // - numProcs == 1
277  // - numLocalElements < numProcs
278  //
279  // In the former case, remainder == 0 && numGlobalElements ==
280  // numLocalElements. In the latter case, remainder ==
281  // numGlobalElements && numLocalElements is either 0 or 1.
282  const GST numProcs = static_cast<GST> (comm_->getSize ());
283  const GST myRank = static_cast<GST> (comm_->getRank ());
284  const GST quotient = numGlobalElements / numProcs;
285  const GST remainder = numGlobalElements - quotient * numProcs;
286 
287  GO startIndex;
288  if (myRank < remainder) {
289  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
290  // myRank was originally an int, so it should never overflow
291  // reasonable GO types.
292  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
293  } else {
294  numLocalElements = as<size_t> (quotient);
295  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
296  as<GO> (remainder);
297  }
298 
299  minMyGID_ = indexBase + startIndex;
300  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
301  minAllGID_ = indexBase;
302  maxAllGID_ = indexBase + numGlobalElements - 1;
303  distributed_ = (numProcs > 1);
304  }
305  else { // lOrG == LocallyReplicated
306  numLocalElements = as<size_t> (numGlobalElements);
307  minMyGID_ = indexBase;
308  maxMyGID_ = indexBase + numGlobalElements - 1;
309  distributed_ = false;
310  }
311 
312  minAllGID_ = indexBase;
313  maxAllGID_ = indexBase + numGlobalElements - 1;
314  indexBase_ = indexBase;
315  numGlobalElements_ = numGlobalElements;
316  numLocalElements_ = numLocalElements;
317  firstContiguousGID_ = minMyGID_;
318  lastContiguousGID_ = maxMyGID_;
319  contiguous_ = true;
320 
321  // Create the Directory on demand in getRemoteIndexList().
322  //setupDirectory ();
323 
324  if (verbose) {
325  std::ostringstream os;
326  os << *prefix << "Done" << endl;
327  std::cerr << os.str();
328  }
329  }
330 
331 
332  template <class LocalOrdinal, class GlobalOrdinal, class Node>
334  Map (const global_size_t numGlobalElements,
335  const size_t numLocalElements,
336  const global_ordinal_type indexBase,
337  const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
338  comm_ (comm),
339  uniform_ (false),
340  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
341  {
342  using Teuchos::as;
343  using Teuchos::broadcast;
344  using Teuchos::outArg;
345  using Teuchos::reduceAll;
346  using Teuchos::REDUCE_MIN;
347  using Teuchos::REDUCE_MAX;
348  using Teuchos::REDUCE_SUM;
349  using Teuchos::scan;
350  using std::endl;
351  using GO = global_ordinal_type;
352  using GST = global_size_t;
353  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
354  const char funcName[] =
355  "Map(gblNumInds,lclNumInds,indexBase,comm)";
356  const char exPfx[] =
357  "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
358  const char suffix[] =
359  ". Please report this bug to the Tpetra developers.";
360 
361  const bool debug = Details::Behavior::debug("Map");
362  const bool verbose = Details::Behavior::verbose("Map");
363  std::unique_ptr<std::string> prefix;
364  if (verbose) {
365  prefix = Details::createPrefix(
366  comm_.getRawPtr(), "Map", funcName);
367  std::ostringstream os;
368  os << *prefix << "Start" << endl;
369  std::cerr << os.str();
370  }
372 
373  // Global sum of numLocalElements over all processes.
374  // Keep this for later debug checks.
375  GST debugGlobalSum {};
376  if (debug) {
377  debugGlobalSum = initialNonuniformDebugCheck(exPfx,
378  numGlobalElements, numLocalElements, indexBase, comm);
379  }
380 
381  // Distribute the elements across the nodes so that they are
382  // - non-overlapping
383  // - contiguous
384 
385  // This differs from the first Map constructor (that only takes a
386  // global number of elements) in that the user has specified the
387  // number of local elements, so that the elements are not
388  // (necessarily) evenly distributed over the processes.
389 
390  // Compute my local offset. This is an inclusive scan, so to get
391  // the final offset, we subtract off the input.
392  GO scanResult = 0;
393  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
394  const GO myOffset = scanResult - numLocalElements;
395 
396  if (numGlobalElements != GSTI) {
397  numGlobalElements_ = numGlobalElements; // Use the user's value.
398  }
399  else {
400  // Inclusive scan means that the last process has the final sum.
401  // Rather than doing a reduceAll to get the sum of
402  // numLocalElements, we can just have the last process broadcast
403  // its result. That saves us a round of log(numProcs) messages.
404  const int numProcs = comm->getSize ();
405  GST globalSum = scanResult;
406  if (numProcs > 1) {
407  broadcast (*comm, numProcs - 1, outArg (globalSum));
408  }
409  numGlobalElements_ = globalSum;
410 
411  if (debug) {
412  // No need for an all-reduce here; both come from collectives.
413  TEUCHOS_TEST_FOR_EXCEPTION
414  (globalSum != debugGlobalSum, std::logic_error, exPfx <<
415  "globalSum = " << globalSum << " != debugGlobalSum = " <<
416  debugGlobalSum << suffix);
417  }
418  }
419  numLocalElements_ = numLocalElements;
420  indexBase_ = indexBase;
421  minAllGID_ = (numGlobalElements_ == 0) ?
422  std::numeric_limits<GO>::max () :
423  indexBase;
424  maxAllGID_ = (numGlobalElements_ == 0) ?
425  std::numeric_limits<GO>::lowest () :
426  indexBase + GO(numGlobalElements_) - GO(1);
427  minMyGID_ = (numLocalElements_ == 0) ?
428  std::numeric_limits<GO>::max () :
429  indexBase + GO(myOffset);
430  maxMyGID_ = (numLocalElements_ == 0) ?
431  std::numeric_limits<GO>::lowest () :
432  indexBase + myOffset + GO(numLocalElements) - GO(1);
433  firstContiguousGID_ = minMyGID_;
434  lastContiguousGID_ = maxMyGID_;
435  contiguous_ = true;
436  distributed_ = checkIsDist ();
437 
438  // Create the Directory on demand in getRemoteIndexList().
439  //setupDirectory ();
440 
441  if (verbose) {
442  std::ostringstream os;
443  os << *prefix << "Done" << endl;
444  std::cerr << os.str();
445  }
446  }
447 
448  template <class LocalOrdinal, class GlobalOrdinal, class Node>
450  Map<LocalOrdinal,GlobalOrdinal,Node>::
451  initialNonuniformDebugCheck(
452  const char errorMessagePrefix[],
453  const global_size_t numGlobalElements,
454  const size_t numLocalElements,
455  const global_ordinal_type indexBase,
456  const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
457  {
458  const bool debug = Details::Behavior::debug("Map");
459  if (! debug) {
460  return global_size_t(0);
461  }
462 
463  using Teuchos::broadcast;
464  using Teuchos::outArg;
465  using Teuchos::ptr;
466  using Teuchos::REDUCE_MAX;
467  using Teuchos::REDUCE_MIN;
468  using Teuchos::REDUCE_SUM;
469  using Teuchos::reduceAll;
470  using GO = global_ordinal_type;
471  using GST = global_size_t;
472  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
473 
474  // The user has specified the distribution of indices over the
475  // processes. The distribution is not necessarily contiguous or
476  // equally shared over the processes.
477  //
478  // We assume that the number of local elements can be stored in a
479  // size_t. The instance member numLocalElements_ is a size_t, so
480  // this variable and that should have the same type.
481 
482  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
483  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
484  outArg (debugGlobalSum));
485  // In debug mode only, check whether numGlobalElements and
486  // indexBase are the same over all processes in the communicator.
487  {
488  GST proc0NumGlobalElements = numGlobalElements;
489  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
490  GST minNumGlobalElements = numGlobalElements;
491  GST maxNumGlobalElements = numGlobalElements;
492  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
493  outArg (minNumGlobalElements));
494  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
495  outArg (maxNumGlobalElements));
496  TEUCHOS_TEST_FOR_EXCEPTION
497  (minNumGlobalElements != maxNumGlobalElements ||
498  numGlobalElements != minNumGlobalElements,
499  std::invalid_argument, errorMessagePrefix << "All processes "
500  "must provide the same number of global elements, even if "
501  "that argument is "
502  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
503  "(which signals that the Map should compute the global "
504  "number of elements). Process 0 set numGlobalElements"
505  "=" << proc0NumGlobalElements << ". The calling process "
506  << comm->getRank() << " set numGlobalElements=" <<
507  numGlobalElements << ". The min and max values over all "
508  "processes are " << minNumGlobalElements << " resp. " <<
509  maxNumGlobalElements << ".");
510 
511  GO proc0IndexBase = indexBase;
512  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
513  GO minIndexBase = indexBase;
514  GO maxIndexBase = indexBase;
515  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
516  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
517  TEUCHOS_TEST_FOR_EXCEPTION
518  (minIndexBase != maxIndexBase || indexBase != minIndexBase,
519  std::invalid_argument, errorMessagePrefix <<
520  "All processes must provide the same indexBase argument. "
521  "Process 0 set indexBase = " << proc0IndexBase << ". The "
522  "calling process " << comm->getRank() << " set indexBase="
523  << indexBase << ". The min and max values over all "
524  "processes are " << minIndexBase << " resp. " << maxIndexBase
525  << ".");
526 
527  // Make sure that the sum of numLocalElements over all processes
528  // equals numGlobalElements.
529  TEUCHOS_TEST_FOR_EXCEPTION
530  (numGlobalElements != GSTI &&
531  debugGlobalSum != numGlobalElements, std::invalid_argument,
532  errorMessagePrefix << "The sum of each process' number of "
533  "indices over all processes, " << debugGlobalSum << ", != "
534  << "numGlobalElements=" << numGlobalElements << ". If you "
535  "would like this constructor to compute numGlobalElements "
536  "for you, you may set numGlobalElements="
537  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
538  "on input. Please note that this is NOT necessarily -1.");
539  }
540  return debugGlobalSum;
541  }
542 
543  template <class LocalOrdinal, class GlobalOrdinal, class Node>
544  void
547  const char errorMessagePrefix[],
548  const global_size_t numGlobalElements,
549  const Kokkos::View<const global_ordinal_type*,
550  Kokkos::LayoutLeft,
551  Kokkos::HostSpace,
552  Kokkos::MemoryUnmanaged>& entryList_host,
553  const global_ordinal_type indexBase,
554  const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
555  {
556  using Kokkos::LayoutLeft;
557  using Kokkos::subview;
558  using Kokkos::View;
559  using Kokkos::view_alloc;
560  using Kokkos::WithoutInitializing;
561  using Teuchos::as;
562  using Teuchos::broadcast;
563  using Teuchos::outArg;
564  using Teuchos::ptr;
565  using Teuchos::REDUCE_MAX;
566  using Teuchos::REDUCE_MIN;
567  using Teuchos::REDUCE_SUM;
568  using Teuchos::reduceAll;
569  using LO = local_ordinal_type;
570  using GO = global_ordinal_type;
571  using GST = global_size_t;
572  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
573 
574  // Make sure that Kokkos has been initialized (Github Issue #513).
575  TEUCHOS_TEST_FOR_EXCEPTION
576  (! Kokkos::is_initialized (), std::runtime_error,
577  errorMessagePrefix << "The Kokkos execution space "
578  << Teuchos::TypeNameTraits<execution_space>::name()
579  << " has not been initialized. "
580  "Please initialize it before creating a Map.")
581 
582  // The user has specified the distribution of indices over the
583  // processes, via the input array of global indices on each
584  // process. The distribution is not necessarily contiguous or
585  // equally shared over the processes.
586 
587  // The length of the input array on this process is the number of
588  // local indices to associate with this process, even though the
589  // input array contains global indices. We assume that the number
590  // of local indices on a process can be stored in a size_t;
591  // numLocalElements_ is a size_t, so this variable and that should
592  // have the same type.
593  const size_t numLocalElements(entryList_host.size());
594 
595  initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
596  numLocalElements, indexBase, comm);
597 
598  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
599  // reduction is redundant, since the directory Map will have to do
600  // the same thing. Thus, we could do the scan and broadcast for
601  // the directory Map here, and give the computed offsets to the
602  // directory Map's constructor. However, a reduction costs less
603  // than a scan and broadcast, so this still saves time if users of
604  // this Map don't ever need the Directory (i.e., if they never
605  // call getRemoteIndexList on this Map).
606  if (numGlobalElements != GSTI) {
607  numGlobalElements_ = numGlobalElements; // Use the user's value.
608  }
609  else { // The user wants us to compute the sum.
610  reduceAll(*comm, REDUCE_SUM,
611  static_cast<GST>(numLocalElements),
612  outArg(numGlobalElements_));
613  }
614 
615  // mfh 20 Feb 2013: We've never quite done the right thing for
616  // duplicate GIDs here. Duplicate GIDs have always been counted
617  // distinctly in numLocalElements_, and thus should get a
618  // different LID. However, we've always used std::map or a hash
619  // table for the GID -> LID lookup table, so distinct GIDs always
620  // map to the same LID. Furthermore, the order of the input GID
621  // list matters, so it's not desirable to sort for determining
622  // uniqueness.
623  //
624  // I've chosen for now to write this code as if the input GID list
625  // contains no duplicates. If this is not desired, we could use
626  // the lookup table itself to determine uniqueness: If we haven't
627  // seen the GID before, it gets a new LID and it's added to the
628  // LID -> GID and GID -> LID tables. If we have seen the GID
629  // before, it doesn't get added to either table. I would
630  // implement this, but it would cost more to do the double lookups
631  // in the table (one to check, and one to insert).
632  //
633  // More importantly, since we build the GID -> LID table in (a
634  // thread-) parallel (way), the order in which duplicate GIDs may
635  // get inserted is not defined. This would make the assignment of
636  // LID to GID nondeterministic.
637 
638  numLocalElements_ = numLocalElements;
639  indexBase_ = indexBase;
640 
641  minMyGID_ = indexBase_;
642  maxMyGID_ = indexBase_;
643 
644  // NOTE (mfh 27 May 2015): While finding the initial contiguous
645  // GID range requires looking at all the GIDs in the range,
646  // dismissing an interval of GIDs only requires looking at the
647  // first and last GIDs. Thus, we could do binary search backwards
648  // from the end in order to catch the common case of a contiguous
649  // interval followed by noncontiguous entries. On the other hand,
650  // we could just expose this case explicitly as yet another Map
651  // constructor, and avoid the trouble of detecting it.
652  if (numLocalElements_ > 0) {
653  // Find contiguous GID range, with the restriction that the
654  // beginning of the range starts with the first entry. While
655  // doing so, fill in the LID -> GID table.
656  typename decltype (lgMap_)::non_const_type lgMap
657  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
658  auto lgMap_host =
659  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
660 
661  // The input array entryList_host is already on host, so we
662  // don't need to take a host view of it.
663  // auto entryList_host =
664  // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
665  // Kokkos::deep_copy (entryList_host, entryList);
666 
667  firstContiguousGID_ = entryList_host[0];
668  lastContiguousGID_ = firstContiguousGID_+1;
669 
670  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
671  // anyway, so we have to look at them all. The logical way to
672  // find the first noncontiguous entry would thus be to "reduce,"
673  // where the local reduction result is whether entryList[i] + 1
674  // == entryList[i+1].
675 
676  lgMap_host[0] = firstContiguousGID_;
677  size_t i = 1;
678  for ( ; i < numLocalElements_; ++i) {
679  const GO curGid = entryList_host[i];
680  const LO curLid = as<LO> (i);
681 
682  if (lastContiguousGID_ != curGid) break;
683 
684  // Add the entry to the LID->GID table only after we know that
685  // the current GID is in the initial contiguous sequence, so
686  // that we don't repeat adding it in the first iteration of
687  // the loop below over the remaining noncontiguous GIDs.
688  lgMap_host[curLid] = curGid;
689  ++lastContiguousGID_;
690  }
691  --lastContiguousGID_;
692 
693  // [firstContiguousGID_, lastContigousGID_] is the initial
694  // sequence of contiguous GIDs. We can start the min and max
695  // GID using this range.
696  minMyGID_ = firstContiguousGID_;
697  maxMyGID_ = lastContiguousGID_;
698 
699  // Compute the GID -> LID lookup table, _not_ including the
700  // initial sequence of contiguous GIDs.
701  {
702  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
703  auto nonContigGids_host = subview (entryList_host, ncRange);
704  TEUCHOS_TEST_FOR_EXCEPTION
705  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
706  static_cast<size_t> (entryList_host.extent (0) - i),
707  std::logic_error, "Tpetra::Map noncontiguous constructor: "
708  "nonContigGids_host.extent(0) = "
709  << nonContigGids_host.extent (0)
710  << " != entryList_host.extent(0) - i = "
711  << (entryList_host.extent (0) - i) << " = "
712  << entryList_host.extent (0) << " - " << i
713  << ". Please report this bug to the Tpetra developers.");
714 
715  // FixedHashTable's constructor expects an owned device View,
716  // so we must deep-copy the subview of the input indices.
717  View<GO*, LayoutLeft, device_type>
718  nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
719  nonContigGids_host.size ());
720  Kokkos::deep_copy (nonContigGids, nonContigGids_host);
721 
722  glMap_ = global_to_local_table_type (nonContigGids,
723  firstContiguousGID_,
724  lastContiguousGID_,
725  static_cast<LO> (i));
726  }
727 
728  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
729  // table above, we have to look at all the (noncontiguous) input
730  // indices anyway. Thus, why not have the constructor compute
731  // and return the min and max?
732 
733  for ( ; i < numLocalElements_; ++i) {
734  const GO curGid = entryList_host[i];
735  const LO curLid = static_cast<LO> (i);
736  lgMap_host[curLid] = curGid; // LID -> GID table
737 
738  // While iterating through entryList, we compute its
739  // (process-local) min and max elements.
740  if (curGid < minMyGID_) {
741  minMyGID_ = curGid;
742  }
743  if (curGid > maxMyGID_) {
744  maxMyGID_ = curGid;
745  }
746  }
747 
748  // We filled lgMap on host above; now sync back to device.
749  Kokkos::deep_copy (lgMap, lgMap_host);
750 
751  // "Commit" the local-to-global lookup table we filled in above.
752  lgMap_ = lgMap;
753  // We've already created this, so use it.
754  lgMapHost_ = lgMap_host;
755  }
756  else {
757  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
758  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
759  // This insures tests for GIDs in the range
760  // [firstContiguousGID_, lastContiguousGID_] fail for processes
761  // with no local elements.
762  firstContiguousGID_ = indexBase_+1;
763  lastContiguousGID_ = indexBase_;
764  // glMap_ was default constructed, so it's already empty.
765  }
766 
767  // Compute the min and max of all processes' GIDs. If
768  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
769  // are both indexBase_. This is wrong, but fixing it would
770  // require either a fancy sparse all-reduce, or a custom reduction
771  // operator that ignores invalid values ("invalid" means
772  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
773  //
774  // Also, while we're at it, use the same all-reduce to figure out
775  // if the Map is distributed. "Distributed" means that there is
776  // at least one process with a number of local elements less than
777  // the number of global elements.
778  //
779  // We're computing the min and max of all processes' GIDs using a
780  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
781  // and y are signed). (This lets us combine the min and max into
782  // a single all-reduce.) If each process sets localDist=1 if its
783  // number of local elements is strictly less than the number of
784  // global elements, and localDist=0 otherwise, then a MAX
785  // all-reduce on localDist tells us if the Map is distributed (1
786  // if yes, 0 if no). Thus, we can append localDist onto the end
787  // of the data and get the global result from the all-reduce.
788  if (std::numeric_limits<GO>::is_signed) {
789  // Does my process NOT own all the elements?
790  const GO localDist =
791  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
792 
793  GO minMaxInput[3];
794  minMaxInput[0] = -minMyGID_;
795  minMaxInput[1] = maxMyGID_;
796  minMaxInput[2] = localDist;
797 
798  GO minMaxOutput[3];
799  minMaxOutput[0] = 0;
800  minMaxOutput[1] = 0;
801  minMaxOutput[2] = 0;
802  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
803  minAllGID_ = -minMaxOutput[0];
804  maxAllGID_ = minMaxOutput[1];
805  const GO globalDist = minMaxOutput[2];
806  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
807  }
808  else { // unsigned; use two reductions
809  // This is always correct, no matter the signedness of GO.
810  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
811  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
812  distributed_ = checkIsDist ();
813  }
814 
815  contiguous_ = false; // "Contiguous" is conservative.
816 
817  TEUCHOS_TEST_FOR_EXCEPTION(
818  minAllGID_ < indexBase_,
819  std::invalid_argument,
820  "Tpetra::Map constructor (noncontiguous): "
821  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
822  "less than the given indexBase = " << indexBase_ << ".");
823 
824  // Create the Directory on demand in getRemoteIndexList().
825  //setupDirectory ();
826  }
827 
828  template <class LocalOrdinal, class GlobalOrdinal, class Node>
830  Map (const global_size_t numGlobalElements,
831  const GlobalOrdinal indexList[],
832  const LocalOrdinal indexListSize,
833  const GlobalOrdinal indexBase,
834  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
835  comm_ (comm),
836  uniform_ (false),
837  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
838  {
839  using std::endl;
840  const char funcName[] =
841  "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
842 
843  const bool verbose = Details::Behavior::verbose("Map");
844  std::unique_ptr<std::string> prefix;
845  if (verbose) {
846  prefix = Details::createPrefix(
847  comm_.getRawPtr(), "Map", funcName);
848  std::ostringstream os;
849  os << *prefix << "Start" << endl;
850  std::cerr << os.str();
851  }
853  checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
854  indexList, static_cast<size_t> (indexListSize),
855  Kokkos::DefaultHostExecutionSpace (),
856  comm.getRawPtr ());
857  // Not quite sure if I trust all code to behave correctly if the
858  // pointer is nonnull but the array length is nonzero, so I'll
859  // make sure the raw pointer is null if the length is zero.
860  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
861  Kokkos::View<const GlobalOrdinal*,
862  Kokkos::LayoutLeft,
863  Kokkos::HostSpace,
864  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
865  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
866  indexBase, comm);
867  if (verbose) {
868  std::ostringstream os;
869  os << *prefix << "Done" << endl;
870  std::cerr << os.str();
871  }
872  }
873 
874  template <class LocalOrdinal, class GlobalOrdinal, class Node>
876  Map (const global_size_t numGlobalElements,
877  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
878  const GlobalOrdinal indexBase,
879  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
880  comm_ (comm),
881  uniform_ (false),
882  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
883  {
884  using std::endl;
885  const char funcName[] =
886  "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
887 
888  const bool verbose = Details::Behavior::verbose("Map");
889  std::unique_ptr<std::string> prefix;
890  if (verbose) {
891  prefix = Details::createPrefix(
892  comm_.getRawPtr(), "Map", funcName);
893  std::ostringstream os;
894  os << *prefix << "Start" << endl;
895  std::cerr << os.str();
896  }
898  const size_t numLclInds = static_cast<size_t> (entryList.size ());
899  checkMapInputArray ("(GST, ArrayView, GO, comm)",
900  entryList.getRawPtr (), numLclInds,
901  Kokkos::DefaultHostExecutionSpace (),
902  comm.getRawPtr ());
903  // Not quite sure if I trust both ArrayView and View to behave
904  // correctly if the pointer is nonnull but the array length is
905  // nonzero, so I'll make sure it's null if the length is zero.
906  const GlobalOrdinal* const indsRaw =
907  numLclInds == 0 ? NULL : entryList.getRawPtr ();
908  Kokkos::View<const GlobalOrdinal*,
909  Kokkos::LayoutLeft,
910  Kokkos::HostSpace,
911  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
912  initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
913  indexBase, comm);
914  if (verbose) {
915  std::ostringstream os;
916  os << *prefix << "Done" << endl;
917  std::cerr << os.str();
918  }
919  }
920 
921  template <class LocalOrdinal, class GlobalOrdinal, class Node>
923  Map (const global_size_t numGlobalElements,
924  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
925  const GlobalOrdinal indexBase,
926  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
927  comm_ (comm),
928  uniform_ (false),
929  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
930  {
931  using Kokkos::LayoutLeft;
932  using Kokkos::subview;
933  using Kokkos::View;
934  using Kokkos::view_alloc;
935  using Kokkos::WithoutInitializing;
936  using Teuchos::arcp;
937  using Teuchos::ArrayView;
938  using Teuchos::as;
939  using Teuchos::broadcast;
940  using Teuchos::outArg;
941  using Teuchos::ptr;
942  using Teuchos::REDUCE_MAX;
943  using Teuchos::REDUCE_MIN;
944  using Teuchos::REDUCE_SUM;
945  using Teuchos::reduceAll;
946  using Teuchos::typeName;
947  using std::endl;
948  using LO = local_ordinal_type;
949  using GO = global_ordinal_type;
950  using GST = global_size_t;
951  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
952  const char funcName[] =
953  "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
954 
955  const bool verbose = Details::Behavior::verbose("Map");
956  std::unique_ptr<std::string> prefix;
957  if (verbose) {
959  comm_.getRawPtr(), "Map", funcName);
960  std::ostringstream os;
961  os << *prefix << "Start" << endl;
962  std::cerr << os.str();
963  }
965  checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
966  entryList.data (),
967  static_cast<size_t> (entryList.extent (0)),
968  execution_space (), comm.getRawPtr ());
969 
970  // The user has specified the distribution of indices over the
971  // processes, via the input array of global indices on each
972  // process. The distribution is not necessarily contiguous or
973  // equally shared over the processes.
974 
975  // The length of the input array on this process is the number of
976  // local indices to associate with this process, even though the
977  // input array contains global indices. We assume that the number
978  // of local indices on a process can be stored in a size_t;
979  // numLocalElements_ is a size_t, so this variable and that should
980  // have the same type.
981  const size_t numLocalElements(entryList.size());
982 
983  initialNonuniformDebugCheck(funcName, numGlobalElements,
984  numLocalElements, indexBase, comm);
985 
986  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
987  // reduction is redundant, since the directory Map will have to do
988  // the same thing. Thus, we could do the scan and broadcast for
989  // the directory Map here, and give the computed offsets to the
990  // directory Map's constructor. However, a reduction costs less
991  // than a scan and broadcast, so this still saves time if users of
992  // this Map don't ever need the Directory (i.e., if they never
993  // call getRemoteIndexList on this Map).
994  if (numGlobalElements != GSTI) {
995  numGlobalElements_ = numGlobalElements; // Use the user's value.
996  }
997  else { // The user wants us to compute the sum.
998  reduceAll(*comm, REDUCE_SUM,
999  static_cast<GST>(numLocalElements),
1000  outArg(numGlobalElements_));
1001  }
1002 
1003  // mfh 20 Feb 2013: We've never quite done the right thing for
1004  // duplicate GIDs here. Duplicate GIDs have always been counted
1005  // distinctly in numLocalElements_, and thus should get a
1006  // different LID. However, we've always used std::map or a hash
1007  // table for the GID -> LID lookup table, so distinct GIDs always
1008  // map to the same LID. Furthermore, the order of the input GID
1009  // list matters, so it's not desirable to sort for determining
1010  // uniqueness.
1011  //
1012  // I've chosen for now to write this code as if the input GID list
1013  // contains no duplicates. If this is not desired, we could use
1014  // the lookup table itself to determine uniqueness: If we haven't
1015  // seen the GID before, it gets a new LID and it's added to the
1016  // LID -> GID and GID -> LID tables. If we have seen the GID
1017  // before, it doesn't get added to either table. I would
1018  // implement this, but it would cost more to do the double lookups
1019  // in the table (one to check, and one to insert).
1020  //
1021  // More importantly, since we build the GID -> LID table in (a
1022  // thread-) parallel (way), the order in which duplicate GIDs may
1023  // get inserted is not defined. This would make the assignment of
1024  // LID to GID nondeterministic.
1025 
1026  numLocalElements_ = numLocalElements;
1027  indexBase_ = indexBase;
1028 
1029  minMyGID_ = indexBase_;
1030  maxMyGID_ = indexBase_;
1031 
1032  // NOTE (mfh 27 May 2015): While finding the initial contiguous
1033  // GID range requires looking at all the GIDs in the range,
1034  // dismissing an interval of GIDs only requires looking at the
1035  // first and last GIDs. Thus, we could do binary search backwards
1036  // from the end in order to catch the common case of a contiguous
1037  // interval followed by noncontiguous entries. On the other hand,
1038  // we could just expose this case explicitly as yet another Map
1039  // constructor, and avoid the trouble of detecting it.
1040  if (numLocalElements_ > 0) {
1041  // Find contiguous GID range, with the restriction that the
1042  // beginning of the range starts with the first entry. While
1043  // doing so, fill in the LID -> GID table.
1044  typename decltype (lgMap_)::non_const_type lgMap
1045  (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1046  auto lgMap_host =
1047  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1048 
1049  using array_layout =
1050  typename View<const GO*, device_type>::array_layout;
1051  View<GO*, array_layout, Kokkos::HostSpace> entryList_host
1052  (view_alloc ("entryList_host", WithoutInitializing),
1053  entryList.extent(0));
1054  Kokkos::deep_copy (entryList_host, entryList);
1055 
1056  firstContiguousGID_ = entryList_host[0];
1057  lastContiguousGID_ = firstContiguousGID_+1;
1058 
1059  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
1060  // anyway, so we have to look at them all. The logical way to
1061  // find the first noncontiguous entry would thus be to "reduce,"
1062  // where the local reduction result is whether entryList[i] + 1
1063  // == entryList[i+1].
1064 
1065  lgMap_host[0] = firstContiguousGID_;
1066  size_t i = 1;
1067  for ( ; i < numLocalElements_; ++i) {
1068  const GO curGid = entryList_host[i];
1069  const LO curLid = as<LO> (i);
1070 
1071  if (lastContiguousGID_ != curGid) break;
1072 
1073  // Add the entry to the LID->GID table only after we know that
1074  // the current GID is in the initial contiguous sequence, so
1075  // that we don't repeat adding it in the first iteration of
1076  // the loop below over the remaining noncontiguous GIDs.
1077  lgMap_host[curLid] = curGid;
1078  ++lastContiguousGID_;
1079  }
1080  --lastContiguousGID_;
1081 
1082  // [firstContiguousGID_, lastContigousGID_] is the initial
1083  // sequence of contiguous GIDs. We can start the min and max
1084  // GID using this range.
1085  minMyGID_ = firstContiguousGID_;
1086  maxMyGID_ = lastContiguousGID_;
1087 
1088  // Compute the GID -> LID lookup table, _not_ including the
1089  // initial sequence of contiguous GIDs.
1090  {
1091  const std::pair<size_t, size_t> ncRange (i, entryList.extent (0));
1092  auto nonContigGids = subview (entryList, ncRange);
1093  TEUCHOS_TEST_FOR_EXCEPTION
1094  (static_cast<size_t> (nonContigGids.extent (0)) !=
1095  static_cast<size_t> (entryList.extent (0) - i),
1096  std::logic_error, "Tpetra::Map noncontiguous constructor: "
1097  "nonContigGids.extent(0) = "
1098  << nonContigGids.extent (0)
1099  << " != entryList.extent(0) - i = "
1100  << (entryList.extent (0) - i) << " = "
1101  << entryList.extent (0) << " - " << i
1102  << ". Please report this bug to the Tpetra developers.");
1103 
1104  glMap_ = global_to_local_table_type (nonContigGids,
1105  firstContiguousGID_,
1106  lastContiguousGID_,
1107  static_cast<LO> (i));
1108  }
1109 
1110  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
1111  // table above, we have to look at all the (noncontiguous) input
1112  // indices anyway. Thus, why not have the constructor compute
1113  // and return the min and max?
1114 
1115  for ( ; i < numLocalElements_; ++i) {
1116  const GO curGid = entryList_host[i];
1117  const LO curLid = static_cast<LO> (i);
1118  lgMap_host[curLid] = curGid; // LID -> GID table
1119 
1120  // While iterating through entryList, we compute its
1121  // (process-local) min and max elements.
1122  if (curGid < minMyGID_) {
1123  minMyGID_ = curGid;
1124  }
1125  if (curGid > maxMyGID_) {
1126  maxMyGID_ = curGid;
1127  }
1128  }
1129 
1130  // We filled lgMap on host above; now sync back to device.
1131  Kokkos::deep_copy (lgMap, lgMap_host);
1132 
1133  // "Commit" the local-to-global lookup table we filled in above.
1134  lgMap_ = lgMap;
1135  // We've already created this, so use it.
1136  lgMapHost_ = lgMap_host;
1137  }
1138  else {
1139  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1140  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1141  // This insures tests for GIDs in the range
1142  // [firstContiguousGID_, lastContiguousGID_] fail for processes
1143  // with no local elements.
1144  firstContiguousGID_ = indexBase_+1;
1145  lastContiguousGID_ = indexBase_;
1146  // glMap_ was default constructed, so it's already empty.
1147  }
1148 
1149  // Compute the min and max of all processes' GIDs. If
1150  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1151  // are both indexBase_. This is wrong, but fixing it would
1152  // require either a fancy sparse all-reduce, or a custom reduction
1153  // operator that ignores invalid values ("invalid" means
1154  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1155  //
1156  // Also, while we're at it, use the same all-reduce to figure out
1157  // if the Map is distributed. "Distributed" means that there is
1158  // at least one process with a number of local elements less than
1159  // the number of global elements.
1160  //
1161  // We're computing the min and max of all processes' GIDs using a
1162  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1163  // and y are signed). (This lets us combine the min and max into
1164  // a single all-reduce.) If each process sets localDist=1 if its
1165  // number of local elements is strictly less than the number of
1166  // global elements, and localDist=0 otherwise, then a MAX
1167  // all-reduce on localDist tells us if the Map is distributed (1
1168  // if yes, 0 if no). Thus, we can append localDist onto the end
1169  // of the data and get the global result from the all-reduce.
1170  if (std::numeric_limits<GO>::is_signed) {
1171  // Does my process NOT own all the elements?
1172  const GO localDist =
1173  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1174 
1175  GO minMaxInput[3];
1176  minMaxInput[0] = -minMyGID_;
1177  minMaxInput[1] = maxMyGID_;
1178  minMaxInput[2] = localDist;
1179 
1180  GO minMaxOutput[3];
1181  minMaxOutput[0] = 0;
1182  minMaxOutput[1] = 0;
1183  minMaxOutput[2] = 0;
1184  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1185  minAllGID_ = -minMaxOutput[0];
1186  maxAllGID_ = minMaxOutput[1];
1187  const GO globalDist = minMaxOutput[2];
1188  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1189  }
1190  else { // unsigned; use two reductions
1191  // This is always correct, no matter the signedness of GO.
1192  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1193  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1194  distributed_ = checkIsDist ();
1195  }
1196 
1197  contiguous_ = false; // "Contiguous" is conservative.
1198 
1199  TEUCHOS_TEST_FOR_EXCEPTION(
1200  minAllGID_ < indexBase_,
1201  std::invalid_argument,
1202  "Tpetra::Map constructor (noncontiguous): "
1203  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1204  "less than the given indexBase = " << indexBase_ << ".");
1205 
1206  // Create the Directory on demand in getRemoteIndexList().
1207  //setupDirectory ();
1208 
1209  if (verbose) {
1210  std::ostringstream os;
1211  os << *prefix << "Done" << endl;
1212  std::cerr << os.str();
1213  }
1214  }
1215 
1216 
1217  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1219  {
1220  if (! Kokkos::is_initialized ()) {
1221  std::ostringstream os;
1222  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1223  "Kokkos::finalize() has been called. This is user error! There are "
1224  "two likely causes: " << std::endl <<
1225  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1226  << std::endl <<
1227  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1228  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1229  "or Tpetra::finalize()." << std::endl << std::endl <<
1230  "Don't do either of these! Please refer to GitHib Issue #2372."
1231  << std::endl;
1232  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1233  this->getComm ().getRawPtr ());
1234  }
1235  else {
1236  using ::Tpetra::Details::mpiIsInitialized;
1237  using ::Tpetra::Details::mpiIsFinalized;
1238  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1239 
1240  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1241  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1242  mpiIsInitialized () && mpiIsFinalized ()) {
1243  // Tpetra itself does not require MPI, even if building with
1244  // MPI. It is legal to create Tpetra objects that do not use
1245  // MPI, even in an MPI program. However, calling Tpetra stuff
1246  // after MPI_Finalize() has been called is a bad idea, since
1247  // some Tpetra defaults may use MPI if available.
1248  std::ostringstream os;
1249  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1250  "MPI_Finalize() has been called. This is user error! There are "
1251  "two likely causes: " << std::endl <<
1252  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1253  << std::endl <<
1254  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1255  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1256  "Tpetra::finalize()." << std::endl << std::endl <<
1257  "Don't do either of these! Please refer to GitHib Issue #2372."
1258  << std::endl;
1259  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1260  }
1261  }
1262  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1263  // because Tpetra does not yet require Tpetra::initialize /
1264  // Tpetra::finalize.
1265  }
1266 
1267  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1268  bool
1270  {
1271  TEUCHOS_TEST_FOR_EXCEPTION(
1272  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1273  "getComm() returns null. Please report this bug to the Tpetra "
1274  "developers.");
1275 
1276  // This is a collective operation, if it hasn't been called before.
1277  setupDirectory ();
1278  return directory_->isOneToOne (*this);
1279  }
1280 
1281  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1282  LocalOrdinal
1284  getLocalElement (GlobalOrdinal globalIndex) const
1285  {
1286  if (isContiguous ()) {
1287  if (globalIndex < getMinGlobalIndex () ||
1288  globalIndex > getMaxGlobalIndex ()) {
1289  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1290  }
1291  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1292  }
1293  else if (globalIndex >= firstContiguousGID_ &&
1294  globalIndex <= lastContiguousGID_) {
1295  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1296  }
1297  else {
1298  // If the given global index is not in the table, this returns
1299  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1300  return glMap_.get (globalIndex);
1301  }
1302  }
1303 
1304  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1305  GlobalOrdinal
1307  getGlobalElement (LocalOrdinal localIndex) const
1308  {
1309  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1310  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1311  }
1312  if (isContiguous ()) {
1313  return getMinGlobalIndex () + localIndex;
1314  }
1315  else {
1316  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1317  // involvement. As a result, it is thread safe.
1318  //
1319  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1320  return lgMapHost_[localIndex];
1321  }
1322  }
1323 
1324  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1325  bool
1327  isNodeLocalElement (LocalOrdinal localIndex) const
1328  {
1329  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1330  return false;
1331  } else {
1332  return true;
1333  }
1334  }
1335 
1336  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1337  bool
1339  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1340  return this->getLocalElement (globalIndex) !=
1341  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1342  }
1343 
1344  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1346  return uniform_;
1347  }
1348 
1349  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1351  return contiguous_;
1352  }
1353 
1354 
1355  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1358  getLocalMap () const
1359  {
1360  return local_map_type (glMap_, lgMap_, getIndexBase (),
1361  getMinGlobalIndex (), getMaxGlobalIndex (),
1362  firstContiguousGID_, lastContiguousGID_,
1363  getNodeNumElements (), isContiguous ());
1364  }
1365 
1366  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1367  bool
1370  {
1371  using Teuchos::outArg;
1372  using Teuchos::REDUCE_MIN;
1373  using Teuchos::reduceAll;
1374  //
1375  // Tests that avoid the Boolean all-reduce below by using
1376  // globally consistent quantities.
1377  //
1378  if (this == &map) {
1379  // Pointer equality on one process always implies pointer
1380  // equality on all processes, since Map is immutable.
1381  return true;
1382  }
1383  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1384  // The two communicators have different numbers of processes.
1385  // It's not correct to call isCompatible() in that case. This
1386  // may result in the all-reduce hanging below.
1387  return false;
1388  }
1389  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1390  // Two Maps are definitely NOT compatible if they have different
1391  // global numbers of indices.
1392  return false;
1393  }
1394  else if (isContiguous () && isUniform () &&
1395  map.isContiguous () && map.isUniform ()) {
1396  // Contiguous uniform Maps with the same number of processes in
1397  // their communicators, and with the same global numbers of
1398  // indices, are always compatible.
1399  return true;
1400  }
1401  else if (! isContiguous () && ! map.isContiguous () &&
1402  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1403  lgMap_.data () == map.lgMap_.data ()) {
1404  // Noncontiguous Maps whose global index lists are nonempty and
1405  // have the same pointer must be the same (and therefore
1406  // contiguous).
1407  //
1408  // Nonempty is important. For example, consider a communicator
1409  // with two processes, and two Maps that share this
1410  // communicator, with zero global indices on the first process,
1411  // and different nonzero numbers of global indices on the second
1412  // process. In that case, on the first process, the pointers
1413  // would both be NULL.
1414  return true;
1415  }
1416 
1417  TEUCHOS_TEST_FOR_EXCEPTION(
1418  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1419  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1420  "checked that this condition is true above, but it's false here. "
1421  "Please report this bug to the Tpetra developers.");
1422 
1423  // Do both Maps have the same number of indices on each process?
1424  const int locallyCompat =
1425  (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0;
1426 
1427  int globallyCompat = 0;
1428  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1429  return (globallyCompat == 1);
1430  }
1431 
1432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1433  bool
1436  {
1437  using Teuchos::ArrayView;
1438  using GO = global_ordinal_type;
1439  using size_type = typename ArrayView<const GO>::size_type;
1440 
1441  // If both Maps are contiguous, we can compare their GID ranges
1442  // easily by looking at the min and max GID on this process.
1443  // Otherwise, we'll compare their GID lists. If only one Map is
1444  // contiguous, then we only have to call getNodeElementList() on
1445  // the noncontiguous Map. (It's best to avoid calling it on a
1446  // contiguous Map, since it results in unnecessary storage that
1447  // persists for the lifetime of the Map.)
1448 
1449  if (this == &map) {
1450  // Pointer equality on one process always implies pointer
1451  // equality on all processes, since Map is immutable.
1452  return true;
1453  }
1454  else if (getNodeNumElements () != map.getNodeNumElements ()) {
1455  return false;
1456  }
1457  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1458  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1459  return false;
1460  }
1461  else {
1462  if (isContiguous ()) {
1463  if (map.isContiguous ()) {
1464  return true; // min and max match, so the ranges match.
1465  }
1466  else { // *this is contiguous, but map is not contiguous
1467  TEUCHOS_TEST_FOR_EXCEPTION(
1468  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1469  "Tpetra::Map::locallySameAs: BUG");
1470  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1471  const GO minLhsGid = this->getMinGlobalIndex ();
1472  const size_type numRhsElts = rhsElts.size ();
1473  for (size_type k = 0; k < numRhsElts; ++k) {
1474  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1475  if (curLhsGid != rhsElts[k]) {
1476  return false; // stop on first mismatch
1477  }
1478  }
1479  return true;
1480  }
1481  }
1482  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1483  TEUCHOS_TEST_FOR_EXCEPTION(
1484  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1485  "Tpetra::Map::locallySameAs: BUG");
1486  ArrayView<const GO> lhsElts = this->getNodeElementList ();
1487  const GO minRhsGid = map.getMinGlobalIndex ();
1488  const size_type numLhsElts = lhsElts.size ();
1489  for (size_type k = 0; k < numLhsElts; ++k) {
1490  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1491  if (curRhsGid != lhsElts[k]) {
1492  return false; // stop on first mismatch
1493  }
1494  }
1495  return true;
1496  }
1497  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1498  // Pointers to LID->GID "map" (actually just an array) are the
1499  // same, and the number of GIDs are the same.
1500  return this->getNodeNumElements () == map.getNodeNumElements ();
1501  }
1502  else { // we actually have to compare the GIDs
1503  if (this->getNodeNumElements () != map.getNodeNumElements ()) {
1504  return false; // We already checked above, but check just in case
1505  }
1506  else {
1507  ArrayView<const GO> lhsElts = getNodeElementList ();
1508  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1509 
1510  // std::equal requires that the latter range is as large as
1511  // the former. We know the ranges have equal length, because
1512  // they have the same number of local entries.
1513  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1514  }
1515  }
1516  }
1517  }
1518 
1519  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1520  bool
1523  {
1524  if (this == &map)
1525  return true;
1526 
1527  // We are going to check if lmap1 is fitted into lmap2
1528  auto lmap1 = map.getLocalMap();
1529  auto lmap2 = this->getLocalMap();
1530 
1531  auto numLocalElements1 = lmap1.getNodeNumElements();
1532  auto numLocalElements2 = lmap2.getNodeNumElements();
1533 
1534  if (numLocalElements1 > numLocalElements2) {
1535  // There are more indices in the first map on this process than in second map.
1536  return false;
1537  }
1538 
1539  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1540  // When both Maps are contiguous, just check the interval inclusion.
1541  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1542  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1543  }
1544 
1545  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1546  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1547  // The second map does not include the first map bounds, and thus some of
1548  // the first map global indices are not in the second map.
1549  return false;
1550  }
1551 
1552  using LO = local_ordinal_type;
1553  using range_type =
1554  Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1555 
1556  // Check all elements.
1557  LO numDiff = 0;
1558  Kokkos::parallel_reduce(
1559  "isLocallyFitted",
1560  range_type(0, numLocalElements1),
1561  KOKKOS_LAMBDA (const LO i, LO& diff) {
1562  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1563  }, numDiff);
1564 
1565  return (numDiff == 0);
1566  }
1567 
1568  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1569  bool
1572  {
1573  using Teuchos::outArg;
1574  using Teuchos::REDUCE_MIN;
1575  using Teuchos::reduceAll;
1576  //
1577  // Tests that avoid the Boolean all-reduce below by using
1578  // globally consistent quantities.
1579  //
1580  if (this == &map) {
1581  // Pointer equality on one process always implies pointer
1582  // equality on all processes, since Map is immutable.
1583  return true;
1584  }
1585  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1586  // The two communicators have different numbers of processes.
1587  // It's not correct to call isSameAs() in that case. This
1588  // may result in the all-reduce hanging below.
1589  return false;
1590  }
1591  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1592  // Two Maps are definitely NOT the same if they have different
1593  // global numbers of indices.
1594  return false;
1595  }
1596  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1597  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1598  getIndexBase () != map.getIndexBase ()) {
1599  // If the global min or max global index doesn't match, or if
1600  // the index base doesn't match, then the Maps aren't the same.
1601  return false;
1602  }
1603  else if (isDistributed () != map.isDistributed ()) {
1604  // One Map is distributed and the other is not, which means that
1605  // the Maps aren't the same.
1606  return false;
1607  }
1608  else if (isContiguous () && isUniform () &&
1609  map.isContiguous () && map.isUniform ()) {
1610  // Contiguous uniform Maps with the same number of processes in
1611  // their communicators, with the same global numbers of indices,
1612  // and with matching index bases and ranges, must be the same.
1613  return true;
1614  }
1615 
1616  // The two communicators must have the same number of processes,
1617  // with process ranks occurring in the same order. This uses
1618  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1619  // "Operations that access communicators are local and their
1620  // execution does not require interprocess communication."
1621  // However, just to be sure, I'll put this call after the above
1622  // tests that don't communicate.
1623  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1624  return false;
1625  }
1626 
1627  // If we get this far, we need to check local properties and then
1628  // communicate local sameness across all processes.
1629  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1630 
1631  // Return true if and only if all processes report local sameness.
1632  int isSame_gbl = 0;
1633  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1634  return isSame_gbl == 1;
1635  }
1636 
1637  namespace { // (anonymous)
1638  template <class LO, class GO, class DT>
1639  class FillLgMap {
1640  public:
1641  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1642  const GO startGid) :
1643  lgMap_ (lgMap), startGid_ (startGid)
1644  {
1645  Kokkos::RangePolicy<LO, typename DT::execution_space>
1646  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1647  Kokkos::parallel_for (range, *this);
1648  }
1649 
1650  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1651  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1652  }
1653 
1654  private:
1655  const Kokkos::View<GO*, DT> lgMap_;
1656  const GO startGid_;
1657  };
1658 
1659  } // namespace (anonymous)
1660 
1661 
1662  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1663  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1665  {
1666  using std::endl;
1667  using LO = local_ordinal_type;
1668  using GO = global_ordinal_type;
1669  using DT = device_type;
1670  using const_lg_view_type = decltype(lgMap_);
1671  using lg_view_type = typename const_lg_view_type::non_const_type;
1672  const bool debug = Details::Behavior::debug("Map");
1673  const bool verbose = Details::Behavior::verbose("Map");
1674 
1675  std::unique_ptr<std::string> prefix;
1676  if (verbose) {
1677  prefix = Details::createPrefix(
1678  comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1679  std::ostringstream os;
1680  os << *prefix << "Start" << endl;
1681  std::cerr << os.str();
1682  }
1683 
1684  // If the local-to-global mapping doesn't exist yet, and if we
1685  // have local entries, then create and fill the local-to-global
1686  // mapping.
1687  const bool needToCreateLocalToGlobalMapping =
1688  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1689 
1690  if (needToCreateLocalToGlobalMapping) {
1691  if (verbose) {
1692  std::ostringstream os;
1693  os << *prefix << "Need to create lgMap" << endl;
1694  std::cerr << os.str();
1695  }
1696  if (debug) {
1697  // The local-to-global mapping should have been set up already
1698  // for a noncontiguous map.
1699  TEUCHOS_TEST_FOR_EXCEPTION
1700  (! isContiguous(), std::logic_error,
1701  "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1702  "mapping (lgMap_) should have been set up already for a "
1703  "noncontiguous Map. Please report this bug to the Tpetra "
1704  "developers.");
1705  }
1706  const LO numElts = static_cast<LO> (getNodeNumElements ());
1707 
1708  using Kokkos::view_alloc;
1709  using Kokkos::WithoutInitializing;
1710  lg_view_type lgMap ("lgMap", numElts);
1711  if (verbose) {
1712  std::ostringstream os;
1713  os << *prefix << "Fill lgMap" << endl;
1714  std::cerr << os.str();
1715  }
1716  FillLgMap<LO, GO, DT> fillIt (lgMap, minMyGID_);
1717 
1718  if (verbose) {
1719  std::ostringstream os;
1720  os << *prefix << "Copy lgMap to lgMapHost" << endl;
1721  std::cerr << os.str();
1722  }
1723 
1724  Kokkos::fence(); // following create_mirror_view fails with CUDA_LAUNCH_BLOCKING=0 due to above FillLgMap parallel_for
1725 
1726  auto lgMapHost =
1727  Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1728  Kokkos::deep_copy (lgMapHost, lgMap);
1729 
1730  // "Commit" the local-to-global lookup table we filled in above.
1731  lgMap_ = lgMap;
1732  lgMapHost_ = lgMapHost;
1733  }
1734 
1735  if (verbose) {
1736  std::ostringstream os;
1737  os << *prefix << "Done" << endl;
1738  std::cerr << os.str();
1739  }
1740  return lgMap_;
1741  }
1742 
1743  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1744  Teuchos::ArrayView<const GlobalOrdinal>
1746  {
1747  using GO = global_ordinal_type;
1748 
1749  // If the local-to-global mapping doesn't exist yet, and if we
1750  // have local entries, then create and fill the local-to-global
1751  // mapping.
1752  (void) this->getMyGlobalIndices ();
1753 
1754  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1755  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1756  // The third argument forces ArrayView not to try to track memory
1757  // in a debug build. We have to use it because the memory does
1758  // not belong to a Teuchos memory management class.
1759  return Teuchos::ArrayView<const GO>(
1760  lgMapHostRawPtr,
1761  lgMapHost_.extent (0),
1762  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1763  }
1764 
1765  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1767  return distributed_;
1768  }
1769 
1770  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1772  using Teuchos::TypeNameTraits;
1773  std::ostringstream os;
1774 
1775  os << "Tpetra::Map: {"
1776  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1777  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1778  << ", NodeType: " << TypeNameTraits<Node>::name ();
1779  if (this->getObjectLabel () != "") {
1780  os << ", Label: \"" << this->getObjectLabel () << "\"";
1781  }
1782  os << ", Global number of entries: " << getGlobalNumElements ()
1783  << ", Number of processes: " << getComm ()->getSize ()
1784  << ", Uniform: " << (isUniform () ? "true" : "false")
1785  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1786  << ", Distributed: " << (isDistributed () ? "true" : "false")
1787  << "}";
1788  return os.str ();
1789  }
1790 
1795  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1796  std::string
1798  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1799  {
1800  using LO = local_ordinal_type;
1801  using std::endl;
1802 
1803  // This preserves current behavior of Map.
1804  if (vl < Teuchos::VERB_HIGH) {
1805  return std::string ();
1806  }
1807  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1808  Teuchos::RCP<Teuchos::FancyOStream> outp =
1809  Teuchos::getFancyOStream (outStringP);
1810  Teuchos::FancyOStream& out = *outp;
1811 
1812  auto comm = this->getComm ();
1813  const int myRank = comm->getRank ();
1814  const int numProcs = comm->getSize ();
1815  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1816  Teuchos::OSTab tab1 (out);
1817 
1818  const LO numEnt = static_cast<LO> (this->getNodeNumElements ());
1819  out << "My number of entries: " << numEnt << endl
1820  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1821  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1822 
1823  if (vl == Teuchos::VERB_EXTREME) {
1824  out << "My global indices: [";
1825  const LO minLclInd = this->getMinLocalIndex ();
1826  for (LO k = 0; k < numEnt; ++k) {
1827  out << minLclInd + this->getGlobalElement (k);
1828  if (k + 1 < numEnt) {
1829  out << ", ";
1830  }
1831  }
1832  out << "]" << endl;
1833  }
1834 
1835  out.flush (); // make sure the ostringstream got everything
1836  return outStringP->str ();
1837  }
1838 
1839  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1840  void
1842  describe (Teuchos::FancyOStream &out,
1843  const Teuchos::EVerbosityLevel verbLevel) const
1844  {
1845  using Teuchos::TypeNameTraits;
1846  using Teuchos::VERB_DEFAULT;
1847  using Teuchos::VERB_NONE;
1848  using Teuchos::VERB_LOW;
1849  using Teuchos::VERB_HIGH;
1850  using std::endl;
1851  using LO = local_ordinal_type;
1852  using GO = global_ordinal_type;
1853  const Teuchos::EVerbosityLevel vl =
1854  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1855 
1856  if (vl == VERB_NONE) {
1857  return; // don't print anything
1858  }
1859  // If this Map's Comm is null, then the Map does not participate
1860  // in collective operations with the other processes. In that
1861  // case, it is not even legal to call this method. The reasonable
1862  // thing to do in that case is nothing.
1863  auto comm = this->getComm ();
1864  if (comm.is_null ()) {
1865  return;
1866  }
1867  const int myRank = comm->getRank ();
1868  const int numProcs = comm->getSize ();
1869 
1870  // Only Process 0 should touch the output stream, but this method
1871  // in general may need to do communication. Thus, we may need to
1872  // preserve the current tab level across multiple "if (myRank ==
1873  // 0) { ... }" inner scopes. This is why we sometimes create
1874  // OSTab instances by pointer, instead of by value. We only need
1875  // to create them by pointer if the tab level must persist through
1876  // multiple inner scopes.
1877  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1878 
1879  if (myRank == 0) {
1880  // At every verbosity level but VERB_NONE, Process 0 prints.
1881  // By convention, describe() always begins with a tab before
1882  // printing.
1883  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1884  out << "\"Tpetra::Map\":" << endl;
1885  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1886  {
1887  out << "Template parameters:" << endl;
1888  Teuchos::OSTab tab2 (out);
1889  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1890  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1891  << "Node: " << TypeNameTraits<Node>::name () << endl;
1892  }
1893  const std::string label = this->getObjectLabel ();
1894  if (label != "") {
1895  out << "Label: \"" << label << "\"" << endl;
1896  }
1897  out << "Global number of entries: " << getGlobalNumElements () << endl
1898  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1899  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1900  << "Index base: " << getIndexBase () << endl
1901  << "Number of processes: " << numProcs << endl
1902  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1903  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1904  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1905  }
1906 
1907  // This is collective over the Map's communicator.
1908  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1909  const std::string lclStr = this->localDescribeToString (vl);
1910  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1911  }
1912  }
1913 
1914  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1915  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1917  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1918  {
1919  using Teuchos::RCP;
1920  using Teuchos::rcp;
1921  using GST = global_size_t;
1922  using LO = local_ordinal_type;
1923  using GO = global_ordinal_type;
1924  using map_type = Map<LO, GO, Node>;
1925 
1926  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1927  // the Map by calling its ordinary public constructor, using the
1928  // original Map's data. This only involves O(1) all-reduces over
1929  // the new communicator, which in the common case only includes a
1930  // small number of processes.
1931 
1932  // Create the Map to return.
1933  if (newComm.is_null () || newComm->getSize () < 1) {
1934  return Teuchos::null; // my process does not participate in the new Map
1935  }
1936  else if (newComm->getSize () == 1) {
1937  // The case where the new communicator has only one process is
1938  // easy. We don't have to communicate to get all the
1939  // information we need. Use the default comm to create the new
1940  // Map, then fill in all the fields directly.
1941  RCP<map_type> newMap (new map_type ());
1942 
1943  newMap->comm_ = newComm;
1944  // mfh 07 Oct 2016: Preserve original behavior, even though the
1945  // original index base may no longer be the globally min global
1946  // index. See #616 for why this doesn't matter so much anymore.
1947  newMap->indexBase_ = this->indexBase_;
1948  newMap->numGlobalElements_ = this->numLocalElements_;
1949  newMap->numLocalElements_ = this->numLocalElements_;
1950  newMap->minMyGID_ = this->minMyGID_;
1951  newMap->maxMyGID_ = this->maxMyGID_;
1952  newMap->minAllGID_ = this->minMyGID_;
1953  newMap->maxAllGID_ = this->maxMyGID_;
1954  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1955  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1956  // Since the new communicator has only one process, neither
1957  // uniformity nor contiguity have changed.
1958  newMap->uniform_ = this->uniform_;
1959  newMap->contiguous_ = this->contiguous_;
1960  // The new communicator only has one process, so the new Map is
1961  // not distributed.
1962  newMap->distributed_ = false;
1963  newMap->lgMap_ = this->lgMap_;
1964  newMap->lgMapHost_ = this->lgMapHost_;
1965  newMap->glMap_ = this->glMap_;
1966  // It's OK not to initialize the new Map's Directory.
1967  // This is initialized lazily, on first call to getRemoteIndexList.
1968 
1969  return newMap;
1970  }
1971  else { // newComm->getSize() != 1
1972  // Even if the original Map is contiguous, the new Map might not
1973  // be, especially if the excluded processes have ranks != 0 or
1974  // newComm->getSize()-1. The common case for this method is to
1975  // exclude many (possibly even all but one) processes, so it
1976  // likely doesn't pay to do the global communication (over the
1977  // original communicator) to figure out whether we can optimize
1978  // the result Map. Thus, we just set up the result Map as
1979  // noncontiguous.
1980  //
1981  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1982  // the global-to-local table, etc. Optimize this code path to
1983  // avoid unnecessary local work.
1984 
1985  // Make Map (re)compute the global number of elements.
1986  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1987  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1988  // to use the noncontiguous Map constructor, since the new Map
1989  // might not be contiguous. Even if the old Map was contiguous,
1990  // some process in the "middle" might have been excluded. If we
1991  // want to avoid local work, we either have to do the setup by
1992  // hand, or write a new Map constructor.
1993 #if 1
1994  // The disabled code here throws the following exception in
1995  // Map's replaceCommWithSubset test:
1996  //
1997  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ())
1998  // 10:
1999  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
2000  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
2001 
2002  auto lgMap = this->getMyGlobalIndices ();
2003  using size_type =
2004  typename std::decay<decltype (lgMap.extent (0)) >::type;
2005  const size_type lclNumInds =
2006  static_cast<size_type> (this->getNodeNumElements ());
2007  using Teuchos::TypeNameTraits;
2008  TEUCHOS_TEST_FOR_EXCEPTION
2009  (lgMap.extent (0) != lclNumInds, std::logic_error,
2010  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2011  "has length " << lgMap.extent (0) << " (of type " <<
2012  TypeNameTraits<size_type>::name () << ") != this->getNodeNumElements()"
2013  " = " << this->getNodeNumElements () << ". The latter, upon being "
2014  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2015  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2016  "developers.");
2017 #else
2018  Teuchos::ArrayView<const GO> lgMap = this->getNodeElementList ();
2019 #endif // 1
2020 
2021  const GO indexBase = this->getIndexBase ();
2022  return rcp (new map_type (RECOMPUTE, lgMap, indexBase, newComm));
2023  }
2024  }
2025 
2026  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2027  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2029  removeEmptyProcesses () const
2030  {
2031  using Teuchos::Comm;
2032  using Teuchos::null;
2033  using Teuchos::outArg;
2034  using Teuchos::RCP;
2035  using Teuchos::rcp;
2036  using Teuchos::REDUCE_MIN;
2037  using Teuchos::reduceAll;
2038 
2039  // Create the new communicator. split() returns a valid
2040  // communicator on all processes. On processes where color == 0,
2041  // ignore the result. Passing key == 0 tells MPI to order the
2042  // processes in the new communicator by their rank in the old
2043  // communicator.
2044  const int color = (numLocalElements_ == 0) ? 0 : 1;
2045  // MPI_Comm_split must be called collectively over the original
2046  // communicator. We can't just call it on processes with color
2047  // one, even though we will ignore its result on processes with
2048  // color zero.
2049  RCP<const Comm<int> > newComm = comm_->split (color, 0);
2050  if (color == 0) {
2051  newComm = null;
2052  }
2053 
2054  // Create the Map to return.
2055  if (newComm.is_null ()) {
2056  return null; // my process does not participate in the new Map
2057  } else {
2058  // The default constructor that's useful for clone() above is
2059  // also useful here.
2060  RCP<Map> map = rcp (new Map ());
2061 
2062  map->comm_ = newComm;
2063  map->indexBase_ = indexBase_;
2064  map->numGlobalElements_ = numGlobalElements_;
2065  map->numLocalElements_ = numLocalElements_;
2066  map->minMyGID_ = minMyGID_;
2067  map->maxMyGID_ = maxMyGID_;
2068  map->minAllGID_ = minAllGID_;
2069  map->maxAllGID_ = maxAllGID_;
2070  map->firstContiguousGID_= firstContiguousGID_;
2071  map->lastContiguousGID_ = lastContiguousGID_;
2072 
2073  // Uniformity and contiguity have not changed. The directory
2074  // has changed, but we've taken care of that above.
2075  map->uniform_ = uniform_;
2076  map->contiguous_ = contiguous_;
2077 
2078  // If the original Map was NOT distributed, then the new Map
2079  // cannot be distributed.
2080  //
2081  // If the number of processes in the new communicator is 1, then
2082  // the new Map is not distributed.
2083  //
2084  // Otherwise, we have to check the new Map using an all-reduce
2085  // (over the new communicator). For example, the original Map
2086  // may have had some processes with zero elements, and all other
2087  // processes with the same number of elements as in the whole
2088  // Map. That Map is technically distributed, because of the
2089  // processes with zero elements. Removing those processes would
2090  // make the new Map locally replicated.
2091  if (! distributed_ || newComm->getSize () == 1) {
2092  map->distributed_ = false;
2093  } else {
2094  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2095  int allProcsOwnAllGids = 0;
2096  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2097  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2098  }
2099 
2100  map->lgMap_ = lgMap_;
2101  map->lgMapHost_ = lgMapHost_;
2102  map->glMap_ = glMap_;
2103 
2104  // Map's default constructor creates an uninitialized Directory.
2105  // The Directory will be initialized on demand in
2106  // getRemoteIndexList().
2107  //
2108  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2109  // directory more efficiently than just recreating it. If
2110  // directory recreation proves a bottleneck, we can always
2111  // revisit this. On the other hand, Directory creation is only
2112  // collective over the new, presumably much smaller
2113  // communicator, so it may not be worth the effort to optimize.
2114 
2115  return map;
2116  }
2117  }
2118 
2119  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2120  void
2122  {
2123  TEUCHOS_TEST_FOR_EXCEPTION(
2124  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2125  "The Directory is null. "
2126  "Please report this bug to the Tpetra developers.");
2127 
2128  // Only create the Directory if it hasn't been created yet.
2129  // This is a collective operation.
2130  if (! directory_->initialized ()) {
2131  directory_->initialize (*this);
2132  }
2133  }
2134 
2135  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2136  LookupStatus
2138  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2139  const Teuchos::ArrayView<int>& PIDs,
2140  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2141  {
2142  using Tpetra::Details::OrdinalTraits;
2144  using std::endl;
2145  using size_type = Teuchos::ArrayView<int>::size_type;
2146 
2147  const bool verbose = Details::Behavior::verbose("Map");
2148  const size_t maxNumToPrint = verbose ?
2150  std::unique_ptr<std::string> prefix;
2151  if (verbose) {
2152  prefix = Details::createPrefix(comm_.getRawPtr(),
2153  "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2154  std::ostringstream os;
2155  os << *prefix << "Start: ";
2156  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2157  os << endl;
2158  std::cerr << os.str();
2159  }
2160 
2161  // Empty Maps (i.e., containing no indices on any processes in the
2162  // Map's communicator) are perfectly valid. In that case, if the
2163  // input GID list is nonempty, we fill the output arrays with
2164  // invalid values, and return IDNotPresent to notify the caller.
2165  // It's perfectly valid to give getRemoteIndexList GIDs that the
2166  // Map doesn't own. SubmapImport test 2 needs this functionality.
2167  if (getGlobalNumElements () == 0) {
2168  if (GIDs.size () == 0) {
2169  if (verbose) {
2170  std::ostringstream os;
2171  os << *prefix << "Done; both Map & input are empty" << endl;
2172  std::cerr << os.str();
2173  }
2174  return AllIDsPresent; // trivially
2175  }
2176  else {
2177  if (verbose) {
2178  std::ostringstream os;
2179  os << *prefix << "Done: Map is empty on all processes, "
2180  "so all output PIDs & LIDs are invalid (-1)." << endl;
2181  std::cerr << os.str();
2182  }
2183  for (size_type k = 0; k < PIDs.size (); ++k) {
2184  PIDs[k] = OrdinalTraits<int>::invalid ();
2185  }
2186  for (size_type k = 0; k < LIDs.size (); ++k) {
2187  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2188  }
2189  return IDNotPresent;
2190  }
2191  }
2192 
2193  // getRemoteIndexList must be called collectively, and Directory
2194  // initialization is collective too, so it's OK to initialize the
2195  // Directory on demand.
2196 
2197  if (verbose) {
2198  std::ostringstream os;
2199  os << *prefix << "Call setupDirectory" << endl;
2200  std::cerr << os.str();
2201  }
2202  setupDirectory();
2203  if (verbose) {
2204  std::ostringstream os;
2205  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2206  std::cerr << os.str();
2207  }
2208  const Tpetra::LookupStatus retVal =
2209  directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2210  if (verbose) {
2211  std::ostringstream os;
2212  os << *prefix << "Done; getDirectoryEntries returned "
2213  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2214  << "; ";
2215  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2216  os << ", ";
2217  verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2218  os << endl;
2219  std::cerr << os.str();
2220  }
2221  return retVal;
2222  }
2223 
2224  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2225  LookupStatus
2227  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2228  const Teuchos::ArrayView<int> & PIDs) const
2229  {
2231  using std::endl;
2232 
2233  const bool verbose = Details::Behavior::verbose("Map");
2234  const size_t maxNumToPrint = verbose ?
2236  std::unique_ptr<std::string> prefix;
2237  if (verbose) {
2238  prefix = Details::createPrefix(comm_.getRawPtr(),
2239  "Map", "getRemoteIndexList(GIDs,PIDs)");
2240  std::ostringstream os;
2241  os << *prefix << "Start: ";
2242  verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2243  os << endl;
2244  std::cerr << os.str();
2245  }
2246 
2247  if (getGlobalNumElements () == 0) {
2248  if (GIDs.size () == 0) {
2249  if (verbose) {
2250  std::ostringstream os;
2251  os << *prefix << "Done; both Map & input are empty" << endl;
2252  std::cerr << os.str();
2253  }
2254  return AllIDsPresent; // trivially
2255  }
2256  else {
2257  if (verbose) {
2258  std::ostringstream os;
2259  os << *prefix << "Done: Map is empty on all processes, "
2260  "so all output PIDs are invalid (-1)." << endl;
2261  std::cerr << os.str();
2262  }
2263  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2264  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2265  }
2266  return IDNotPresent;
2267  }
2268  }
2269 
2270  // getRemoteIndexList must be called collectively, and Directory
2271  // initialization is collective too, so it's OK to initialize the
2272  // Directory on demand.
2273 
2274  if (verbose) {
2275  std::ostringstream os;
2276  os << *prefix << "Call setupDirectory" << endl;
2277  std::cerr << os.str();
2278  }
2279  setupDirectory();
2280  if (verbose) {
2281  std::ostringstream os;
2282  os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2283  std::cerr << os.str();
2284  }
2285  const Tpetra::LookupStatus retVal =
2286  directory_->getDirectoryEntries(*this, GIDs, PIDs);
2287  if (verbose) {
2288  std::ostringstream os;
2289  os << *prefix << "Done; getDirectoryEntries returned "
2290  << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2291  << "; ";
2292  verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2293  os << endl;
2294  std::cerr << os.str();
2295  }
2296  return retVal;
2297  }
2298 
2299  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2300  Teuchos::RCP<const Teuchos::Comm<int> >
2302  return comm_;
2303  }
2304 
2305  template <class LocalOrdinal,class GlobalOrdinal, class Node>
2306  bool
2308  checkIsDist() const
2309  {
2310  using Teuchos::as;
2311  using Teuchos::outArg;
2312  using Teuchos::REDUCE_MIN;
2313  using Teuchos::reduceAll;
2314  using std::endl;
2315 
2316  const bool verbose = Details::Behavior::verbose("Map");
2317  std::unique_ptr<std::string> prefix;
2318  if (verbose) {
2319  prefix = Details::createPrefix(
2320  comm_.getRawPtr(), "Map", "checkIsDist");
2321  std::ostringstream os;
2322  os << *prefix << "Start" << endl;
2323  std::cerr << os.str();
2324  }
2325 
2326  bool global = false;
2327  if (comm_->getSize () > 1) {
2328  // The communicator has more than one process, but that doesn't
2329  // necessarily mean the Map is distributed.
2330  int localRep = 0;
2331  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2332  // The number of local elements on this process equals the
2333  // number of global elements.
2334  //
2335  // NOTE (mfh 22 Nov 2011) Does this still work if there were
2336  // duplicates in the global ID list on input (the third Map
2337  // constructor), so that the number of local elements (which
2338  // are not duplicated) on this process could be less than the
2339  // number of global elements, even if this process owns all
2340  // the elements?
2341  localRep = 1;
2342  }
2343  int allLocalRep;
2344  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2345  if (allLocalRep != 1) {
2346  // At least one process does not own all the elements.
2347  // This makes the Map a distributed Map.
2348  global = true;
2349  }
2350  }
2351  // If the communicator has only one process, then the Map is not
2352  // distributed.
2353 
2354  if (verbose) {
2355  std::ostringstream os;
2356  os << *prefix << "Done; global=" << (global ? "true" : "false")
2357  << endl;
2358  std::cerr << os.str();
2359  }
2360  return global;
2361  }
2362 
2363 } // namespace Tpetra
2364 
2365 template <class LocalOrdinal, class GlobalOrdinal>
2366 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2367 Tpetra::createLocalMap (const size_t numElements,
2368  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2369 {
2370  typedef LocalOrdinal LO;
2371  typedef GlobalOrdinal GO;
2372  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2373  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2374 }
2375 
2376 template <class LocalOrdinal, class GlobalOrdinal>
2377 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2378 Tpetra::createUniformContigMap (const global_size_t numElements,
2379  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2380 {
2381  typedef LocalOrdinal LO;
2382  typedef GlobalOrdinal GO;
2383  using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2384  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2385 }
2386 
2387 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2388 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2389 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2390  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2391 )
2392 {
2393  using Teuchos::rcp;
2395  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2396 
2397  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2398 }
2399 
2400 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2401 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2402 Tpetra::createLocalMapWithNode (const size_t numElements,
2403  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2404 )
2405 {
2406  using Tpetra::global_size_t;
2407  using Teuchos::rcp;
2409  const GlobalOrdinal indexBase = 0;
2410  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2411 
2412  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2413 }
2414 
2415 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2416 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2418  const size_t localNumElements,
2419  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2420 )
2421 {
2422  using Teuchos::rcp;
2424  const GlobalOrdinal indexBase = 0;
2425 
2426  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2427 }
2428 
2429 template <class LocalOrdinal, class GlobalOrdinal>
2430 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2432  const size_t localNumElements,
2433  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2434 {
2435  typedef LocalOrdinal LO;
2436  typedef GlobalOrdinal GO;
2437  using NT = typename Tpetra::Map<LO, GO>::node_type;
2438 
2439  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2440 }
2441 
2442 template <class LocalOrdinal, class GlobalOrdinal>
2443 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2444 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2445  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2446 {
2447  typedef LocalOrdinal LO;
2448  typedef GlobalOrdinal GO;
2449  using NT = typename Tpetra::Map<LO, GO>::node_type;
2450 
2451  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2452 }
2453 
2454 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2455 Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2456 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2457  const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2458 )
2459 {
2460  using Teuchos::rcp;
2462  using GST = Tpetra::global_size_t;
2463  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2464  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2465  // shouldn't be zero, given that the index base is supposed to equal
2466  // the globally min global index?
2467  const GlobalOrdinal indexBase = 0;
2468 
2469  return rcp (new map_type (INV, elementList, indexBase, comm));
2470 }
2471 
2472 template<class LO, class GO, class NT>
2473 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2474 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2475 {
2477  using Teuchos::Array;
2478  using Teuchos::ArrayView;
2479  using Teuchos::as;
2480  using Teuchos::rcp;
2481  using std::cerr;
2482  using std::endl;
2483  using map_type = Tpetra::Map<LO, GO, NT>;
2484  using GST = global_size_t;
2485 
2486  const bool verbose = Details::Behavior::verbose("Map");
2487  std::unique_ptr<std::string> prefix;
2488  if (verbose) {
2489  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2490  prefix = Details::createPrefix(
2491  comm.getRawPtr(), "createOneToOne(Map)");
2492  std::ostringstream os;
2493  os << *prefix << "Start" << endl;
2494  cerr << os.str();
2495  }
2496  const size_t maxNumToPrint = verbose ?
2497  Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2498  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2499  const int myRank = M->getComm ()->getRank ();
2500 
2501  // Bypasses for special cases where either M is known to be
2502  // one-to-one, or the one-to-one version of M is easy to compute.
2503  // This is why we take M as an RCP, not as a const reference -- so
2504  // that we can return M itself if it is 1-to-1.
2505  if (! M->isDistributed ()) {
2506  // For a locally replicated Map, we assume that users want to push
2507  // all the GIDs to Process 0.
2508 
2509  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2510  // you think it should return, in this special case of a locally
2511  // replicated contiguous Map.
2512  const GST numGlobalEntries = M->getGlobalNumElements ();
2513  if (M->isContiguous()) {
2514  const size_t numLocalEntries =
2515  (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2516  if (verbose) {
2517  std::ostringstream os;
2518  os << *prefix << "Input is locally replicated & contiguous; "
2519  "numLocalEntries=" << numLocalEntries << endl;
2520  cerr << os.str ();
2521  }
2522  auto retMap =
2523  rcp(new map_type(numGlobalEntries, numLocalEntries,
2524  M->getIndexBase(), M->getComm()));
2525  if (verbose) {
2526  std::ostringstream os;
2527  os << *prefix << "Done" << endl;
2528  cerr << os.str ();
2529  }
2530  return retMap;
2531  }
2532  else {
2533  if (verbose) {
2534  std::ostringstream os;
2535  os << *prefix << "Input is locally replicated & noncontiguous"
2536  << endl;
2537  cerr << os.str ();
2538  }
2539  ArrayView<const GO> myGids =
2540  (myRank == 0) ? M->getNodeElementList() : Teuchos::null;
2541  auto retMap =
2542  rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2543  M->getComm()));
2544  if (verbose) {
2545  std::ostringstream os;
2546  os << *prefix << "Done" << endl;
2547  cerr << os.str ();
2548  }
2549  return retMap;
2550  }
2551  }
2552  else if (M->isContiguous ()) {
2553  if (verbose) {
2554  std::ostringstream os;
2555  os << *prefix << "Input is distributed & contiguous" << endl;
2556  cerr << os.str ();
2557  }
2558  // Contiguous, distributed Maps are one-to-one by construction.
2559  // (Locally replicated Maps can be contiguous.)
2560  return M;
2561  }
2562  else {
2563  if (verbose) {
2564  std::ostringstream os;
2565  os << *prefix << "Input is distributed & noncontiguous" << endl;
2566  cerr << os.str ();
2567  }
2569  const size_t numMyElems = M->getNodeNumElements ();
2570  ArrayView<const GO> myElems = M->getNodeElementList ();
2571  Array<int> owner_procs_vec (numMyElems);
2572 
2573  if (verbose) {
2574  std::ostringstream os;
2575  os << *prefix << "Call Directory::getDirectoryEntries: ";
2576  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2577  os << endl;
2578  cerr << os.str();
2579  }
2580  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2581  if (verbose) {
2582  std::ostringstream os;
2583  os << *prefix << "getDirectoryEntries result: ";
2584  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2585  os << endl;
2586  cerr << os.str();
2587  }
2588 
2589  Array<GO> myOwned_vec (numMyElems);
2590  size_t numMyOwnedElems = 0;
2591  for (size_t i = 0; i < numMyElems; ++i) {
2592  const GO GID = myElems[i];
2593  const int owner = owner_procs_vec[i];
2594 
2595  if (myRank == owner) {
2596  myOwned_vec[numMyOwnedElems++] = GID;
2597  }
2598  }
2599  myOwned_vec.resize (numMyOwnedElems);
2600 
2601  if (verbose) {
2602  std::ostringstream os;
2603  os << *prefix << "Create Map: ";
2604  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2605  os << endl;
2606  cerr << os.str();
2607  }
2608  auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2609  M->getIndexBase(), M->getComm()));
2610  if (verbose) {
2611  std::ostringstream os;
2612  os << *prefix << "Done" << endl;
2613  cerr << os.str();
2614  }
2615  return retMap;
2616  }
2617 }
2618 
2619 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2620 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2623 {
2624  using Details::Behavior;
2626  using Teuchos::Array;
2627  using Teuchos::ArrayView;
2628  using Teuchos::RCP;
2629  using Teuchos::rcp;
2630  using Teuchos::toString;
2631  using std::cerr;
2632  using std::endl;
2633  using LO = LocalOrdinal;
2634  using GO = GlobalOrdinal;
2635  using map_type = Tpetra::Map<LO, GO, Node>;
2636 
2637  const bool verbose = Behavior::verbose("Map");
2638  std::unique_ptr<std::string> prefix;
2639  if (verbose) {
2640  auto comm = M.is_null() ? Teuchos::null : M->getComm();
2641  prefix = Details::createPrefix(
2642  comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2643  std::ostringstream os;
2644  os << *prefix << "Start" << endl;
2645  cerr << os.str();
2646  }
2647  const size_t maxNumToPrint = verbose ?
2648  Behavior::verbosePrintCountThreshold() : size_t(0);
2649 
2650  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2651  // Maps (which are 1-to-1 by construction).
2652 
2654  if (verbose) {
2655  std::ostringstream os;
2656  os << *prefix << "Initialize Directory" << endl;
2657  cerr << os.str ();
2658  }
2659  directory.initialize (*M, tie_break);
2660  if (verbose) {
2661  std::ostringstream os;
2662  os << *prefix << "Done initializing Directory" << endl;
2663  cerr << os.str ();
2664  }
2665  size_t numMyElems = M->getNodeNumElements ();
2666  ArrayView<const GO> myElems = M->getNodeElementList ();
2667  Array<int> owner_procs_vec (numMyElems);
2668  if (verbose) {
2669  std::ostringstream os;
2670  os << *prefix << "Call Directory::getDirectoryEntries: ";
2671  verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2672  os << endl;
2673  cerr << os.str();
2674  }
2675  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2676  if (verbose) {
2677  std::ostringstream os;
2678  os << *prefix << "getDirectoryEntries result: ";
2679  verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2680  os << endl;
2681  cerr << os.str();
2682  }
2683 
2684  const int myRank = M->getComm()->getRank();
2685  Array<GO> myOwned_vec (numMyElems);
2686  size_t numMyOwnedElems = 0;
2687  for (size_t i = 0; i < numMyElems; ++i) {
2688  const GO GID = myElems[i];
2689  const int owner = owner_procs_vec[i];
2690  if (myRank == owner) {
2691  myOwned_vec[numMyOwnedElems++] = GID;
2692  }
2693  }
2694  myOwned_vec.resize (numMyOwnedElems);
2695 
2696  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2697  // valid for the new Map. Why can't we reuse it?
2698  const global_size_t GINV =
2699  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2700  if (verbose) {
2701  std::ostringstream os;
2702  os << *prefix << "Create Map: ";
2703  verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2704  os << endl;
2705  cerr << os.str();
2706  }
2707  RCP<const map_type> retMap
2708  (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2709  M->getComm ()));
2710  if (verbose) {
2711  std::ostringstream os;
2712  os << *prefix << "Done" << endl;
2713  cerr << os.str();
2714  }
2715  return retMap;
2716 }
2717 
2718 //
2719 // Explicit instantiation macro
2720 //
2721 // Must be expanded from within the Tpetra namespace!
2722 //
2723 
2725 
2726 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2727  \
2728  template class Map< LO , GO , NODE >; \
2729  \
2730  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2731  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2732  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2733  \
2734  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2735  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2736  const size_t localNumElements, \
2737  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2738  \
2739  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2740  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2741  const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2742  \
2743  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2744  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2745  const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2746  \
2747  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2748  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2749  \
2750  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2751  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2752  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2753 
2754 
2756 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2757  template Teuchos::RCP< const Map<LO,GO> > \
2758  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2759  \
2760  template Teuchos::RCP< const Map<LO,GO> > \
2761  createContigMap<LO,GO>( global_size_t, size_t, \
2762  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2763  \
2764  template Teuchos::RCP< const Map<LO,GO> > \
2765  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2766  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2767  \
2768  template Teuchos::RCP< const Map<LO,GO> > \
2769  createUniformContigMap<LO,GO>( const global_size_t, \
2770  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2771 
2772 #endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
void initialize(const map_type &map)
Initialize the Directory with its Map.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
GlobalOrdinal global_ordinal_type
The type of global indices.
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LO local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
bool pointerAccessibleFromExecutionSpace(const void *ptr, const ExecutionSpace &space)
Is the given nonnull ptr accessible from the given execution space?
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:64
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
std::string memorySpaceName(const void *ptr)
Return the Kokkos memory space name (without "Kokkos::") corresponding to the given nonnull pointer.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...