Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_makeColMap_def.hpp
Go to the documentation of this file.
1 #ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
2 #define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
3 
4 // @HEADER
5 // ***********************************************************************
6 //
7 // Tpetra: Templated Linear Algebra Services Package
8 // Copyright (2008) Sandia Corporation
9 //
10 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
11 // the U.S. Government retains certain rights in this software.
12 //
13 // Redistribution and use in source and binary forms, with or without
14 // modification, are permitted provided that the following conditions are
15 // met:
16 //
17 // 1. Redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer.
19 //
20 // 2. Redistributions in binary form must reproduce the above copyright
21 // notice, this list of conditions and the following disclaimer in the
22 // documentation and/or other materials provided with the distribution.
23 //
24 // 3. Neither the name of the Corporation nor the names of the
25 // contributors may be used to endorse or promote products derived from
26 // this software without specific prior written permission.
27 //
28 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
29 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
32 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 //
40 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
41 //
42 // ************************************************************************
43 // @HEADER
44 
55 
56 #include "Tpetra_RowGraph.hpp"
57 #include "Tpetra_Util.hpp"
58 #include "Teuchos_Array.hpp"
59 #include "Kokkos_Bitset.hpp"
60 #include <set>
61 #include <vector>
62 
63 namespace Tpetra {
64 namespace Details {
65 
66 template <class LO, class GO, class NT>
67 int
68 makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
69  Teuchos::Array<int>& remotePIDs,
70  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
71  size_t numLocalColGIDs,
72  size_t numRemoteColGIDs,
73  std::set<GO>& RemoteGIDSet,
74  std::vector<GO>& RemoteGIDUnorderedVector,
75  std::vector<bool>& GIDisLocal,
76  const bool sortEachProcsGids,
77  std::ostream* errStrm)
78 {
79  using Teuchos::Array;
80  using Teuchos::ArrayView;
81  using Teuchos::rcp;
82  using std::endl;
83  int errCode = 0;
84  const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
85  typedef ::Tpetra::Map<LO, GO, NT> map_type;
86  // Possible short-circuit for serial scenario:
87  //
88  // If all domain GIDs are present as column indices, then set
89  // ColMap=DomainMap. By construction, LocalGIDs is a subset of
90  // DomainGIDs.
91  //
92  // If we have
93  // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
94  // and
95  // * Number of local GIDs is number of domain GIDs
96  // then
97  // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
98  // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
99  // on the calling process.
100  //
101  // We will concern ourselves only with the special case of a
102  // serial DomainMap, obviating the need for communication.
103  //
104  // If
105  // * DomainMap has a serial communicator
106  // then we can set the column Map as the domain Map
107  // return. Benefit: this graph won't need an Import object
108  // later.
109  //
110  // Note, for a serial domain map, there can be no RemoteGIDs,
111  // because there are no remote processes. Likely explanations
112  // for this are:
113  // * user submitted erroneous column indices
114  // * user submitted erroneous domain Map
115  if (domMap->getComm ()->getSize () == 1) {
116  if (numRemoteColGIDs != 0) {
117  errCode = -2;
118  if (errStrm != NULL) {
119  *errStrm << prefix << "The domain Map only has one process, but "
120  << numRemoteColGIDs << " column "
121  << (numRemoteColGIDs != 1 ? "indices are" : "index is")
122  << " not in the domain Map. Either these indices are "
123  "invalid or the domain Map is invalid. Remember that nonsquare "
124  "matrices, or matrices where the row and range Maps differ, "
125  "require calling the version of fillComplete that takes the "
126  "domain and range Maps as input." << endl;
127  }
128  }
129  if (numLocalColGIDs == domMap->getNodeNumElements ()) {
130  colMap = domMap; // shallow copy
131  return errCode;
132  }
133  }
134 
135  // Populate myColumns with a list of all column GIDs. Put
136  // locally owned (in the domain Map) GIDs at the front: they
137  // correspond to "same" and "permuted" entries between the
138  // column Map and the domain Map. Put remote GIDs at the back.
139  Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
140  // get pointers into myColumns for each part
141  ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
142  ArrayView<GO> remoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);
143 
144  // Copy the remote GIDs into myColumns
145  if (sortEachProcsGids) {
146  // The std::set puts GIDs in increasing order.
147  std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
148  remoteColGIDs.begin());
149  }
150  else {
151  // Respect the originally encountered order.
152  std::copy (RemoteGIDUnorderedVector.begin(),
153  RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
154  }
155 
156  // Make a list of process ranks corresponding to the remote GIDs.
157  // remotePIDs is an output argument of getRemoteIndexList below;
158  // its initial contents don't matter.
159  if (static_cast<size_t> (remotePIDs.size ()) != numRemoteColGIDs) {
160  remotePIDs.resize (numRemoteColGIDs);
161  }
162  // Look up the remote process' ranks in the domain Map.
163  {
164  const LookupStatus stat =
165  domMap->getRemoteIndexList (remoteColGIDs, remotePIDs ());
166 
167  // If any process returns IDNotPresent, then at least one of
168  // the remote indices was not present in the domain Map. This
169  // means that the Import object cannot be constructed, because
170  // of incongruity between the column Map and domain Map.
171  // This has two likely causes:
172  // - The user has made a mistake in the column indices
173  // - The user has made a mistake with respect to the domain Map
174  if (stat == IDNotPresent) {
175  if (errStrm != NULL) {
176  *errStrm << prefix << "Some column indices are not in the domain Map."
177  "Either these column indices are invalid or the domain Map is "
178  "invalid. Likely cause: For a nonsquare matrix, you must give the "
179  "domain and range Maps as input to fillComplete." << endl;
180  }
181  // Don't return yet, because not all processes may have
182  // encountered this error state. This function ends with an
183  // all-reduce, so we have to make sure that everybody gets to
184  // that point. The resulting Map may be wrong, but at least
185  // nothing should crash.
186  errCode = -3;
187  }
188  }
189 
190  // Sort incoming remote column indices by their owning process
191  // rank, so that all columns coming from a given remote process
192  // are contiguous. This means the Import's Distributor doesn't
193  // need to reorder data.
194  //
195  // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
196  // it respects either of the possible orderings of GIDs (sorted,
197  // or original order) specified above.
198  sort2 (remotePIDs.begin (), remotePIDs.end (), remoteColGIDs.begin ());
199 
200  // Copy the local GIDs into myColumns. Two cases:
201  // 1. If the number of Local column GIDs is the same as the number
202  // of Local domain GIDs, we can simply read the domain GIDs
203  // into the front part of ColIndices (see logic above from the
204  // serial short circuit case)
205  // 2. We step through the GIDs of the DomainMap, checking to see
206  // if each domain GID is a column GID. We want to do this to
207  // maintain a consistent ordering of GIDs between the columns
208  // and the domain.
209 
210  const size_t numDomainElts = domMap->getNodeNumElements ();
211  if (numLocalColGIDs == numDomainElts) {
212  // If the number of locally owned GIDs are the same as the
213  // number of local domain Map elements, then the local domain
214  // Map elements are the same as the locally owned GIDs.
215  if (domMap->isContiguous ()) {
216  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
217  // the domain Map is contiguous, it's more efficient to avoid
218  // calling getNodeElementList(), since that permanently
219  // constructs and caches the GID list in the contiguous Map.
220  GO curColMapGid = domMap->getMinGlobalIndex ();
221  for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
222  LocalColGIDs[k] = curColMapGid;
223  }
224  }
225  else {
226  ArrayView<const GO> domainElts = domMap->getNodeElementList ();
227  std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
228  }
229  }
230  else {
231  // Count the number of locally owned GIDs, both to keep track of
232  // the current array index, and as a sanity check.
233  size_t numLocalCount = 0;
234  if (domMap->isContiguous ()) {
235  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
236  // the domain Map is contiguous, it's more efficient to avoid
237  // calling getNodeElementList(), since that permanently
238  // constructs and caches the GID list in the contiguous Map.
239  GO curColMapGid = domMap->getMinGlobalIndex ();
240  for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
241  if (GIDisLocal[i]) {
242  LocalColGIDs[numLocalCount++] = curColMapGid;
243  }
244  }
245  }
246  else {
247  ArrayView<const GO> domainElts = domMap->getNodeElementList ();
248  for (size_t i = 0; i < numDomainElts; ++i) {
249  if (GIDisLocal[i]) {
250  LocalColGIDs[numLocalCount++] = domainElts[i];
251  }
252  }
253  }
254  if (numLocalCount != numLocalColGIDs) {
255  if (errStrm != NULL) {
256  *errStrm << prefix << "numLocalCount = " << numLocalCount
257  << " != numLocalColGIDs = " << numLocalColGIDs
258  << ". This should never happen. "
259  "Please report this bug to the Tpetra developers." << endl;
260  }
261  // Don't return yet, because not all processes may have
262  // encountered this error state. This function ends with an
263  // all-reduce, so we have to make sure that everybody gets to
264  // that point.
265  errCode = -4;
266  }
267  }
268 
269  // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
270  // information we collected above to construct the Import. In
271  // particular, building an Import requires:
272  //
273  // 1. numSameIDs (length of initial contiguous sequence of GIDs
274  // on this process that are the same in both Maps; this
275  // equals the number of domain Map elements on this process)
276  //
277  // 2. permuteToLIDs and permuteFromLIDs (both empty in this
278  // case, since there's no permutation going on; the column
279  // Map starts with the domain Map's GIDs, and immediately
280  // after them come the remote GIDs)
281  //
282  // 3. remoteGIDs (exactly those GIDs that we found out above
283  // were not in the domain Map) and remoteLIDs (which we could
284  // have gotten above by using the three-argument version of
285  // getRemoteIndexList() that computes local indices as well
286  // as process ranks, instead of the two-argument version that
287  // was used above)
288  //
289  // 4. remotePIDs (which we have from the getRemoteIndexList() call
290  // above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
291  // Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
292  // addresses this. remotePIDs is now an output argument of
293  // both this function and CrsGraph::makeColMap, and
294  // CrsGraph::makeImportExport now has an option to use that
295  // information, if available (that is, if makeColMap was
296  // actually called, which it would not be if the graph already
297  // had a column Map).
298  //
299  // 5. Apply the permutation from sorting remotePIDs to both
300  // remoteGIDs and remoteLIDs (by calling sort3 above instead of
301  // sort2), instead of just to remoteLIDs alone.
302  //
303  // 6. Everything after the sort3 call in Import::setupExport():
304  // a. Create the Distributor via createFromRecvs(), which
305  // computes exportGIDs and exportPIDs
306  // b. Compute exportLIDs from exportGIDs (by asking the
307  // source Map, in this case the domain Map, to convert
308  // global to local)
309  //
310  // Steps 1-5 come for free, since we must do that work anyway in
311  // order to compute the column Map. In particular, Step 3 is
312  // even more expensive than Step 6a, since it involves both
313  // creating and using a new Distributor object.
314  const global_size_t INV =
315  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
316  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
317  // be the same as the Map's min GID? If the first column is empty
318  // (contains no entries), then the column Map's min GID won't
319  // necessarily be the same as the domain Map's index base.
320  const GO indexBase = domMap->getIndexBase ();
321  colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
322  return errCode;
323 }
324 
325 template <class LO, class GO, class NT>
326 int
327 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& colMap,
328  Teuchos::Array<int>& remotePIDs,
329  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domMap,
330  const RowGraph<LO, GO, NT>& graph,
331  const bool sortEachProcsGids,
332  std::ostream* errStrm)
333 {
334  using Teuchos::Array;
335  using Teuchos::ArrayView;
336  using Teuchos::rcp;
337  using std::endl;
338  typedef ::Tpetra::Map<LO, GO, NT> map_type;
339  const char prefix[] = "Tpetra::Details::makeColMap: ";
340  int errCode = 0;
341 
342  // If the input domain Map or its communicator is null on the
343  // calling process, then the calling process does not participate in
344  // the returned column Map. Thus, we can set the returned column
345  // Map to null on those processes, and return immediately. This is
346  // not an error condition, as long as when domMap and its
347  // communicator are NOT null, the graph's other Maps and
348  // communicator are not also null.
349  if (domMap.is_null () || domMap->getComm ().is_null ()) {
350  colMap = Teuchos::null;
351  return errCode;
352  }
353 
354  Array<GO> myColumns;
355  if (graph.isLocallyIndexed ()) {
356  colMap = graph.getColMap ();
357  // If the graph is locally indexed, it had better have a column Map.
358  // The extra check for ! graph.hasColMap() is conservative.
359  if (colMap.is_null () || ! graph.hasColMap ()) {
360  errCode = -1;
361  if (errStrm != NULL) {
362  *errStrm << prefix << "The graph is locally indexed on the calling "
363  "process, but has no column Map (either getColMap() returns null, "
364  "or hasColMap() returns false)." << endl;
365  }
366  // Under this error condition, this process will not fill
367  // myColumns. The resulting new column Map will be incorrect,
368  // but at least we won't hang, and this process will report the
369  // error.
370  }
371  else {
372  // The graph already has a column Map, and is locally indexed on
373  // the calling process. However, it may be globally indexed (or
374  // neither locally nor globally indexed) on other processes.
375  // Assume that we want to recreate the column Map.
376  if (colMap->isContiguous ()) {
377  // The number of indices on each process must fit in LO.
378  const LO numCurGids = static_cast<LO> (colMap->getNodeNumElements ());
379  myColumns.resize (numCurGids);
380  const GO myFirstGblInd = colMap->getMinGlobalIndex ();
381  for (LO k = 0; k < numCurGids; ++k) {
382  myColumns[k] = myFirstGblInd + static_cast<GO> (k);
383  }
384  }
385  else { // the column Map is NOT contiguous
386  ArrayView<const GO> curGids = graph.getColMap ()->getNodeElementList ();
387  // The number of indices on each process must fit in LO.
388  const LO numCurGids = static_cast<LO> (curGids.size ());
389  myColumns.resize (numCurGids);
390  for (LO k = 0; k < numCurGids; ++k) {
391  myColumns[k] = curGids[k];
392  }
393  } // whether the graph's current column Map is contiguous
394  } // does the graph currently have a column Map?
395  }
396  else if (graph.isGloballyIndexed ()) {
397  // Go through all the rows, finding the populated column indices.
398  //
399  // Our final list of indices for the column Map constructor will
400  // have the following properties (all of which are with respect to
401  // the calling process):
402  //
403  // 1. Indices in the domain Map go first.
404  // 2. Indices not in the domain Map follow, ordered first
405  // contiguously by their owning process rank (in the domain
406  // Map), then in increasing order within that.
407  // 3. No duplicate indices.
408  //
409  // This imitates the ordering used by Aztec(OO) and Epetra.
410  // Storing indices owned by the same process (in the domain Map)
411  // contiguously permits the use of contiguous send and receive
412  // buffers.
413  //
414  // We begin by partitioning the column indices into "local" GIDs
415  // (owned by the domain Map) and "remote" GIDs (not owned by the
416  // domain Map). We use the same order for local GIDs as the
417  // domain Map, so we track them in place in their array. We use
418  // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
419  // that we don't have to merge duplicates later.
420  const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid ();
421  size_t numLocalColGIDs = 0;
422  size_t numRemoteColGIDs = 0;
423 
424  // GIDisLocal[lid] is false if and only if local index lid in the
425  // domain Map is remote (not local).
426  std::vector<bool> GIDisLocal (domMap->getNodeNumElements (), false);
427  std::set<GO> RemoteGIDSet;
428  // This preserves the not-sorted Epetra order of GIDs.
429  // We only use this if sortEachProcsGids is false.
430  std::vector<GO> RemoteGIDUnorderedVector;
431 
432  if (! graph.getRowMap ().is_null ()) {
433  const Tpetra::Map<LO, GO, NT>& rowMap = * (graph.getRowMap ());
434  const LO lclNumRows = rowMap.getNodeNumElements ();
435  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
436  const GO gblRow = rowMap.getGlobalElement (lclRow);
437  typename RowGraph<LO,GO,NT>::global_inds_host_view_type rowGids;
438  graph.getGlobalRowView (gblRow, rowGids);
439 
440  const LO numEnt = static_cast<LO> (rowGids.size ());
441  if (numEnt != 0) {
442  for (LO k = 0; k < numEnt; ++k) {
443  const GO gid = rowGids[k];
444  const LO lid = domMap->getLocalElement (gid);
445  if (lid != LINV) {
446  const bool alreadyFound = GIDisLocal[lid];
447  if (! alreadyFound) {
448  GIDisLocal[lid] = true;
449  ++numLocalColGIDs;
450  }
451  }
452  else {
453  const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
454  if (notAlreadyFound) { // gid did not exist in the set before
455  if (! sortEachProcsGids) {
456  // The user doesn't want to sort remote GIDs (for each
457  // remote process); they want us to keep remote GIDs
458  // in their original order. We do this by stuffing
459  // each remote GID into an array as we encounter it
460  // for the first time. The std::set helpfully tracks
461  // first encounters.
462  RemoteGIDUnorderedVector.push_back (gid);
463  }
464  ++numRemoteColGIDs;
465  }
466  }
467  } // for each entry k in row r
468  } // if row r contains > 0 entries
469  } // for each locally owned row r
470  } // if the graph has a nonnull row Map
471 
472  return makeColMapImpl<LO, GO, NT>(
473  colMap, remotePIDs,
474  domMap,
475  numLocalColGIDs, numRemoteColGIDs,
476  RemoteGIDSet, RemoteGIDUnorderedVector, GIDisLocal,
477  sortEachProcsGids, errStrm);
478 
479  } // if the graph is globally indexed
480  else {
481  // If we reach this point, the graph is neither locally nor
482  // globally indexed. Thus, the graph is empty on this process
483  // (per the usual legacy Petra convention), so myColumns will be
484  // left empty.
485  ; // do nothing
486  }
487 
488  const global_size_t INV =
489  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
490  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
491  // be the same as the Map's min GID? If the first column is empty
492  // (contains no entries), then the column Map's min GID won't
493  // necessarily be the same as the domain Map's index base.
494  const GO indexBase = domMap->getIndexBase ();
495  colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
496  return errCode;
497 }
498 
499 template<typename GOView, typename bitset_t>
500 struct GatherPresentEntries
501 {
502  using GO = typename GOView::non_const_value_type;
503 
504  GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
505  : minGID(minGID_), gids(gids_), present(present_)
506  {}
507 
508  KOKKOS_INLINE_FUNCTION void operator()(const GO i) const
509  {
510  present.set(gids(i) - minGID);
511  }
512 
513  GO minGID;
514  GOView gids;
515  bitset_t present;
516 };
517 
518 template<typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
519 struct ListGIDs
520 {
521  using mem_space = typename device_t::memory_space;
522  using GOView = Kokkos::View<GO*, mem_space>;
523  using SingleView = Kokkos::View<GO, mem_space>;
524 
525  ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
526  : minGID(minGID_), gidList(gidList_), numElems(numElems_), present(present_), localDomainMap(localDomainMap_)
527  {}
528 
529  KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const
530  {
531  bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
532  if(present.test(i) && doingRemotes == isRemote)
533  {
534  if(finalPass)
535  {
536  //lcount is the index where this GID should be inserted in gidList.
537  gidList(lcount) = minGID + i;
538  }
539  lcount++;
540  }
541  if((i == static_cast<GO>(present.size() - 1)) && finalPass)
542  {
543  //Set the number of inserted indices in a single-element view
544  numElems() = lcount;
545  }
546  }
547 
548  GO minGID;
549  GOView gidList;
550  SingleView numElems;
551  const_bitset_t present;
552  const LocalMapType localDomainMap;
553 };
554 
555 template<typename GO, typename mem_space>
556 struct MinMaxReduceFunctor
557 {
558  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
559  using GOView = Kokkos::View<GO*, mem_space>;
560 
561  MinMaxReduceFunctor(const GOView& gids_)
562  : gids(gids_)
563  {}
564 
565  KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const
566  {
567  GO gid = gids(i);
568  if(gid < lminmax.min_val)
569  lminmax.min_val = gid;
570  if(gid > lminmax.max_val)
571  lminmax.max_val = gid;
572  };
573 
574  const GOView gids;
575 };
576 
577 template <class LO, class GO, class NT>
578 int
579 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
580  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
581  Kokkos::View<GO*, typename NT::memory_space> gids,
582  std::ostream* errStrm)
583 {
584  using Teuchos::RCP;
585  using Teuchos::Array;
586  using Kokkos::RangePolicy;
587  using device_t = typename NT::device_type;
588  using exec_space = typename device_t::execution_space;
589  using memory_space = typename device_t::memory_space;
590  // Note BMK 5-2021: this is deliberately not just device_t.
591  // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
592  // use the default memory space for HIP (HIPSpace). Using the default mem
593  // space is fine for all other backends too. This bitset type is only used
594  // in this function so it won't cause type mismatches.
595  using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
596  using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
597  using GOView = Kokkos::View<GO*, memory_space>;
598  using SingleView = Kokkos::View<GO, memory_space>;
599  using map_type = Tpetra::Map<LO, GO, NT>;
600  using LocalMap = typename map_type::local_map_type;
601  GO nentries = gids.extent(0);
602  GO minGID = Teuchos::OrdinalTraits<GO>::max();
603  GO maxGID = 0;
604  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
605  MinMaxValue minMaxGID = {minGID, maxGID};
606  Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
607  MinMaxReduceFunctor<GO, memory_space>(gids),
608  Kokkos::MinMax<GO>(minMaxGID));
609  minGID = minMaxGID.min_val; maxGID = minMaxGID.max_val;
610  //Now, know the full range of input GIDs.
611  //Determine the set of GIDs in the column map using a dense bitset, which corresponds to the range [minGID, maxGID]
612  bitset_t presentGIDs(maxGID - minGID + 1);
613  Kokkos::parallel_for(RangePolicy<exec_space>(0, nentries), GatherPresentEntries<GOView, bitset_t>(minGID, gids, presentGIDs));
614  const_bitset_t constPresentGIDs(presentGIDs);
615  //Get the set of local and remote GIDs on device
616  SingleView numLocals("Num local GIDs");
617  SingleView numRemotes("Num remote GIDs");
618  GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
619  GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
620  LocalMap localDomMap = domMap->getLocalMap();
621  //This lists the locally owned GIDs in localGIDView
622  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
623  ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, false>
624  (minGID, localGIDView, numLocals, constPresentGIDs, localDomMap));
625  //And this lists the remote GIDs in remoteGIDView
626  Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
627  ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, true>
628  (minGID, remoteGIDView, numRemotes, constPresentGIDs, localDomMap));
629  //Pull down the sizes
630  GO numLocalColGIDs = 0;
631  GO numRemoteColGIDs = 0;
632  Kokkos::deep_copy(numLocalColGIDs, numLocals);
633  Kokkos::deep_copy(numRemoteColGIDs, numRemotes);
634  //Pull down the remote lists
635  auto localsHost = Kokkos::create_mirror_view(localGIDView);
636  auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
637  Kokkos::deep_copy(localsHost, localGIDView);
638  Kokkos::deep_copy(remotesHost, remoteGIDView);
639  //Finally, populate the STL structures which hold the index lists
640  std::set<GO> RemoteGIDSet;
641  std::vector<GO> RemoteGIDUnorderedVector;
642  std::vector<bool> GIDisLocal (domMap->getNodeNumElements (), false);
643  for(GO i = 0; i < numLocalColGIDs; i++)
644  {
645  GO gid = localsHost(i);
646  //Already know that gid is locally owned, so this index will never be invalid().
647  //makeColMapImpl uses this and the domain map to get the the local GID list.
648  GIDisLocal[domMap->getLocalElement(gid)] = true;
649  }
650  RemoteGIDUnorderedVector.reserve(numRemoteColGIDs);
651  for(GO i = 0; i < numRemoteColGIDs; i++)
652  {
653  GO gid = remotesHost(i);
654  RemoteGIDSet.insert(gid);
655  RemoteGIDUnorderedVector.push_back(gid);
656  }
657  //remotePIDs will be discarded in this version of makeColMap
658  Array<int> remotePIDs;
659  //Find the min and max GID
660  return makeColMapImpl<LO, GO, NT>(
661  colMap,
662  remotePIDs,
663  domMap,
664  static_cast<size_t>(numLocalColGIDs),
665  static_cast<size_t>(numRemoteColGIDs),
666  RemoteGIDSet,
667  RemoteGIDUnorderedVector,
668  GIDisLocal,
669  true, //always sort remotes
670  errStrm);
671 }
672 
673 } // namespace Details
674 } // namespace Tpetra
675 
676 //
677 // Explicit instantiation macros
678 //
679 // Must be expanded from within the Tpetra namespace!
680 //
681 #define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO,GO,NT) \
682  namespace Details { \
683  template int \
684  makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
685  Teuchos::Array<int>&, \
686  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
687  const RowGraph<LO, GO, NT>&, \
688  const bool, \
689  std::ostream*); \
690  template int \
691  makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
692  const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
693  Kokkos::View<GO*, typename NT::memory_space>, \
694  std::ostream*); \
695  }
696 
697 #endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
virtual bool hasColMap() const =0
Whether the graph has a well-defined column Map.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
An abstract interface for graphs accessed by rows.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes this graph&#39;s distribution of columns over processes.
"Local" part of Map suitable for Kokkos kernels.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
Implementation details of Tpetra.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes this graph&#39;s distribution of rows over processes.
virtual bool isGloballyIndexed() const =0
If graph indices are in the global range, this function returns true. Otherwise, this function return...
virtual void getGlobalRowView(const GlobalOrdinal gblRow, global_inds_host_view_type &gblColInds) const =0
Get a const, non-persisting view of the given global row&#39;s global column indices, as a Teuchos::Array...
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph&#39;s column Map.
Stand-alone utility functions and macros.
virtual bool isLocallyIndexed() const =0
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.