Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
50 
55 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
56 #include "Tpetra_Details_makeColMap.hpp"
60 #include "Tpetra_Distributor.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Tpetra_Vector.hpp"
63 #include "Tpetra_Import_Util.hpp"
64 #include "Tpetra_Import_Util2.hpp"
65 #include "Tpetra_Details_packCrsGraph.hpp"
66 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
67 #include "Tpetra_Details_CrsPadding.hpp"
68 #include "Tpetra_Util.hpp"
69 #include <algorithm>
70 #include <limits>
71 #include <map>
72 #include <sstream>
73 #include <string>
74 #include <type_traits>
75 #include <utility>
76 #include <vector>
77 
78 namespace Tpetra {
79  namespace Details {
80  namespace Impl {
81 
82  template<class MapIter>
83  void
84  verbosePrintMap(std::ostream& out,
85  MapIter beg,
86  MapIter end,
87  const size_t numEnt,
88  const char mapName[])
89  {
90  using ::Tpetra::Details::Behavior;
92 
93  out << mapName << ": {";
94  const size_t maxNumToPrint =
96  if (maxNumToPrint == 0) {
97  if (numEnt != 0) {
98  out << "...";
99  }
100  }
101  else {
102  const size_t numToPrint = numEnt > maxNumToPrint ?
103  maxNumToPrint : numEnt;
104  size_t count = 0;
105  for (MapIter it = beg; it != end; ++it) {
106  out << "(" << (*it).first << ", ";
107  verbosePrintArray(out, (*it).second, "gblColInds",
108  maxNumToPrint);
109  out << ")";
110  if (count + size_t(1) < numToPrint) {
111  out << ", ";
112  }
113  ++count;
114  }
115  if (count < numEnt) {
116  out << ", ...";
117  }
118  }
119  out << "}";
120  }
121 
122  template<class LO, class GO, class Node>
123  Teuchos::ArrayView<GO>
124  getRowGraphGlobalRow(
125  std::vector<GO>& gblColIndsStorage,
126  const RowGraph<LO, GO, Node>& graph,
127  const GO gblRowInd)
128  {
129  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
130  if (gblColIndsStorage.size() < origNumEnt) {
131  gblColIndsStorage.resize(origNumEnt);
132  }
133  Teuchos::ArrayView<GO> gblColInds(gblColIndsStorage.data(),
134  origNumEnt);
135  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
136  return gblColInds;
137  }
138 
139  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
140  class ConvertColumnIndicesFromGlobalToLocal {
141  public:
142  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
143  const ::Kokkos::View<const GO*, DT>& gblColInds,
144  const ::Kokkos::View<const OffsetType*, DT>& ptr,
145  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
146  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
147  lclColInds_ (lclColInds),
148  gblColInds_ (gblColInds),
149  ptr_ (ptr),
150  lclColMap_ (lclColMap),
151  numRowEnt_ (numRowEnt)
152  {}
153 
154  KOKKOS_FUNCTION void
155  operator () (const LO& lclRow, OffsetType& curNumBad) const
156  {
157  const OffsetType offset = ptr_(lclRow);
158  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
159  // of entries in a row to LO, as long as the row doesn't have
160  // too many duplicate entries.
161  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
162  for (LO j = 0; j < numEnt; ++j) {
163  const GO gid = gblColInds_(offset + j);
164  const LO lid = lclColMap_.getLocalElement (gid);
165  lclColInds_(offset + j) = lid;
166  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
167  ++curNumBad;
168  }
169  }
170  }
171 
172  static OffsetType
173  run (const ::Kokkos::View<LO*, DT>& lclColInds,
174  const ::Kokkos::View<const GO*, DT>& gblColInds,
175  const ::Kokkos::View<const OffsetType*, DT>& ptr,
176  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
177  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
178  {
179  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
180  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
181 
182  const LO lclNumRows = ptr.extent (0) == 0 ?
183  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
184  OffsetType numBad = 0;
185  // Count of "bad" column indices is a reduction over rows.
186  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
187  functor_type (lclColInds, gblColInds, ptr,
188  lclColMap, numRowEnt),
189  numBad);
190  return numBad;
191  }
192 
193  private:
194  ::Kokkos::View<LO*, DT> lclColInds_;
195  ::Kokkos::View<const GO*, DT> gblColInds_;
196  ::Kokkos::View<const OffsetType*, DT> ptr_;
198  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
199  };
200 
201  } // namespace Impl
202 
217  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
218  OffsetType
219  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
220  const Kokkos::View<const GO*, DT>& gblColInds,
221  const Kokkos::View<const OffsetType*, DT>& ptr,
222  const LocalMap<LO, GO, DT>& lclColMap,
223  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
224  {
225  using Impl::ConvertColumnIndicesFromGlobalToLocal;
226  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
227  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
228  }
229 
230  template<class ViewType, class LO>
231  class MaxDifference {
232  public:
233  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
234 
235  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
236  dst = 0;
237  }
238 
239  KOKKOS_INLINE_FUNCTION void
240  join (volatile LO& dst, const volatile LO& src) const
241  {
242  dst = (src > dst) ? src : dst;
243  }
244 
245  KOKKOS_INLINE_FUNCTION void
246  operator () (const LO lclRow, LO& maxNumEnt) const
247  {
248  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
249  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
250  }
251  private:
252  typename ViewType::const_type ptr_;
253  };
254 
255  template<class ViewType, class LO>
256  typename ViewType::non_const_value_type
257  maxDifference (const char kernelLabel[],
258  const ViewType& ptr,
259  const LO lclNumRows)
260  {
261  if (lclNumRows == 0) {
262  // mfh 07 May 2018: Weirdly, I need this special case,
263  // otherwise I get the wrong answer.
264  return static_cast<LO> (0);
265  }
266  else {
267  using execution_space = typename ViewType::execution_space;
268  using range_type = Kokkos::RangePolicy<execution_space, LO>;
269  LO theMaxNumEnt {0};
270  Kokkos::parallel_reduce (kernelLabel,
271  range_type (0, lclNumRows),
272  MaxDifference<ViewType, LO> (ptr),
273  theMaxNumEnt);
274  return theMaxNumEnt;
275  }
276  }
277 
278  } // namespace Details
279 
280  template <class LocalOrdinal, class GlobalOrdinal, class Node>
281  bool
282  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
283  getDebug() {
284  return Details::Behavior::debug("CrsGraph");
285  }
286 
287  template <class LocalOrdinal, class GlobalOrdinal, class Node>
288  bool
289  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
290  getVerbose() {
291  return Details::Behavior::verbose("CrsGraph");
292  }
293 
294  template <class LocalOrdinal, class GlobalOrdinal, class Node>
295  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
296  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
297  const size_t maxNumEntriesPerRow,
298  const ProfileType /* pftype */,
299  const Teuchos::RCP<Teuchos::ParameterList>& params) :
300  dist_object_type (rowMap)
301  , rowMap_ (rowMap)
302  , numAllocForAllRows_ (maxNumEntriesPerRow)
303  {
304  const char tfecfFuncName[] =
305  "CrsGraph(rowMap,maxNumEntriesPerRow,pftype,params): ";
306  staticAssertions ();
307  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
308  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
309  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
310  "a valid size_t value, which in this case means it must not be "
311  "Teuchos::OrdinalTraits<size_t>::invalid().");
312  resumeFill (params);
313  checkInternalState ();
314  }
315 
316  template <class LocalOrdinal, class GlobalOrdinal, class Node>
318  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
319  const Teuchos::RCP<const map_type>& colMap,
320  const size_t maxNumEntriesPerRow,
321  const ProfileType /* pftype */,
322  const Teuchos::RCP<Teuchos::ParameterList>& params) :
323  dist_object_type (rowMap)
324  , rowMap_ (rowMap)
325  , colMap_ (colMap)
326  , numAllocForAllRows_ (maxNumEntriesPerRow)
327  {
328  const char tfecfFuncName[] =
329  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,pftype,params): ";
330  staticAssertions ();
331  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
332  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
333  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
334  "a valid size_t value, which in this case means it must not be "
335  "Teuchos::OrdinalTraits<size_t>::invalid().");
336  resumeFill (params);
337  checkInternalState ();
338  }
339 
340  template <class LocalOrdinal, class GlobalOrdinal, class Node>
342  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
343  const Teuchos::ArrayView<const size_t>& numEntPerRow,
344  const ProfileType /* pftype */,
345  const Teuchos::RCP<Teuchos::ParameterList>& params) :
346  dist_object_type (rowMap)
347  , rowMap_ (rowMap)
348  , numAllocForAllRows_ (0)
349  {
350  const char tfecfFuncName[] =
351  "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
352  staticAssertions ();
353 
354  const size_t lclNumRows = rowMap.is_null () ?
355  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
356  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
357  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
358  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
359  << " != the local number of rows " << lclNumRows << " as specified by "
360  "the input row Map.");
361 
362  if (debug_) {
363  for (size_t r = 0; r < lclNumRows; ++r) {
364  const size_t curRowCount = numEntPerRow[r];
365  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
366  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
367  std::invalid_argument, "numEntPerRow(" << r << ") "
368  "specifies an invalid number of entries "
369  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
370  }
371  }
372 
373  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
374  // The latter is a const View, so we have to copy into a nonconst
375  // View first, then assign.
376  typedef decltype (k_numAllocPerRow_) out_view_type;
377  typedef typename out_view_type::non_const_type nc_view_type;
378  typedef Kokkos::View<const size_t*,
379  typename nc_view_type::array_layout,
380  Kokkos::HostSpace,
381  Kokkos::MemoryUnmanaged> in_view_type;
382  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
383  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
384  lclNumRows);
385  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
386  k_numAllocPerRow_ = numAllocPerRowOut;
387 
388  resumeFill (params);
389  checkInternalState ();
390  }
391 
392 
393 
394  template <class LocalOrdinal, class GlobalOrdinal, class Node>
396  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
397  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
398  const ProfileType /* pftype */,
399  const Teuchos::RCP<Teuchos::ParameterList>& params) :
400  dist_object_type (rowMap)
401  , rowMap_ (rowMap)
402  , k_numAllocPerRow_ (numEntPerRow.h_view)
403  , numAllocForAllRows_ (0)
404  {
405  const char tfecfFuncName[] =
406  "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
407  staticAssertions ();
408 
409  const size_t lclNumRows = rowMap.is_null () ?
410  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
411  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
412  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
413  std::invalid_argument, "numEntPerRow has length " <<
414  numEntPerRow.extent (0) << " != the local number of rows " <<
415  lclNumRows << " as specified by " "the input row Map.");
416 
417  if (debug_) {
418  for (size_t r = 0; r < lclNumRows; ++r) {
419  const size_t curRowCount = numEntPerRow.h_view(r);
420  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
421  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
422  std::invalid_argument, "numEntPerRow(" << r << ") "
423  "specifies an invalid number of entries "
424  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
425  }
426  }
427 
428  resumeFill (params);
429  checkInternalState ();
430  }
431 
432 
433  template <class LocalOrdinal, class GlobalOrdinal, class Node>
435  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
436  const Teuchos::RCP<const map_type>& colMap,
437  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
438  const ProfileType /* pftype */,
439  const Teuchos::RCP<Teuchos::ParameterList>& params) :
440  dist_object_type (rowMap)
441  , rowMap_ (rowMap)
442  , colMap_ (colMap)
443  , k_numAllocPerRow_ (numEntPerRow.h_view)
444  , numAllocForAllRows_ (0)
445  {
446  const char tfecfFuncName[] =
447  "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
448  staticAssertions ();
449 
450  const size_t lclNumRows = rowMap.is_null () ?
451  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
452  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
453  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
454  std::invalid_argument, "numEntPerRow has length " <<
455  numEntPerRow.extent (0) << " != the local number of rows " <<
456  lclNumRows << " as specified by " "the input row Map.");
457 
458  if (debug_) {
459  for (size_t r = 0; r < lclNumRows; ++r) {
460  const size_t curRowCount = numEntPerRow.h_view(r);
461  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
462  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
463  std::invalid_argument, "numEntPerRow(" << r << ") "
464  "specifies an invalid number of entries "
465  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
466  }
467  }
468 
469  resumeFill (params);
470  checkInternalState ();
471  }
472 
473 
474  template <class LocalOrdinal, class GlobalOrdinal, class Node>
476  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
477  const Teuchos::RCP<const map_type>& colMap,
478  const Teuchos::ArrayView<const size_t>& numEntPerRow,
479  const ProfileType /* pftype */,
480  const Teuchos::RCP<Teuchos::ParameterList>& params) :
481  dist_object_type (rowMap)
482  , rowMap_ (rowMap)
483  , colMap_ (colMap)
484  , numAllocForAllRows_ (0)
485  {
486  const char tfecfFuncName[] =
487  "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
488  staticAssertions ();
489 
490  const size_t lclNumRows = rowMap.is_null () ?
491  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
493  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
494  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
495  << " != the local number of rows " << lclNumRows << " as specified by "
496  "the input row Map.");
497 
498  if (debug_) {
499  for (size_t r = 0; r < lclNumRows; ++r) {
500  const size_t curRowCount = numEntPerRow[r];
501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
502  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
503  std::invalid_argument, "numEntPerRow(" << r << ") "
504  "specifies an invalid number of entries "
505  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
506  }
507  }
508 
509  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
510  // The latter is a const View, so we have to copy into a nonconst
511  // View first, then assign.
512  typedef decltype (k_numAllocPerRow_) out_view_type;
513  typedef typename out_view_type::non_const_type nc_view_type;
514  typedef Kokkos::View<const size_t*,
515  typename nc_view_type::array_layout,
516  Kokkos::HostSpace,
517  Kokkos::MemoryUnmanaged> in_view_type;
518  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
519  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
520  lclNumRows);
521  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
522  k_numAllocPerRow_ = numAllocPerRowOut;
523 
524  resumeFill (params);
525  checkInternalState ();
526  }
527 
528 
529  template <class LocalOrdinal, class GlobalOrdinal, class Node>
531  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
532  const Teuchos::RCP<const map_type>& colMap,
533  const typename local_graph_type::row_map_type& rowPointers,
534  const typename local_graph_type::entries_type::non_const_type& columnIndices,
535  const Teuchos::RCP<Teuchos::ParameterList>& params) :
536  dist_object_type (rowMap)
537  , rowMap_(rowMap)
538  , colMap_(colMap)
539  , numAllocForAllRows_(0)
540  , storageStatus_(Details::STORAGE_1D_PACKED)
541  , indicesAreAllocated_(true)
542  , indicesAreLocal_(true)
543  {
544  staticAssertions ();
545  if (! params.is_null() && params->isParameter("sorted") &&
546  ! params->get<bool>("sorted")) {
547  indicesAreSorted_ = false;
548  }
549  else {
550  indicesAreSorted_ = true;
551  }
552  setAllIndices (rowPointers, columnIndices);
553  checkInternalState ();
554  }
555 
556  template <class LocalOrdinal, class GlobalOrdinal, class Node>
558  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
559  const Teuchos::RCP<const map_type>& colMap,
560  const Teuchos::ArrayRCP<size_t>& rowPointers,
561  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
562  const Teuchos::RCP<Teuchos::ParameterList>& params) :
563  dist_object_type (rowMap)
564  , rowMap_ (rowMap)
565  , colMap_ (colMap)
566  , numAllocForAllRows_ (0)
567  , storageStatus_ (Details::STORAGE_1D_PACKED)
568  , indicesAreAllocated_ (true)
569  , indicesAreLocal_ (true)
570  {
571  staticAssertions ();
572  if (! params.is_null() && params->isParameter("sorted") &&
573  ! params->get<bool>("sorted")) {
574  indicesAreSorted_ = false;
575  }
576  else {
577  indicesAreSorted_ = true;
578  }
579  setAllIndices (rowPointers, columnIndices);
580  checkInternalState ();
581  }
582 
583  template <class LocalOrdinal, class GlobalOrdinal, class Node>
585  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
586  const Teuchos::RCP<const map_type>& colMap,
587  const local_graph_type& k_local_graph_,
588  const Teuchos::RCP<Teuchos::ParameterList>& params)
589  : CrsGraph (k_local_graph_,
590  rowMap,
591  colMap,
592  Teuchos::null,
593  Teuchos::null,
594  params)
595  {}
596 
597  template <class LocalOrdinal, class GlobalOrdinal, class Node>
599  CrsGraph (const local_graph_type& k_local_graph_,
600  const Teuchos::RCP<const map_type>& rowMap,
601  const Teuchos::RCP<const map_type>& colMap,
602  const Teuchos::RCP<const map_type>& domainMap,
603  const Teuchos::RCP<const map_type>& rangeMap,
604  const Teuchos::RCP<Teuchos::ParameterList>& params)
605  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
606  , rowMap_ (rowMap)
607  , colMap_ (colMap)
608  , lclGraph_ (k_local_graph_)
609  , numAllocForAllRows_ (0)
610  , storageStatus_ (Details::STORAGE_1D_PACKED)
611  , indicesAreAllocated_ (true)
612  , indicesAreLocal_ (true)
613  {
614  staticAssertions();
615  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
616 
617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
618  colMap.is_null (), std::runtime_error,
619  ": The input column Map must be nonnull.");
620  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
622  std::runtime_error,
623  ": The input row Map and the input local graph need to have the same "
624  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
625  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
626  << " row(s).");
627 
628  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
629  // rowMap_->getNodeNumElements(), but it doesn't have to.
630  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
632  // ": The input row Map and the input local graph need to have the same "
633  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
634  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
636  k_lclInds1D_.extent (0) != 0 || k_gblInds1D_.extent (0) != 0, std::logic_error,
637  ": cannot have 1D data structures allocated.");
638 
639  if(! params.is_null() && params->isParameter("sorted") &&
640  ! params->get<bool>("sorted")) {
641  indicesAreSorted_ = false;
642  }
643  else {
644  indicesAreSorted_ = true;
645  }
646 
647  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
648  rangeMap .is_null() ? rowMap_ : rangeMap);
649  Teuchos::Array<int> remotePIDs (0); // unused output argument
650  this->makeImportExport (remotePIDs, false);
651 
652  k_lclInds1D_ = lclGraph_.entries;
653  k_rowPtrs_ = lclGraph_.row_map;
654 
655  const bool callComputeGlobalConstants = params.get () == nullptr ||
656  params->get ("compute global constants", true);
657 
658  if (callComputeGlobalConstants) {
659  this->computeGlobalConstants ();
660  }
661  this->fillComplete_ = true;
662  this->checkInternalState ();
663  }
664 
665  template <class LocalOrdinal, class GlobalOrdinal, class Node>
667  CrsGraph (const local_graph_type& lclGraph,
668  const Teuchos::RCP<const map_type>& rowMap,
669  const Teuchos::RCP<const map_type>& colMap,
670  const Teuchos::RCP<const map_type>& domainMap,
671  const Teuchos::RCP<const map_type>& rangeMap,
672  const Teuchos::RCP<const import_type>& importer,
673  const Teuchos::RCP<const export_type>& exporter,
674  const Teuchos::RCP<Teuchos::ParameterList>& params) :
675  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
676  rowMap_ (rowMap),
677  colMap_ (colMap),
678  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
679  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
680  importer_ (importer),
681  exporter_ (exporter),
682  lclGraph_ (lclGraph),
683  numAllocForAllRows_ (0),
684  storageStatus_ (Details::STORAGE_1D_PACKED),
685  indicesAreAllocated_ (true),
686  indicesAreLocal_ (true)
687  {
688  staticAssertions();
689  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_type,"
690  "Map,Map,Map,Map,Import,Export,params): ";
691 
692  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
693  (colMap.is_null (), std::runtime_error,
694  "The input column Map must be nonnull.");
695 
696  k_lclInds1D_ = lclGraph_.entries;
697  k_rowPtrs_ = lclGraph_.row_map;
698 
699  if (! params.is_null() && params->isParameter("sorted") &&
700  ! params->get<bool>("sorted")) {
701  indicesAreSorted_ = false;
702  }
703  else {
704  indicesAreSorted_ = true;
705  }
706 
707  const bool callComputeGlobalConstants =
708  params.get () == nullptr ||
709  params->get ("compute global constants", true);
710  if (callComputeGlobalConstants) {
711  this->computeGlobalConstants ();
712  }
713  fillComplete_ = true;
714  checkInternalState ();
715  }
716 
// Builds and returns a parameter list named "Tpetra::CrsGraph" that
// holds two sublists, "Import" and "Export".  Both are set from the
// same list, which a temporary Distributor is given at construction.
//
// NOTE(review): listing lines 719-720 (class qualifier and function
// name) are missing from this capture.  setParameterList() in this file
// calls getValidParameters(), which is presumably this function --
// confirm the name and cv-qualifier against the class declaration.
717  template <class LocalOrdinal, class GlobalOrdinal, class Node>
718  Teuchos::RCP<const Teuchos::ParameterList>
721  {
722  using Teuchos::RCP;
723  using Teuchos::ParameterList;
724  using Teuchos::parameterList;
725 
726  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
727 
728  // Make a sublist for the Import.
729  RCP<ParameterList> importSublist = parameterList ("Import");
730 
731  // FIXME (mfh 02 Apr 2012) We should really have the Import and
732  // Export objects fill in these lists. However, we don't want to
733  // create an Import or Export unless we need them. For now, we
734  // know that the Import and Export just pass the list directly to
735  // their Distributor, so we can create a Distributor here
736  // (Distributor's constructor is a lightweight operation) and have
737  // it fill in the list.
738 
739  // Fill in Distributor default parameters by creating a
740  // Distributor and asking it to do the work.
741  Distributor distributor (rowMap_->getComm (), importSublist);
742  params->set ("Import", *importSublist, "How the Import performs communication.");
743 
744  // Make a sublist for the Export. For now, it's a clone of the
745  // Import sublist. It's not a shallow copy, though, since we
746  // might like the Import to do communication differently than the
747  // Export.
748  params->set ("Export", *importSublist, "How the Export performs communication.");
749 
750  return params;
751  }
752 
753  template <class LocalOrdinal, class GlobalOrdinal, class Node>
754  void
756  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
757  {
758  Teuchos::RCP<const Teuchos::ParameterList> validParams =
759  getValidParameters ();
760  params->validateParametersAndSetDefaults (*validParams);
761  this->setMyParamList (params);
762  }
763 
// Returns the row Map's global element count.  rowMap_ is dereferenced
// without a null check.
//
// NOTE(review): listing lines 765-767 (return type, class qualifier,
// function name) are missing from this capture; presumably this is
// CrsGraph::getGlobalNumRows -- confirm against the class declaration.
764  template <class LocalOrdinal, class GlobalOrdinal, class Node>
768  {
769  return rowMap_->getGlobalNumElements ();
770  }
771 
// Returns the domain Map's global element count.  Throws
// std::runtime_error if the graph is not fill complete or the domain
// Map is null.
//
// NOTE(review): listing lines 773-775 (return type, class qualifier,
// function name) are missing from this capture; the name is taken from
// tfecfFuncName below -- confirm the rest against the class declaration.
772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
776  {
777  const char tfecfFuncName[] = "getGlobalNumCols: ";
778  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
779  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
780  "The graph does not have a domain Map. You may not call this method in "
781  "that case.");
782  return getDomainMap ()->getGlobalNumElements ();
783  }
784 
// Returns the row Map's local element count, or zero if the row Map is
// null.
//
// NOTE(review): listing lines 787-788 (class qualifier, function name)
// are missing from this capture.  A comment elsewhere in this file says
// getNodeNumRows() returns rowMap_->getNodeNumElements(), so this is
// presumably that method -- confirm against the class declaration.
785  template <class LocalOrdinal, class GlobalOrdinal, class Node>
786  size_t
789  {
790  return this->rowMap_.is_null () ?
791  static_cast<size_t> (0) :
792  this->rowMap_->getNodeNumElements ();
793  }
794 
// Returns the column Map's local element count.  Throws
// std::runtime_error if hasColMap() is false.
//
// NOTE(review): listing lines 797-798 (class qualifier, function name)
// are missing from this capture; the name is taken from tfecfFuncName
// below -- confirm against the class declaration.
795  template <class LocalOrdinal, class GlobalOrdinal, class Node>
796  size_t
799  {
800  const char tfecfFuncName[] = "getNodeNumCols: ";
801  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
802  ! hasColMap (), std::runtime_error,
803  "The graph does not have a column Map. You may not call this method "
804  "unless the graph has a column Map. This requires either that a custom "
805  "column Map was given to the constructor, or that fillComplete() has "
806  "been called.");
// The null branch below looks unreachable: hasColMap() in this file is
// defined as ! colMap_.is_null(), and the check above already threw.
807  return colMap_.is_null () ? static_cast<size_t> (0) :
808  colMap_->getNodeNumElements ();
809  }
810 
811 
812 
813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
816  getRowMap () const
817  {
818  return rowMap_;
819  }
820 
821  template <class LocalOrdinal, class GlobalOrdinal, class Node>
822  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
824  getColMap () const
825  {
826  return colMap_;
827  }
828 
// Returns the stored domain Map (domainMap_).
//
// NOTE(review): listing lines 831-832 (class qualifier, function name)
// are missing from this capture.  By analogy with getRowMap/getColMap/
// getRangeMap in this file, this is presumably getDomainMap () const --
// confirm against the class declaration.
829  template <class LocalOrdinal, class GlobalOrdinal, class Node>
830  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
833  {
834  return domainMap_;
835  }
836 
837  template <class LocalOrdinal, class GlobalOrdinal, class Node>
838  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
840  getRangeMap () const
841  {
842  return rangeMap_;
843  }
844 
845  template <class LocalOrdinal, class GlobalOrdinal, class Node>
846  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
848  getImporter () const
849  {
850  return importer_;
851  }
852 
853  template <class LocalOrdinal, class GlobalOrdinal, class Node>
854  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
856  getExporter () const
857  {
858  return exporter_;
859  }
860 
861  template <class LocalOrdinal, class GlobalOrdinal, class Node>
862  bool
864  hasColMap () const
865  {
866  return ! colMap_.is_null ();
867  }
868 
// Returns true only when all three hold: indices are allocated,
// k_numRowEntries_ has zero extent, and there is at least one local
// row.
//
// NOTE(review): listing lines 871-872 (class qualifier, function name)
// are missing from this capture; the FIXME below suggests this is the
// "is storage optimized" query -- confirm against the class declaration.
869  template <class LocalOrdinal, class GlobalOrdinal, class Node>
870  bool
873  {
874  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
875  // getNodeNumRows() is zero?
876 
877  const bool isOpt = indicesAreAllocated_ &&
878  k_numRowEntries_.extent (0) == 0 &&
879  getNodeNumRows () > 0;
880 
881  return isOpt;
882  }
883 
// Always returns StaticProfile.
//
// NOTE(review): listing lines 885-887 (return type, class qualifier,
// function name) are missing from this capture; presumably the return
// type is ProfileType (the enum the constructors take) -- confirm
// against the class declaration.
884  template <class LocalOrdinal, class GlobalOrdinal, class Node>
888  {
889  return StaticProfile;
890  }
891 
// Returns the cached globalNumEntries_.  Throws std::logic_error if
// haveGlobalConstants_ is false.
//
// NOTE(review): listing lines 893-895 (return type, class qualifier,
// function name) are missing from this capture; the name is taken from
// tfecfFuncName below -- confirm the rest against the class declaration.
892  template <class LocalOrdinal, class GlobalOrdinal, class Node>
896  {
897  const char tfecfFuncName[] = "getGlobalNumEntries: ";
898  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
899  (! this->haveGlobalConstants_, std::logic_error,
900  "The graph does not have global constants computed, "
901  "but the user has requested them.");
902 
903  return globalNumEntries_;
904  }
905 
// Returns the number of graph entries stored on this process:
//  - zero if indices are not allocated or there are no local rows;
//  - if k_numRowEntries_ is empty, the last row offset
//    (lclGraph_.row_map(lclNumRows)), or zero if row_map is too short;
//  - otherwise the sum of k_numRowEntries_ over the first
//    min(lclNumRows, k_numRowEntries_.extent(0)) rows.
//
// NOTE(review): listing lines 908-909 (class qualifier, function name)
// are missing from this capture.  The kernel label below spells
// "getNumNodeEntries"; confirm the method's actual name against the
// class declaration.
906  template <class LocalOrdinal, class GlobalOrdinal, class Node>
907  size_t
910  {
911  typedef LocalOrdinal LO;
912 
913  if (this->indicesAreAllocated_) {
914  const LO lclNumRows = this->getNodeNumRows ();
915  if (lclNumRows == 0) {
916  return static_cast<size_t> (0);
917  }
918  else {
919  // Avoid the "*this capture" issue by creating a local Kokkos::View.
920  auto numEntPerRow = this->k_numRowEntries_;
921  const LO numNumEntPerRow = numEntPerRow.extent (0);
922  if (numNumEntPerRow == 0) {
// No per-row counts: fall back to the row offsets, guarding against a
// row_map with fewer than lclNumRows + 1 entries.
923  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
924  static_cast<LO> (lclNumRows + 1)) {
925  return static_cast<size_t> (0);
926  }
927  else {
928  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
929  }
930  }
931  else { // k_numRowEntries_ is populated
932  // k_numRowEntries_ is actually a host View, so we run
933  // the sum in its native execution space. This also means
934  // that we can use explicit capture (which could perhaps
935  // improve build time) instead of KOKKOS_LAMBDA, and avoid
936  // any CUDA build issues with trying to run a __device__ -
937  // only function on host.
938  typedef typename num_row_entries_type::execution_space
939  host_exec_space;
940  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
941 
// Clamp the loop bound so the sum never reads past either array.
942  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
943  lclNumRows :
944  numNumEntPerRow;
945  size_t nodeNumEnt = 0;
946  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
947  range_type (0, upperLoopBound),
948  [=] (const LO& k, size_t& lclSum) {
949  lclSum += numEntPerRow(k);
950  }, nodeNumEnt);
951  return nodeNumEnt;
952  }
953  }
954  }
955  else { // nothing allocated on this process, so no entries
956  return static_cast<size_t> (0);
957  }
958  }
959 
// Returns the cached globalMaxNumRowEntries_ value.
// The exception macro raises std::logic_error when haveGlobalConstants_ is
// false, i.e. the cached value has not been computed.
// NOTE(review): listing lines 961-963 (return type and qualified name) were
// dropped by the page extraction; the name is taken from tfecfFuncName.
960  template <class LocalOrdinal, class GlobalOrdinal, class Node>
964  {
965  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
966  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
967  (! this->haveGlobalConstants_, std::logic_error,
968  "The graph does not have global constants computed, "
969  "but the user has requested them.");
970 
971  return globalMaxNumRowEntries_;
972  }
973 
// Returns the cached nodeMaxNumRowEntries_ value; no validity check is made.
// NOTE(review): listing lines 976-977 (qualified function name) were dropped
// by the page extraction; presumably getNodeMaxNumRowEntries() const.
974  template <class LocalOrdinal, class GlobalOrdinal, class Node>
975  size_t
978  {
979  return nodeMaxNumRowEntries_;
980  }
981 
// Returns the fillComplete_ flag.
// NOTE(review): listing lines 984-985 (qualified function name) were dropped
// by the page extraction; presumably isFillComplete() const.
982  template <class LocalOrdinal, class GlobalOrdinal, class Node>
983  bool
986  {
987  return fillComplete_;
988  }
989 
// Returns the negation of the fillComplete_ flag.
// NOTE(review): listing lines 992-993 (qualified function name) were dropped
// by the page extraction; presumably isFillActive() const.
990  template <class LocalOrdinal, class GlobalOrdinal, class Node>
991  bool
994  {
995  return ! fillComplete_;
996  }
997 
998 
// Returns the indicesAreLocal_ flag.
// NOTE(review): listing lines 1001-1002 (qualified function name) were
// dropped by the page extraction; presumably isLocallyIndexed() const.
999  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1000  bool
1003  {
1004  return indicesAreLocal_;
1005  }
1006 
// Returns the indicesAreGlobal_ flag.
// NOTE(review): listing lines 1009-1010 (qualified function name) were
// dropped by the page extraction; presumably isGloballyIndexed() const.
1007  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1008  bool
1011  {
1012  return indicesAreGlobal_;
1013  }
1014 
// Size of the local index allocation, read from the last row offset.
// NOTE(review): listing lines 1017-1018 (qualified function name) were
// dropped by the page extraction; presumably getNodeAllocationSize() const.
//
// Result by case:
//  - indices not allocated: OrdinalTraits<size_t>::invalid();
//  - zero local rows: 0;
//  - STORAGE_1D_PACKED: lclGraph_.row_map(lclNumRows), or 0 if row_map has
//    fewer than lclNumRows+1 entries;
//  - STORAGE_1D_UNPACKED: k_rowPtrs_(lclNumRows), or 0 if k_rowPtrs_ is empty;
//  - any other storage status: 0.
1015  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1016  size_t
1019  {
1020  typedef LocalOrdinal LO;
1021 
1022  if (this->indicesAreAllocated_) {
1023  const LO lclNumRows = this->getNodeNumRows ();
1024  if (lclNumRows == 0) {
1025  return static_cast<size_t> (0);
1026  }
1027  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1028  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1029  static_cast<LO> (lclNumRows + 1)) {
1030  return static_cast<size_t> (0);
1031  }
1032  else {
1033  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1034  }
1035  }
1036  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
// NOTE(review): unlike the packed case, this only checks for an empty
// View, not for extent >= lclNumRows+1.
1037  if (this->k_rowPtrs_.extent (0) == 0) {
1038  return static_cast<size_t> (0);
1039  }
1040  else {
1041  return ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1042  }
1043  }
1044  else {
1045  return static_cast<size_t> (0);
1046  }
1047  }
1048  else {
1049  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1050  }
1051  }
1052 
// Returns the communicator of rowMap_, or Teuchos::null when rowMap_ is null.
// NOTE(review): listing line 1055 (class qualifier) was dropped by the page
// extraction.
1053  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1054  Teuchos::RCP<const Teuchos::Comm<int> >
1056  getComm () const
1057  {
1058  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1059  }
1060 
// Returns the index base reported by rowMap_.
// NOTE(review): listing lines 1063-1064 (qualified function name) were
// dropped by the page extraction; presumably getIndexBase() const.
// NOTE(review): rowMap_ is dereferenced without the null check that
// getComm() performs -- confirm callers guarantee a non-null row Map.
1061  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1062  GlobalOrdinal
1065  {
1066  return rowMap_->getIndexBase ();
1067  }
1068 
// Returns the indicesAreAllocated_ flag.
// NOTE(review): listing line 1071 (class qualifier) was dropped by the page
// extraction.
1069  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1070  bool
1072  indicesAreAllocated () const
1073  {
1074  return indicesAreAllocated_;
1075  }
1076 
1077  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1078  bool
1079  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1080  isSorted () const
1081  {
1082  return indicesAreSorted_;
1083  }
1084 
// Returns the noRedundancies_ flag.
// NOTE(review): listing line 1087 (class qualifier) was dropped by the page
// extraction.
1085  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1086  bool
1088  isMerged () const
1089  {
1090  return noRedundancies_;
1091  }
1092 
// Marks the graph as modified: clears the sorted / no-redundancies flags and
// invalidates the cached local constants.
// NOTE(review): listing lines 1095-1096 (qualified function name) were
// dropped by the page extraction; presumably setLocallyModified(), which is
// what the insertion routines in this file call after writing indices.
1093  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1094  void
1097  {
1098  // FIXME (mfh 07 May 2013) How do we know that the change
1099  // introduced a redundancy, or even that it invalidated the sorted
1100  // order of indices? CrsGraph has always made this conservative
1101  // guess. It could be a bit costly to check at insertion time,
1102  // though.
1103  indicesAreSorted_ = false;
1104  noRedundancies_ = false;
1105 
1106  // We've modified the graph, so we'll have to recompute local
1107  // constants like the number of diagonal entries on this process.
1108  haveLocalConstants_ = false;
1109  }
1110 
// Allocates the row offsets and the 1-D column-index storage.
//
// Parameters:
//   lg      - GlobalIndices allocates k_gblInds1D_; otherwise k_lclInds1D_
//             is allocated. Also sets indicesAreLocal_ / indicesAreGlobal_.
//   verbose - when true, progress messages are written to std::cerr.
//
// Steps, in order: validate preconditions; build row offsets from either
// k_numAllocPerRow_ (per-row counts) or numAllocForAllRows_ (one count for
// all rows); allocate the index array sized by the last offset; allocate a
// zero-filled k_numRowEntries_ when there are rows; release the allocation
// hints; set indicesAreAllocated_; run checkInternalState().
//
// Throws std::logic_error for the indexing-mode / double-allocation checks,
// std::invalid_argument for bad allocation hints, and rethrows
// checkInternalState() failures as std::logic_error or std::runtime_error.
//
// NOTE(review): listing lines 1113, 1116, 1185 and 1205 were dropped by the
// page extraction -- presumably the class qualifier and using-declarations
// for ProfilingRegion, computeOffsetsFromCounts and
// computeOffsetsFromConstantCount; confirm against the repository.
1111  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1112  void
1114  allocateIndices (const ELocalGlobal lg, const bool verbose)
1115  {
1117  using Teuchos::arcp;
1118  using Teuchos::Array;
1119  using Teuchos::ArrayRCP;
1120  using std::endl;
// NOTE(review): size_type is Teuchos' ordinal type, which is presumably
// signed; numInds below is converted to it from a row offset -- confirm the
// narrowing/sign change is intended.
1121  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1122  typedef typename local_graph_type::row_map_type::non_const_type
1123  non_const_row_map_type;
1124  typedef typename local_graph_type::entries_type::non_const_type
1125  lcl_col_inds_type;
1126  typedef Kokkos::View<GlobalOrdinal*,
1127  typename lcl_col_inds_type::array_layout,
1128  device_type> gbl_col_inds_type;
1129  const char tfecfFuncName[] = "allocateIndices: ";
1130  const char suffix[] =
1131  " Please report this bug to the Tpetra developers.";
1132  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1133 
// prefix stays null unless verbose is set; every later dereference of
// *prefix is guarded by the same verbose test.
1134  std::unique_ptr<std::string> prefix;
1135  if (verbose) {
1136  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1137  std::ostringstream os;
1138  os << *prefix << "Start: lg="
1139  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1140  << ", numRows: " << this->getNodeNumRows() << endl;
1141  std::cerr << os.str();
1142  }
1143 
1144  // This is a protected function, only callable by us. If it was
1145  // called incorrectly, it is our fault. That's why the tests
1146  // below throw std::logic_error instead of std::invalid_argument.
// NOTE(review): these three messages start with ": " although
// tfecfFuncName already ends in ": "; if the macro prepends tfecfFuncName
// the output presumably reads "allocateIndices: : ..." -- confirm.
1147  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1148  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1149  ": The graph is locally indexed, but Tpetra code is calling "
1150  "this method with lg=GlobalIndices." << suffix);
1151  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1152  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1153  ": The graph is globally indexed, but Tpetra code is calling "
1154  "this method with lg=LocalIndices." << suffix);
1155  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1156  (indicesAreAllocated (), std::logic_error, ": The graph's "
1157  "indices are already allocated, but Tpetra is calling "
1158  "allocateIndices again." << suffix);
1159  const size_t numRows = this->getNodeNumRows ();
1160 
1161  //
1162  // STATIC ALLOCATION PROFILE
1163  //
1164  if (verbose) {
1165  std::ostringstream os;
1166  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1167  std::cerr << os.str();
1168  }
// Offsets are built in a local View and only assigned to the member
// after both validation branches below have passed.
1169  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1170 
1171  if (this->k_numAllocPerRow_.extent (0) != 0) {
1172  // It's OK to throw std::invalid_argument here, because we
1173  // haven't incurred any side effects yet. Throwing that
1174  // exception (and not, say, std::logic_error) implies that the
1175  // instance can recover.
1176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177  (this->k_numAllocPerRow_.extent (0) != numRows,
1178  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1179  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1180  << ", but its length != numRows = " << numRows << ".");
1181 
1182  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1183  // we want to compute here) lives on device. That's OK;
1184  // computeOffsetsFromCounts can handle this case.
1186 
1187  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1188  // doesn't attempt to check its input for "invalid" flag
1189  // values. For now, we omit that feature of the sequential
1190  // code disabled below.
1191  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1192  }
1193  else {
1194  // It's OK to throw std::invalid_argument here, because we
1195  // haven't incurred any side effects yet. Throwing that
1196  // exception (and not, say, std::logic_error) implies that the
1197  // instance can recover.
1198  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1199  (this->numAllocForAllRows_ ==
1200  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1201  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1202  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1203  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1204 
1206  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1207  }
1208 
1209  // "Commit" the resulting row offsets.
1210  this->k_rowPtrs_ = k_rowPtrs;
1211 
// The last row offset is the total number of index slots to allocate.
1212  const size_type numInds =
1213  Details::getEntryOnHost(this->k_rowPtrs_, numRows);
1214  if (lg == LocalIndices) {
1215  if (verbose) {
1216  std::ostringstream os;
1217  os << *prefix << "Allocate local column indices "
1218  "k_lclInds1D_: " << numInds << endl;
1219  std::cerr << os.str();
1220  }
1221  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1222  }
1223  else {
1224  if (verbose) {
1225  std::ostringstream os;
1226  os << *prefix << "Allocate global column indices "
1227  "k_gblInds1D_: " << numInds << endl;
1228  std::cerr << os.str();
1229  }
1230  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1231  }
1232  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1233 
1234  this->indicesAreLocal_ = (lg == LocalIndices);
1235  this->indicesAreGlobal_ = (lg == GlobalIndices);
1236 
1237  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1238  using Kokkos::ViewAllocateWithoutInitializing;
1239  typedef decltype (k_numRowEntries_) row_ent_type;
1240  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1241  if (verbose) {
1242  std::ostringstream os;
1243  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1244  << endl;
1245  std::cerr << os.str();
1246  }
1247  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1248  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1249  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1250  }
1251 
1252  // Once indices are allocated, CrsGraph needs to free this information.
1253  this->numAllocForAllRows_ = 0;
1254  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1255  this->indicesAreAllocated_ = true;
1256 
// Re-wrap any checkInternalState() failure so the message identifies
// allocateIndices as the point of detection.
1257  try {
1258  this->checkInternalState ();
1259  }
1260  catch (std::logic_error& e) {
1261  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1262  (true, std::logic_error, "At end of allocateIndices, "
1263  "checkInternalState threw std::logic_error: "
1264  << e.what ());
1265  }
1266  catch (std::exception& e) {
1267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1268  (true, std::runtime_error, "At end of allocateIndices, "
1269  "checkInternalState threw std::exception: "
1270  << e.what ());
1271  }
1272  catch (...) {
1273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1274  (true, std::runtime_error, "At end of allocateIndices, "
1275  "checkInternalState threw an exception "
1276  "not a subclass of std::exception.");
1277  }
1278 
1279  if (verbose) {
1280  std::ostringstream os;
1281  os << *prefix << "Done" << endl;
1282  std::cerr << os.str();
1283  }
1284  }
1285 
1286  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1287  Teuchos::ArrayView<const LocalOrdinal>
1288  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1289  getLocalView (const RowInfo& rowinfo) const
1290  {
1291  using Kokkos::subview;
1292  typedef LocalOrdinal LO;
1293  typedef Kokkos::View<const LO*, execution_space,
1294  Kokkos::MemoryUnmanaged> row_view_type;
1295 
1296  if (rowinfo.allocSize == 0) {
1297  return Teuchos::ArrayView<const LO> ();
1298  }
1299  else { // nothing in the row to view
1300  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1301  const size_t start = rowinfo.offset1D;
1302  const size_t len = rowinfo.allocSize;
1303  const std::pair<size_t, size_t> rng (start, start + len);
1304  // mfh 23 Nov 2015: Don't just create a subview of
1305  // k_lclInds1D_ directly, because that first creates a
1306  // _managed_ subview, then returns an unmanaged version of
1307  // that. That touches the reference count, which costs
1308  // performance in a measurable way.
1309  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1310  const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1311  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1312  }
1313  else {
1314  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1315  }
1316  }
1317  }
1318 
// Raw-pointer access to the local column-index slots of one row.
//
// Outputs:
//   lclInds  - set to the start of the row's slots in k_lclInds1D_, or
//              nullptr when the row has zero capacity or storage is empty.
//   capacity - set to rowInfo.allocSize, or 0 in the same empty cases.
// Returns 0 on success.  Returns -1 (outputs left at nullptr / 0) only when
// debug_ is set and the row's range would run past the end of k_lclInds1D_.
// NOTE(review): listing line 1321 (class qualifier) was dropped by the page
// extraction.
1319  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320  LocalOrdinal
1322  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1323  LocalOrdinal& capacity,
1324  const RowInfo& rowInfo) const
1325  {
1326  lclInds = nullptr;
1327  capacity = 0;
1328 
1329  if (rowInfo.allocSize != 0 && k_lclInds1D_.extent (0) != 0) {
// The bounds check runs only in debug mode.
1330  if (debug_) {
1331  if (rowInfo.offset1D + rowInfo.allocSize >
1332  static_cast<size_t> (k_lclInds1D_.extent (0))) {
1333  return static_cast<LocalOrdinal> (-1);
1334  }
1335  }
1336  lclInds = k_lclInds1D_.data () + rowInfo.offset1D;
1337  capacity = rowInfo.allocSize;
1338  }
1339  return static_cast<LocalOrdinal> (0);
1340  }
1341 
// Writable view of the allocated local column-index slots of one row.
// Returns an empty ArrayView when rowinfo.allocSize is zero or when
// k_lclInds1D_ is empty; otherwise the view covers allocSize slots starting
// at rowinfo.offset1D.
// NOTE(review): listing lines 1344-1345 (qualified function name and
// parameter list) were dropped by the page extraction; presumably
// getLocalViewNonConst (const RowInfo& rowinfo), as called from
// insertIndices -- confirm against the repository.
1342  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1343  Teuchos::ArrayView<LocalOrdinal>
1346  {
1347  using Kokkos::subview;
1348  typedef LocalOrdinal LO;
1349  typedef Kokkos::View<LO*, execution_space,
1350  Kokkos::MemoryUnmanaged> row_view_type;
1351 
1352  if (rowinfo.allocSize == 0) { // nothing in the row to view
1353  return Teuchos::ArrayView<LO> ();
1354  }
1355  else {
1356  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1357  const size_t start = rowinfo.offset1D;
1358  const size_t len = rowinfo.allocSize;
1359  const std::pair<size_t, size_t> rng (start, start + len);
1360  // mfh 23 Nov 2015: Don't just create a subview of
1361  // k_lclInds1D_ directly, because that first creates a
1362  // _managed_ subview, then returns an unmanaged version of
1363  // that. That touches the reference count, which costs
1364  // performance in a measurable way.
1365  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1366  LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1367  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1368  }
1369  else {
1370  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1371  }
1372  }
1373  }
1374 
1375 
// Read-only unmanaged Kokkos::View of the allocated local column-index slots
// of one row.  Returns a default-constructed (empty) View when
// rowInfo.allocSize is zero or when k_lclInds1D_ is empty; otherwise the
// View covers allocSize slots starting at rowInfo.offset1D.
// NOTE(review): listing lines 1378 and 1380 (execution-space template
// argument of the return type, and the class qualifier) were dropped by the
// page extraction.
1376  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1377  Kokkos::View<const LocalOrdinal*,
1379  Kokkos::MemoryUnmanaged>
1381  getLocalKokkosRowView (const RowInfo& rowInfo) const
1382  {
1383  typedef LocalOrdinal LO;
1384  typedef Kokkos::View<const LO*, execution_space,
1385  Kokkos::MemoryUnmanaged> row_view_type;
1386 
1387  if (rowInfo.allocSize == 0) { // nothing in the row to view
1388  return row_view_type ();
1389  }
1390  else {
1391  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1392  const size_t start = rowInfo.offset1D;
1393  const size_t len = rowInfo.allocSize;
1394  const std::pair<size_t, size_t> rng (start, start + len);
1395  // mfh 23 Nov 2015: Don't just create a subview of
1396  // k_lclInds1D_ directly, because that first creates a
1397  // _managed_ subview, then returns an unmanaged version of
1398  // that. That touches the reference count, which costs
1399  // performance in a measurable way.
1400  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1401  }
1402  else {
1403  return row_view_type (); // nothing in the row to view
1404  }
1405  }
1406  }
1407 
1408 
1409  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1410  Kokkos::View<LocalOrdinal*,
1411  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1412  Kokkos::MemoryUnmanaged>
1413  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1414  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1415  {
1416  using row_view_type = Kokkos::View<LocalOrdinal*,
1417  execution_space, Kokkos::MemoryUnmanaged>;
1418 
1419  if (rowInfo.allocSize == 0) {
1420  return row_view_type ();
1421  }
1422  else { // nothing in the row to view
1423  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1424  const size_t start = rowInfo.offset1D;
1425  const size_t len = rowInfo.allocSize;
1426  const std::pair<size_t, size_t> rng (start, start + len);
1427  // mfh 23 Nov 2015: Don't just create a subview of
1428  // k_lclInds1D_ directly, because that first creates a
1429  // _managed_ subview, then returns an unmanaged version of
1430  // that. That touches the reference count, which costs
1431  // performance in a measurable way.
1432  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1433  }
1434  else {
1435  return row_view_type (); // nothing in the row to view
1436  }
1437  }
1438  }
1439 
1440 
1441  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1442  Kokkos::View<const GlobalOrdinal*,
1443  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1444  Kokkos::MemoryUnmanaged>
1445  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1446  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1447  {
1448  using row_view_type = Kokkos::View<const GlobalOrdinal*,
1449  execution_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1450 
1451  if (rowinfo.allocSize == 0) {
1452  return row_view_type ();
1453  }
1454  else { // nothing in the row to view
1455  if (this->k_gblInds1D_.extent (0) != 0) { // 1-D storage
1456  const size_t start = rowinfo.offset1D;
1457  const size_t len = rowinfo.allocSize;
1458  const std::pair<size_t, size_t> rng (start, start + len);
1459  // mfh 23 Nov 2015: Don't just create a subview of
1460  // k_gblInds1D_ directly, because that first creates a
1461  // _managed_ subview, then returns an unmanaged version of
1462  // that. That touches the reference count, which costs
1463  // performance in a measurable way.
1464  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1465  }
1466  else {
1467  return row_view_type (); // nothing in the row to view
1468  }
1469  }
1470  }
1471 
1472 
1473  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1474  Teuchos::ArrayView<const GlobalOrdinal>
1475  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1476  getGlobalView (const RowInfo& rowinfo) const
1477  {
1478  using GO = global_ordinal_type;
1479 
1480  Teuchos::ArrayView<const GO> view;
1481  if (rowinfo.allocSize > 0 && k_gblInds1D_.extent (0) != 0) {
1482  const auto rng =
1483  std::make_pair (rowinfo.offset1D,
1484  rowinfo.offset1D + rowinfo.allocSize);
1485  // mfh 23 Nov 2015: Don't just create a subview of
1486  // k_gblInds1D_ directly, because that first creates a
1487  // _managed_ subview, then returns an unmanaged version of
1488  // that. That touches the reference count, which costs
1489  // performance in a measurable way.
1490  using row_view_type = Kokkos::View<const GO*,
1491  execution_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1492  row_view_type k_gblInds1D_unmanaged = k_gblInds1D_;
1493  using Kokkos::Compat::getConstArrayView;
1494  using Kokkos::subview;
1495  view = getConstArrayView (subview (k_gblInds1D_unmanaged, rng));
1496  }
1497  return view;
1498  }
1499 
1500 
// Raw-pointer access to the global column-index slots of one row.
//
// Outputs:
//   gblInds  - set to the start of the row's slots in k_gblInds1D_, or
//              nullptr when the row has zero capacity or storage is empty.
//   capacity - set to rowInfo.allocSize, or 0 in the same empty cases.
// Returns 0 on success.  Returns -1 (outputs left at nullptr / 0) only when
// debug_ is set and the row's range would run past the end of k_gblInds1D_.
// NOTE(review): listing line 1503 (class qualifier) was dropped by the page
// extraction.
1501  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1502  LocalOrdinal
1504  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1505  LocalOrdinal& capacity,
1506  const RowInfo& rowInfo) const
1507  {
1508  gblInds = nullptr;
1509  capacity = 0;
1510 
1511  if (rowInfo.allocSize != 0 && k_gblInds1D_.extent (0) != 0) {
// The bounds check runs only in debug mode.
1512  if (debug_) {
1513  if (rowInfo.offset1D + rowInfo.allocSize >
1514  static_cast<size_t> (k_gblInds1D_.extent (0))) {
1515  return static_cast<LocalOrdinal> (-1);
1516  }
1517  }
1518  gblInds = k_gblInds1D_.data () + rowInfo.offset1D;
1519  capacity = rowInfo.allocSize;
1520  }
1521  return static_cast<LocalOrdinal> (0);
1522  }
1523 
1524 
// Writable ArrayView of the allocated global column-index slots of one row.
// Returns an empty ArrayView unless rowinfo.allocSize is positive and
// k_gblInds1D_ is non-empty; otherwise the view covers allocSize slots
// starting at rowinfo.offset1D.
// NOTE(review): listing lines 1527-1528 (qualified function name and
// parameter list) were dropped by the page extraction; presumably
// getGlobalViewNonConst (const RowInfo& rowinfo), as called from
// insertIndices -- confirm against the repository.
1525  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1526  Teuchos::ArrayView<GlobalOrdinal>
1529  {
1530  using GO = global_ordinal_type;
1531 
1532  Teuchos::ArrayView<GO> view;
1533  if (rowinfo.allocSize > 0 && k_gblInds1D_.extent (0) != 0) {
1534  const auto rng =
1535  std::make_pair (rowinfo.offset1D,
1536  rowinfo.offset1D + rowinfo.allocSize);
1537  // mfh 23 Nov 2015: Don't just create a subview of
1538  // k_gblInds1D_ directly, because that first creates a
1539  // _managed_ subview, then returns an unmanaged version of
1540  // that. That touches the reference count, which costs
1541  // performance in a measurable way.
1542  using row_view_type = Kokkos::View<GO*, execution_space,
1543  Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1544  row_view_type k_gblInds1D_unmanaged = k_gblInds1D_;
1545  using Kokkos::Compat::getArrayView;
1546  using Kokkos::subview;
1547  view = getArrayView (subview (k_gblInds1D_unmanaged, rng));
1548  }
1549  return view;
1550  }
1551 
1552 
// Builds the RowInfo record (localRow, allocSize, numEntries, offset1D) for
// a local row index.
//
// If rowMap_ is null or myRow is not a local element of it, the record has
// localRow = offset1D = invalid and allocSize = numEntries = 0.
// If indices are allocated, offset and capacity come from k_rowPtrs_, and
// numEntries from k_numRowEntries_ (or the capacity when that View is empty).
// Otherwise the capacity comes from the allocation hints, numEntries is 0
// and offset1D is invalid.
// NOTE(review): listing line 1555 (class qualifier) was dropped by the page
// extraction.
1553  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1554  RowInfo
1556  getRowInfo (const LocalOrdinal myRow) const
1557  {
1558  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1559  RowInfo ret;
1560  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1561  ret.localRow = STINV;
1562  ret.allocSize = 0;
1563  ret.numEntries = 0;
1564  ret.offset1D = STINV;
1565  return ret;
1566  }
1567 
1568  ret.localRow = static_cast<size_t> (myRow);
1569  if (this->indicesAreAllocated ()) {
1570  // Offsets tell us the allocation size in this case.
1571  if (this->k_rowPtrs_.extent (0) == 0) {
1572  ret.offset1D = 0;
1573  ret.allocSize = 0;
1574  }
1575  else {
// NOTE(review): k_rowPtrs_ is indexed directly from host code here;
// presumably its memory is host-accessible -- confirm.
1576  ret.offset1D = this->k_rowPtrs_(myRow);
1577  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1578  }
1579 
// An empty k_numRowEntries_ is treated as "every slot is in use".
1580  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1581  ret.allocSize :
1582  this->k_numRowEntries_(myRow);
1583  }
1584  else { // haven't performed allocation yet; probably won't hit this code
1585  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1586  // allocate, rather than doing lazy allocation at first insert.
1587  // This will make k_numAllocPerRow_ obsolete.
1588  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1589  this->k_numAllocPerRow_(myRow) : // this is a host View
1590  this->numAllocForAllRows_;
1591  ret.numEntries = 0;
1592  ret.offset1D = STINV;
1593  }
1594 
1595  return ret;
1596  }
1597 
1598 
// Builds the RowInfo record (localRow, allocSize, numEntries, offset1D) for
// a global row index.
//
// The global index is translated with rowMap_->getLocalElement().  If
// rowMap_ is null or the translation yields the invalid LocalOrdinal, the
// record has localRow = offset1D = invalid and allocSize = numEntries = 0.
// The remaining logic matches getRowInfo(): offsets and capacity come from
// k_rowPtrs_ once indices are allocated, otherwise from the allocation hints.
// NOTE(review): listing line 1601 (class qualifier) was dropped by the page
// extraction.
1599  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1600  RowInfo
1602  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1603  {
1604  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1605  RowInfo ret;
1606  if (this->rowMap_.is_null ()) {
1607  ret.localRow = STINV;
1608  ret.allocSize = 0;
1609  ret.numEntries = 0;
1610  ret.offset1D = STINV;
1611  return ret;
1612  }
1613  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1614  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1615  ret.localRow = STINV;
1616  ret.allocSize = 0;
1617  ret.numEntries = 0;
1618  ret.offset1D = STINV;
1619  return ret;
1620  }
1621 
1622  ret.localRow = static_cast<size_t> (myRow);
1623  if (this->indicesAreAllocated ()) {
1624  // graph data structures have the info that we need
1625  //
1626  // if static graph, offsets tell us the allocation size
1627  if (this->k_rowPtrs_.extent (0) == 0) {
1628  ret.offset1D = 0;
1629  ret.allocSize = 0;
1630  }
1631  else {
1632  ret.offset1D = this->k_rowPtrs_(myRow);
1633  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1634  }
1635 
// An empty k_numRowEntries_ is treated as "every slot is in use".
1636  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1637  ret.allocSize :
1638  this->k_numRowEntries_(myRow);
1639  }
1640  else { // haven't performed allocation yet; probably won't hit this code
1641  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1642  // allocate, rather than doing lazy allocation at first insert.
1643  // This will make k_numAllocPerRow_ obsolete.
1644  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1645  this->k_numAllocPerRow_(myRow) : // this is a host View
1646  this->numAllocForAllRows_;
1647  ret.numEntries = 0;
1648  ret.offset1D = STINV;
1649  }
1650 
1651  return ret;
1652  }
1653 
1654 
// Checks the size relationships CrsGraph assumes between LocalOrdinal,
// GlobalOrdinal, size_t and global_size_t.  The sizeof relationships are
// enforced at compile time with static_assert; the max-value relationships
// are checked at run time and raise std::runtime_error on failure.
// NOTE(review): listing line 1657 (class qualifier) was dropped by the page
// extraction.
1655  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1656  void
1658  staticAssertions () const
1659  {
1660  using Teuchos::OrdinalTraits;
1661  typedef LocalOrdinal LO;
1662  typedef GlobalOrdinal GO;
1663  typedef global_size_t GST;
1664 
1665  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1666  // This is so that we can store local indices in the memory
1667  // formerly occupied by global indices.
1668  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1669  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1670  // Assumption: max(size_t) >= max(LocalOrdinal)
1671  // This is so that we can represent any LocalOrdinal as a size_t.
1672  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1673  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1674  static_assert (sizeof(GST) >= sizeof(size_t),
1675  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1676 
1677  // FIXME (mfh 30 Sep 2015) We're not using
1678  // Teuchos::CompileTimeAssert any more. Can we do these checks
1679  // with static_assert?
1680 
1681  // can't call max() with CompileTimeAssert, because it isn't a
1682  // constant expression; will need to make this a runtime check
1683  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1684  "given template arguments: size assumptions are not valid.";
// NOTE(review): the left side is already cast to size_t, so assuming
// OrdinalTraits<size_t>::max() is the largest size_t this test can never
// fire -- confirm whether a wider comparison was intended.
1685  TEUCHOS_TEST_FOR_EXCEPTION(
1686  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1687  std::runtime_error, msg);
1688  TEUCHOS_TEST_FOR_EXCEPTION(
1689  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1690  std::runtime_error, msg);
// NOTE(review): GO's maximum is cast to size_t rather than GST before
// being compared with GST's maximum; if size_t is narrower than GO this
// would truncate -- confirm whether static_cast<GST> was intended.
1691  TEUCHOS_TEST_FOR_EXCEPTION(
1692  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1693  std::runtime_error, msg);
1694  TEUCHOS_TEST_FOR_EXCEPTION(
1695  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1696  std::runtime_error, msg);
1697  }
1698 
1699 
1700  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1701  size_t
1702  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1703  insertIndices (RowInfo& rowinfo,
1704  const SLocalGlobalViews &newInds,
1705  const ELocalGlobal lg,
1706  const ELocalGlobal I)
1707  {
1708  using Teuchos::ArrayView;
1709  typedef LocalOrdinal LO;
1710  typedef GlobalOrdinal GO;
1711  const char tfecfFuncName[] = "insertIndices: ";
1712 
1713  size_t oldNumEnt = 0;
1714  if (debug_) {
1715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1716  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1717  "lg must be either GlobalIndices or LocalIndices.");
1718  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1719  }
1720 
1721  size_t numNewInds = 0;
1722  if (lg == GlobalIndices) { // input indices are global
1723  ArrayView<const GO> new_ginds = newInds.ginds;
1724  numNewInds = new_ginds.size();
1725  if (I == GlobalIndices) { // store global indices
1726  ArrayView<GO> gind_view = this->getGlobalViewNonConst (rowinfo);
1727  if (debug_) {
1728  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1729  (static_cast<size_t> (gind_view.size ()) <
1730  rowinfo.numEntries + numNewInds, std::logic_error,
1731  "gind_view.size() = " << gind_view.size ()
1732  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1733  << ") + numNewInds (= " << numNewInds << ").");
1734  }
1735  GO* const gblColInds_out = gind_view.getRawPtr () + rowinfo.numEntries;
1736  for (size_t k = 0; k < numNewInds; ++k) {
1737  gblColInds_out[k] = new_ginds[k];
1738  }
1739  }
1740  else if (I == LocalIndices) { // store local indices
1741  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
1742  if (debug_) {
1743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1744  (static_cast<size_t> (lind_view.size ()) <
1745  rowinfo.numEntries + numNewInds, std::logic_error,
1746  "lind_view.size() = " << lind_view.size ()
1747  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1748  << ") + numNewInds (= " << numNewInds << ").");
1749  }
1750  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
1751  for (size_t k = 0; k < numNewInds; ++k) {
1752  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1753  }
1754  }
1755  }
1756  else if (lg == LocalIndices) { // input indices are local
1757  ArrayView<const LO> new_linds = newInds.linds;
1758  numNewInds = new_linds.size();
1759  if (I == LocalIndices) { // store local indices
1760  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
1761  if (debug_) {
1762  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1763  (static_cast<size_t> (lind_view.size ()) <
1764  rowinfo.numEntries + numNewInds, std::logic_error,
1765  "lind_view.size() = " << lind_view.size ()
1766  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1767  << ") + numNewInds (= " << numNewInds << ").");
1768  }
1769  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
1770  for (size_t k = 0; k < numNewInds; ++k) {
1771  lclColInds_out[k] = new_linds[k];
1772  }
1773  }
1774  else if (I == GlobalIndices) {
1775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1776  (true, std::logic_error, "The case where the input indices are local "
1777  "and the indices to write are global (lg=LocalIndices, I="
1778  "GlobalIndices) is not implemented, because it does not make sense."
1779  << std::endl << "If you have correct local column indices, that "
1780  "means the graph has a column Map. In that case, you should be "
1781  "storing local indices.");
1782  }
1783  }
1784 
1785  rowinfo.numEntries += numNewInds;
1786  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1787  this->setLocallyModified ();
1788 
1789  if (debug_) {
1790  const size_t chkNewNumEnt =
1791  this->getNumEntriesInLocalRow (rowinfo.localRow);
1792  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1793  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1794  "chkNewNumEnt = " << chkNewNumEnt
1795  << " != oldNumEnt (= " << oldNumEnt
1796  << ") + numNewInds (= " << numNewInds << ").");
1797  }
1798 
1799  return numNewInds;
1800  }
1801 
// Convenience overload: looks up the RowInfo for lclRow with getRowInfo()
// and forwards to the RowInfo-based insertGlobalIndicesImpl overload,
// returning its result.
// NOTE(review): only three arguments are forwarded, so the callback
// parameter of the RowInfo overload presumably has a default value declared
// in the header -- confirm.
// NOTE(review): listing line 1804 (class qualifier) was dropped by the page
// extraction.
1802  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1803  size_t
1805  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1806  const GlobalOrdinal inputGblColInds[],
1807  const size_t numInputInds)
1808  {
1809  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1810  inputGblColInds, numInputInds);
1811  }
1812 
// Inserts global column indices into one row via Details::insertCrsIndices.
//
// Parameters visible in this listing:
//   inputGblColInds - array of numInputInds global column indices to insert.
//   numInputInds    - length of inputGblColInds.
//   fun             - callback forwarded unchanged to insertCrsIndices.
// The body also reads a `rowInfo` row descriptor (localRow, numEntries,
// allocSize, offset1D).
//
// Returns the count reported by insertCrsIndices, and adds that count to
// k_numRowEntries_ for the row before calling setLocallyModified().
// Throws std::runtime_error, with a message listing the input and current
// indices, when insertCrsIndices reports the invalid size_t value.
//
// NOTE(review): listing lines 1815-1816 (class qualifier, function name and
// the first parameter, presumably `const RowInfo& rowInfo`) and 1821
// (presumably a using-declaration for verbosePrintArray) were dropped by the
// page extraction; the name is taken from tfecfFuncName.
1813  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1814  size_t
1817  const GlobalOrdinal inputGblColInds[],
1818  const size_t numInputInds,
1819  std::function<void(const size_t, const size_t, const size_t)> fun)
1820  {
1822  using Kokkos::View;
1823  using Kokkos::subview;
1824  using Kokkos::MemoryUnmanaged;
1825  using Teuchos::ArrayView;
1826  using LO = LocalOrdinal;
1827  using GO = GlobalOrdinal;
1828  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1829  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1830 
// Local copy of the entry count; rowInfo itself is still used unchanged
// in the error message below.
1831  auto numEntries = rowInfo.numEntries;
1832  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
// Wrap the caller's raw array in an unmanaged View (no copy, no ownership).
1833  inp_view_type inputInds(inputGblColInds, numInputInds);
1834  size_t numInserted = Details::insertCrsIndices(lclRow, k_rowPtrs_,
1835  this->k_gblInds1D_, numEntries, inputInds, fun);
1836 
// The invalid size_t value is the failure signal from insertCrsIndices.
1837  const bool insertFailed =
1838  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1839  if(insertFailed) {
1840  constexpr size_t ONE (1);
1841  const int myRank = this->getComm()->getRank();
1842  std::ostringstream os;
1843 
1844  os << "Proc " << myRank << ": Not enough capacity to insert "
1845  << numInputInds
1846  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1847  << " into local row " << lclRow << ", which currently has "
1848  << rowInfo.numEntries
1849  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1850  << " and total allocation size " << rowInfo.allocSize
1851  << ". ";
1852  const size_t maxNumToPrint =
1853  Details::Behavior::verbosePrintCountThreshold();
1854  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1855  numInputInds);
1856  verbosePrintArray(os, inputGblColIndsView, "Input global "
1857  "column indices", maxNumToPrint);
1858  os << ", ";
1859  const GO* const curGblColInds =
1860  k_gblInds1D_.data() + rowInfo.offset1D;
1861  ArrayView<const GO> curGblColIndsView(curGblColInds,
1862  rowInfo.numEntries);
1863  verbosePrintArray(os, curGblColIndsView, "Current global "
1864  "column indices", maxNumToPrint);
1865  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1866  (true, std::runtime_error, os.str());
1867  }
1868 
1869  this->k_numRowEntries_(lclRow) += numInserted;
1870  this->setLocallyModified();
1871  return numInserted;
1872  }
1873 
1874 
// Insert local column indices into local row `myRow`.
//
// myRow:   local row index; its RowInfo is obtained via getRowInfo().
// indices: local column indices to insert (host memory, see note below).
// fun:     callback forwarded unchanged to Details::insertCrsIndices; what it
//          is called with is decided by that helper, which is not shown here.
//
// If Details::insertCrsIndices returns
// Teuchos::OrdinalTraits<size_t>::invalid(), this function throws
// std::runtime_error reporting the row's current entry count and allocation
// size. On success it adds the number of inserted indices to
// k_numRowEntries_(myRow) and marks the graph as locally modified. When
// debug_ is set, it also checks that getNumEntriesInLocalRow(myRow) agrees
// with the expected new count and throws std::logic_error otherwise.
1875  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1876  void
1878  insertLocalIndicesImpl (const LocalOrdinal myRow,
1879  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1880  std::function<void(const size_t, const size_t, const size_t)> fun)
1881  {
1882  using Kokkos::MemoryUnmanaged;
1883  using Kokkos::subview;
1884  using Kokkos::View;
1885  using LO = LocalOrdinal;
// Prefix used by the exception macros below; it must spell the method name
// correctly so that error reports point at the right function.
1886  const char tfecfFuncName[] = "insertLocalIndicesImpl: ";
1887 
1888  const RowInfo rowInfo = this->getRowInfo(myRow);
1889 
1890  size_t numNewInds = 0;
1891  size_t newNumEntries = 0;
1892 
// Local copy of the entry count; rowInfo keeps the pre-insert value, which
// is used both for the error message and for computing newNumEntries.
1893  auto numEntries = rowInfo.numEntries;
1894  // Note: Teuchos::ArrayViews are in HostSpace
1895  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1896  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1897  auto numInserted = Details::insertCrsIndices(myRow, k_rowPtrs_,
1898  this->k_lclInds1D_, numEntries, inputInds, fun);
1899 
// An "invalid" return value is treated as "not enough capacity in the row".
1900  const bool insertFailed =
1901  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1902  if(insertFailed) {
1903  constexpr size_t ONE (1);
1904  const size_t numInputInds(indices.size());
1905  const int myRank = this->getComm()->getRank();
1906  std::ostringstream os;
1907  os << "On MPI Process " << myRank << ": Not enough capacity to "
1908  "insert " << numInputInds
1909  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1910  << " into local row " << myRow << ", which currently has "
1911  << rowInfo.numEntries
1912  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1913  << " and total allocation size " << rowInfo.allocSize << ".";
1914  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1915  (true, std::runtime_error, os.str());
1916  }
1917  numNewInds = numInserted;
1918  newNumEntries = rowInfo.numEntries + numNewInds;
1919 
1920  this->k_numRowEntries_(myRow) += numNewInds;
1921  this->setLocallyModified ();
1922 
1923  if (debug_) {
// Cross-check the stored per-row count against the expected value.
1924  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1925  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1926  (chkNewNumEntries != newNumEntries, std::logic_error,
1927  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1928  << " != newNumEntries = " << newNumEntries
1929  << ". Please report this bug to the Tpetra developers.");
1930  }
1931  }
1932 
1933 
// Look up the given local column indices in the row described by `rowInfo`.
//
// rowInfo: row to search (localRow and numEntries are read).
// indices: local column indices to look for; wrapped, without copying, in an
//          unmanaged host View.
// fun:     callback forwarded unchanged to Details::findCrsIndices.
//
// Returns the value produced by Details::findCrsIndices. If the graph is
// globally indexed and has no column Map, returns
// Teuchos::OrdinalTraits<size_t>::invalid(). If the graph is neither locally
// nor globally indexed, returns 0.
1934  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1935  size_t
1936  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1937  findLocalIndices(const RowInfo& rowInfo,
1938  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1939  std::function<void(const size_t, const size_t, const size_t)> fun) const
1940  {
1941  using LO = LocalOrdinal;
1942  using inp_view_type = Kokkos::View<const LO*, Kokkos::HostSpace,
1943  Kokkos::MemoryUnmanaged>;
1944  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1945 
1946  size_t numFound = 0;
1947  LO lclRow = rowInfo.localRow;
1948  if (this->isLocallyIndexed())
1949  {
// Stored indices are already local: search them directly.
1950  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1951  this->k_lclInds1D_, inputInds, fun);
1952  }
1953  else if (this->isGloballyIndexed())
1954  {
// Stored indices are global: each input local index is converted to a
// global index through the column Map, so a column Map is required.
1955  if (this->colMap_.is_null())
1956  return Teuchos::OrdinalTraits<size_t>::invalid();
1957  const auto& colMap = *(this->colMap_);
1958  auto map = [&](LO const lclInd){return colMap.getGlobalElement(lclInd);};
1959  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1960  this->k_gblInds1D_, inputInds, map, fun);
1961  }
1962  return numFound;
1963  }
1964 
1965 
// Look up the given global column indices in the row described by `rowInfo`.
//
// rowInfo: row to search (localRow and numEntries are read).
// indices: global column indices to look for; wrapped, without copying, in an
//          unmanaged View.
// fun:     callback forwarded unchanged to Details::findCrsIndices.
//
// Returns the value produced by Details::findCrsIndices. If the graph is
// locally indexed and has no column Map, returns
// Teuchos::OrdinalTraits<size_t>::invalid(). If the graph is neither locally
// nor globally indexed, returns 0.
1966  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1967  size_t
1969  findGlobalIndices(const RowInfo& rowInfo,
1970  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1971  std::function<void(const size_t, const size_t, const size_t)> fun) const
1972  {
1973  using GO = GlobalOrdinal;
1974  using Kokkos::View;
1975  using Kokkos::MemoryUnmanaged;
1976  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1977 
// NOTE(review): this View is declared with execution_space, whereas
// findLocalIndices wraps its Teuchos::ArrayView input with
// Kokkos::HostSpace -- confirm that the difference is intended.
1978  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
1979  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1980 
1981  size_t numFound = 0;
1982  LocalOrdinal lclRow = rowInfo.localRow;
1983  if (this->isLocallyIndexed())
1984  {
// Stored indices are local: each input global index is converted to a
// local index through the column Map, so a column Map is required.
1985  if (this->colMap_.is_null())
1986  return invalidCount;
1987  const auto& colMap = *(this->colMap_);
1988  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1989  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1990  this->k_lclInds1D_, inputInds, map, fun);
1991  }
1992  else if (this->isGloballyIndexed())
1993  {
// Stored indices are already global: search them directly.
1994  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
1995  this->k_gblInds1D_, inputInds, fun);
1996  }
1997  return numFound;
1998  }
1999 
2000 
// Sort and/or remove duplicate local column indices in one row.
//
// rowInfo: row to process (numEntries and localRow are read).
// sorted:  if false, the row's first numEntries indices are sorted with
//          std::sort; if true, sorting is skipped.
// merged:  if false, adjacent duplicates are removed with std::unique and
//          k_numRowEntries_(rowInfo.localRow) is set to the new count; if
//          true, no merging is done.
//
// Returns the number of duplicate entries removed. Returns 0 when the row has
// no entries, when numEntries is the invalid sentinel, or when `merged` is
// true.
2001  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2002  size_t
2003  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
2004  sortAndMergeRowIndices (const RowInfo& rowInfo,
2005  const bool sorted,
2006  const bool merged)
2007  {
2008  const size_t origNumEnt = rowInfo.numEntries;
2009  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
2010  origNumEnt != 0) {
2011  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2012 
// Work on the raw pointer so the standard algorithms can be used in place.
2013  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
2014  if (! sorted) {
2015  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2016  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
2017  }
2018 
2019  if (! merged) {
2020  LocalOrdinal* const beg = lclColIndsRaw;
2021  LocalOrdinal* const end = beg + rowInfo.numEntries;
2022  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2023  LocalOrdinal* const newend = std::unique (beg, end);
2024  const size_t newNumEnt = newend - beg;
2025 
2026  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2027  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
2028  return origNumEnt - newNumEnt; // the number of duplicates in the row
2029  }
2030  else {
2031  return static_cast<size_t> (0); // assume no duplicates
2032  }
2033  }
2034  else {
2035  return static_cast<size_t> (0); // no entries in the row
2036  }
2037  }
2038 
2039 
// Set the graph's domain and range Maps.
//
// Each Map is compared with the stored one by pointer only. If the domain
// Map differs, it is replaced and importer_ is reset to null; if the range
// Map differs, it is replaced and exporter_ is reset to null. A Map that is
// the same pointer as the stored one leaves its importer/exporter untouched.
2040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2041  void
2043  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2044  const Teuchos::RCP<const map_type>& rangeMap)
2045  {
2046  // simple pointer comparison for equality
2047  if (domainMap_ != domainMap) {
2048  domainMap_ = domainMap;
2049  importer_ = Teuchos::null;
2050  }
2051  if (rangeMap_ != rangeMap) {
2052  rangeMap_ = rangeMap;
2053  exporter_ = Teuchos::null;
2054  }
2055  }
2056 
2057 
// Reset the cached global constants.
//
// NOTE: the declarator line naming this member function is not present in
// this listing. The body sets globalNumEntries_ and globalMaxNumRowEntries_
// to Teuchos::OrdinalTraits<global_size_t>::invalid() and clears
// haveGlobalConstants_, which is the state checkInternalState() expects when
// the graph does not have global constants.
2058  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2059  void
2062  {
2063  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2064 
2065  globalNumEntries_ = INV;
2066  globalMaxNumRowEntries_ = INV;
2067  haveGlobalConstants_ = false;
2068  }
2069 
2070 
2071  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2072  void
2073  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
2074  checkInternalState () const
2075  {
2076  if (debug_) {
2077  using std::endl;
2078  const char tfecfFuncName[] = "checkInternalState: ";
2079  const char suffix[] = " Please report this bug to the Tpetra developers.";
2080 
2081  std::unique_ptr<std::string> prefix;
2082  if (verbose_) {
2083  prefix = this->createPrefix("CrsGraph", "checkInternalState");
2084  std::ostringstream os;
2085  os << *prefix << "Start" << endl;
2086  std::cerr << os.str();
2087  }
2088 
2089  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2090  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2091  // check the internal state of this data structure
2092  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2093  // always remains in a valid state
2094 
2095  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2096  (this->rowMap_.is_null (), std::logic_error,
2097  "Row Map is null." << suffix);
2098  // This may access the row Map, so we need to check first (above)
2099  // whether the row Map is null.
2100  const LocalOrdinal lclNumRows =
2101  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2102 
2103  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2104  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2105  "Graph cannot be both fill active and fill complete." << suffix);
2106  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2107  (this->isFillComplete () &&
2108  (this->colMap_.is_null () ||
2109  this->rangeMap_.is_null () ||
2110  this->domainMap_.is_null ()),
2111  std::logic_error,
2112  "Graph is full complete, but at least one of {column, range, domain} "
2113  "Map is null." << suffix);
2114  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2115  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2116  std::logic_error, "Storage is optimized, but indices are not "
2117  "allocated, not even trivially." << suffix);
2118 
2119  size_t nodeAllocSize = 0;
2120  try {
2121  nodeAllocSize = this->getNodeAllocationSize ();
2122  }
2123  catch (std::logic_error& e) {
2124  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2125  (true, std::runtime_error, "getNodeAllocationSize threw "
2126  "std::logic_error: " << e.what ());
2127  }
2128  catch (std::exception& e) {
2129  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2130  (true, std::runtime_error, "getNodeAllocationSize threw an "
2131  "std::exception: " << e.what ());
2132  }
2133  catch (...) {
2134  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2135  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2136  "not a subclass of std::exception.");
2137  }
2138 
2139  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2140  (this->isStorageOptimized () &&
2141  nodeAllocSize != this->getNodeNumEntries (),
2142  std::logic_error, "Storage is optimized, but "
2143  "this->getNodeAllocationSize() = " << nodeAllocSize
2144  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2145  << "." << suffix);
2146  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2147  (! this->haveGlobalConstants_ &&
2148  (this->globalNumEntries_ != GSTI ||
2149  this->globalMaxNumRowEntries_ != GSTI),
2150  std::logic_error, "Graph claims not to have global constants, but "
2151  "some of the global constants are not marked as invalid." << suffix);
2152  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2153  (this->haveGlobalConstants_ &&
2154  (this->globalNumEntries_ == GSTI ||
2155  this->globalMaxNumRowEntries_ == GSTI),
2156  std::logic_error, "Graph claims to have global constants, but "
2157  "some of them are marked as invalid." << suffix);
2158  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2159  (this->haveGlobalConstants_ &&
2160  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2161  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2162  std::logic_error, "Graph claims to have global constants, and "
2163  "all of the values of the global constants are valid, but "
2164  "some of the local constants are greater than "
2165  "their corresponding global constants." << suffix);
2166  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2167  (this->indicesAreAllocated () &&
2168  (this->numAllocForAllRows_ != 0 ||
2169  this->k_numAllocPerRow_.extent (0) != 0),
2170  std::logic_error, "The graph claims that its indices are allocated, but "
2171  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2172  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2173  "the graph is supposed to release its \"allocation specifications\" "
2174  "when it allocates its indices." << suffix);
2175  if (isGloballyIndexed() && k_rowPtrs_.extent(0) != 0) {
2176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2177  (size_t(k_rowPtrs_.extent(0)) != size_t(lclNumRows + 1),
2178  std::logic_error, "The graph is globally indexed and "
2179  "k_rowPtrs_ has nonzero size " << k_rowPtrs_.extent(0)
2180  << ", but that size does not equal lclNumRows+1 = "
2181  << (lclNumRows+1) << "." << suffix);
2182  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2183  (k_rowPtrs_(lclNumRows) != size_t(k_gblInds1D_.extent(0)),
2184  std::logic_error, "The graph is globally indexed and "
2185  "k_rowPtrs_ has nonzero size " << k_rowPtrs_.extent(0)
2186  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2187  << k_rowPtrs_(lclNumRows) << " != k_gblInds1D_.extent(0)="
2188  << k_gblInds1D_.extent(0) << "." << suffix);
2189  }
2190  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2191  (this->isLocallyIndexed () &&
2192  this->k_rowPtrs_.extent (0) != 0 &&
2193  (static_cast<size_t> (k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2194  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.extent (0))),
2195  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2196  "the graph is locally indexed, then "
2197  "k_rowPtrs_ must have N+1 rows, and "
2198  "k_rowPtrs_(N) must equal k_lclInds1D_.extent(0)." << suffix);
2199 
2200  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2201  (this->indicesAreAllocated () &&
2202  nodeAllocSize > 0 &&
2203  this->k_lclInds1D_.extent (0) == 0 &&
2204  this->k_gblInds1D_.extent (0) == 0,
2205  std::logic_error, "Graph is allocated nontrivially, but "
2206  "but 1-D allocations are not present." << suffix);
2207 
2208  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2209  (! this->indicesAreAllocated () &&
2210  ((this->k_rowPtrs_.extent (0) != 0 ||
2211  this->k_numRowEntries_.extent (0) != 0) ||
2212  this->k_lclInds1D_.extent (0) != 0 ||
2213  this->k_gblInds1D_.extent (0) != 0),
2214  std::logic_error, "If indices are not allocated, "
2215  "then none of the buffers should be." << suffix);
2216  // indices may be local or global only if they are allocated
2217  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2218  // indicesAreGlobal_)
2219  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2220  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2221  ! this->indicesAreAllocated_,
2222  std::logic_error, "Indices may be local or global only if they are "
2223  "allocated." << suffix);
2224  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2225  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2226  std::logic_error, "Indices may not be both local and global." << suffix);
2227  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2228  (indicesAreLocal_ && k_gblInds1D_.extent (0) != 0,
2229  std::logic_error, "Indices are local, but "
2230  "k_gblInds1D_.extent(0) (= " << k_gblInds1D_.extent (0)
2231  << ") != 0. In other words, if indices are local, then "
2232  "allocations of global indices should not be present."
2233  << suffix);
2234  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2235  (indicesAreGlobal_ && k_lclInds1D_.extent (0) != 0,
2236  std::logic_error, "Indices are global, but "
2237  "k_lclInds1D_.extent(0) (= " << k_lclInds1D_.extent(0)
2238  << ") != 0. In other words, if indices are global, "
2239  "then allocations for local indices should not be present."
2240  << suffix);
2241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2242  (indicesAreLocal_ && nodeAllocSize > 0 &&
2243  k_lclInds1D_.extent (0) == 0 && getNodeNumRows () > 0,
2244  std::logic_error, "Indices are local and "
2245  "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2246  "k_lclInds1D_.extent(0) = 0 and getNodeNumRows() = "
2247  << getNodeNumRows () << " > 0." << suffix);
2248  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2249  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2250  k_gblInds1D_.extent (0) == 0 && getNodeNumRows () > 0,
2251  std::logic_error, "Indices are global and "
2252  "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2253  "k_gblInds1D_.extent(0) = 0 and getNodeNumRows() = "
2254  << getNodeNumRows () << " > 0." << suffix);
2255  // check the actual allocations
2256  if (this->indicesAreAllocated () &&
2257  this->k_rowPtrs_.extent (0) != 0) {
2258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2259  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) !=
2260  this->getNodeNumRows () + 1,
2261  std::logic_error, "Indices are allocated and "
2262  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.extent(0) = "
2263  << this->k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = "
2264  << (this->getNodeNumRows () + 1) << "." << suffix);
2265  const size_t actualNumAllocated =
2266  ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2268  (this->isLocallyIndexed () &&
2269  static_cast<size_t> (this->k_lclInds1D_.extent (0)) != actualNumAllocated,
2270  std::logic_error, "Graph is locally indexed, indices are "
2271  "are allocated, and k_rowPtrs_ has nonzero length, but "
2272  "k_lclInds1D_.extent(0) = " << this->k_lclInds1D_.extent (0)
2273  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2274  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2275  (this->isGloballyIndexed () &&
2276  static_cast<size_t> (this->k_gblInds1D_.extent (0)) != actualNumAllocated,
2277  std::logic_error, "Graph is globally indexed, indices "
2278  "are allocated, and k_rowPtrs_ has nonzero length, but "
2279  "k_gblInds1D_.extent(0) = " << this->k_gblInds1D_.extent (0)
2280  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2281  }
2282 
2283  if (verbose_) {
2284  std::ostringstream os;
2285  os << *prefix << "Done" << endl;
2286  std::cerr << os.str();
2287  }
2288  }
2289  }
2290 
2291 
// Number of entries currently stored in the row with global index globalRow.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(). If the resulting
// RowInfo has an invalid localRow, returns
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise returns the RowInfo's
// numEntries.
2292  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2293  size_t
2295  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2296  {
2297  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2298  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2299  return Teuchos::OrdinalTraits<size_t>::invalid ();
2300  }
2301  else {
2302  return rowInfo.numEntries;
2303  }
2304  }
2305 
2306 
// Number of entries currently stored in the row with local index localRow.
//
// The row is looked up with getRowInfo(). If the resulting RowInfo has an
// invalid localRow, returns Teuchos::OrdinalTraits<size_t>::invalid();
// otherwise returns the RowInfo's numEntries.
2307  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2308  size_t
2310  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2311  {
2312  const RowInfo rowInfo = this->getRowInfo (localRow);
2313  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2314  return Teuchos::OrdinalTraits<size_t>::invalid ();
2315  }
2316  else {
2317  return rowInfo.numEntries;
2318  }
2319  }
2320 
2321 
// Allocated capacity of the row with global index globalRow.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(). If the resulting
// RowInfo has an invalid localRow, returns
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise returns the RowInfo's
// allocSize.
2322  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2323  size_t
2325  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2326  {
2327  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2328  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2329  return Teuchos::OrdinalTraits<size_t>::invalid ();
2330  }
2331  else {
2332  return rowInfo.allocSize;
2333  }
2334  }
2335 
2336 
// Allocated capacity of the row with local index localRow.
//
// The row is looked up with getRowInfo(). If the resulting RowInfo has an
// invalid localRow, returns Teuchos::OrdinalTraits<size_t>::invalid();
// otherwise returns the RowInfo's allocSize.
2337  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2338  size_t
2340  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2341  {
2342  const RowInfo rowInfo = this->getRowInfo (localRow);
2343  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2344  return Teuchos::OrdinalTraits<size_t>::invalid ();
2345  }
2346  else {
2347  return rowInfo.allocSize;
2348  }
2349  }
2350 
2351 
// Return the row offsets (k_rowPtrs_) as a host Teuchos::ArrayRCP<const size_t>.
//
// NOTE: the declarator line naming this member function is not present in
// this listing; the `prefix` string below identifies it as getNodeRowPtrs.
//
// If k_rowPtrs_ has extent 0, an empty (null) ArrayRCP is returned.
// Otherwise one of two paths is taken, selected by whether the row offset
// type is size_t:
//   - same type: k_rowPtrs_ is deep-copied into a host mirror view, which is
//     then wrapped with Kokkos::Compat::persistingView;
//   - different type: the offsets are first converted into a size_t View with
//     Details::copyOffsets, then mirrored to host and wrapped the same way.
// When debug_ is set, additional std::logic_error checks guard against null
// pointers and extent mismatches along the way.
2352  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2353  Teuchos::ArrayRCP<const size_t>
2356  {
2357  using Kokkos::ViewAllocateWithoutInitializing;
2358  using Kokkos::create_mirror_view;
2359  using Teuchos::ArrayRCP;
2360  typedef typename local_graph_type::row_map_type row_map_type;
2361  typedef typename row_map_type::non_const_value_type row_offset_type;
2362  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2363  const char suffix[] = " Please report this bug to the Tpetra developers.";
2364 
2365  const size_t size = k_rowPtrs_.extent (0);
// Compile-time flag: true when the row offset type is exactly size_t.
2366  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2367 
2368  if (size == 0) {
2369  return ArrayRCP<const size_t> ();
2370  }
2371 
// Only one of these two is filled in, depending on `same`.
2372  ArrayRCP<const row_offset_type> ptr_rot;
2373  ArrayRCP<const size_t> ptr_st;
2374  if (same) { // size_t == row_offset_type
2375  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2376  // of create_mirror_view might actually be a new allocation.
2377  // This helps with debugging when there are two memory spaces.
2378  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2379  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2380  if (debug_) {
2381  TEUCHOS_TEST_FOR_EXCEPTION
2382  (ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error,
2383  prefix << "size_t == row_offset_type, but ptr_h.extent(0) = "
2384  << ptr_h.extent (0) << " != k_rowPtrs_.extent(0) = "
2385  << k_rowPtrs_.extent (0) << ".");
2386  TEUCHOS_TEST_FOR_EXCEPTION
2387  (same && size != 0 && k_rowPtrs_.data () == nullptr, std::logic_error,
2388  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2389  << size << " != 0, but k_rowPtrs_.data() == nullptr." << suffix);
2390  TEUCHOS_TEST_FOR_EXCEPTION
2391  (same && size != 0 && ptr_h.data () == nullptr, std::logic_error,
2392  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2393  << size << " != 0, but create_mirror_view(k_rowPtrs_).data() "
2394  "== nullptr." << suffix);
2395  }
2396  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2397  }
2398  else { // size_t != row_offset_type
// Convert the offsets to size_t first, then bring the result to host.
2399  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2400  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2401  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2402  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2403  Kokkos::deep_copy (ptr_h, ptr_d);
2404  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2405  }
2406  if (debug_) {
2407  TEUCHOS_TEST_FOR_EXCEPTION
2408  (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2409  prefix << "size_t == row_offset_type and size = " << size
2410  << " != 0, but ptr_rot is null." << suffix);
2411  TEUCHOS_TEST_FOR_EXCEPTION
2412  (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2413  prefix << "size_t != row_offset_type and size = " << size
2414  << " != 0, but ptr_st is null." << suffix);
2415  }
2416 
2417  // If size_t == row_offset_type, return a persisting host view of
2418  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2419  ArrayRCP<const size_t> retval =
2420  Kokkos::Impl::if_c<same,
2421  ArrayRCP<const row_offset_type>,
2422  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2423  if (debug_) {
2424  TEUCHOS_TEST_FOR_EXCEPTION
2425  (size != 0 && retval.is_null (), std::logic_error,
2426  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2427  }
2428  return retval;
2429  }
2430 
2431 
// Return the 1-D array of local column indices (k_lclInds1D_) wrapped as a
// Teuchos::ArrayRCP<const LocalOrdinal> via Kokkos::Compat::persistingView.
//
// NOTE: the declarator line naming this member function is not present in
// this listing.
2432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2433  Teuchos::ArrayRCP<const LocalOrdinal>
2436  {
2437  return Kokkos::Compat::persistingView (k_lclInds1D_);
2438  }
2439 
2440 
// Copy the column indices of local row `localRow`, as local indices, into
// caller-provided storage.
//
// localRow:   local index of the row to copy.
// indices:    output buffer; must hold at least as many entries as the row.
// numEntries: output; set to the number of entries in the row.
//
// Throws std::runtime_error if the graph is globally indexed but has no
// column Map, or if `indices` is too small for the row. numEntries is
// written only after the size check has passed.
//
// If the graph is locally indexed, the stored indices are copied as-is. If
// it is globally indexed, each stored global index is converted to a local
// index through colMap_.
2441  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2442  void
2444  getLocalRowCopy (LocalOrdinal localRow,
2445  const Teuchos::ArrayView<LocalOrdinal>&indices,
2446  size_t& numEntries) const
2447  {
2448  using Teuchos::ArrayView;
2449  typedef LocalOrdinal LO;
2450  typedef GlobalOrdinal GO;
2451  const char tfecfFuncName[] = "getLocalRowCopy: ";
2452 
2453  TEUCHOS_TEST_FOR_EXCEPTION(
2454  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2455  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2456  "does not have a column Map yet. That means we don't have local indices "
2457  "for columns yet, so it doesn't make sense to call this method. If the "
2458  "graph doesn't have a column Map yet, you should call fillComplete on "
2459  "it first.");
2460 
2461  // This does the right thing (reports an empty row) if the input
2462  // row is invalid.
2463  const RowInfo rowinfo = this->getRowInfo (localRow);
2464  // No side effects on error.
2465  const size_t theNumEntries = rowinfo.numEntries;
2466  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2467  (static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2468  "Specified storage (size==" << indices.size () << ") does not suffice "
2469  "to hold all " << theNumEntries << " entry/ies for this row.");
2470  numEntries = theNumEntries;
2471 
2472  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2473  if (isLocallyIndexed ()) {
2474  ArrayView<const LO> lview = getLocalView (rowinfo);
2475  for (size_t j = 0; j < theNumEntries; ++j) {
2476  indices[j] = lview[j];
2477  }
2478  }
2479  else if (isGloballyIndexed ()) {
// Stored indices are global; translate each one through the column Map.
2480  ArrayView<const GO> gview = getGlobalView (rowinfo);
2481  for (size_t j = 0; j < theNumEntries; ++j) {
2482  indices[j] = colMap_->getLocalElement (gview[j]);
2483  }
2484  }
2485  }
2486  }
2487 
2488 
// Copy the column indices of the row with global index `globalRow`, as
// global indices, into caller-provided storage.
//
// globalRow:  global index of the row to copy.
// indices:    output buffer; must hold at least as many entries as the row.
// numEntries: output; set to the number of entries in the row.
//
// Throws std::runtime_error if `indices` is too small for the row.
// numEntries is written only after the size check has passed.
//
// If the graph is locally indexed, each stored local index is converted to a
// global index through colMap_. If it is globally indexed, the stored
// indices are copied as-is.
2489  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2490  void
2492  getGlobalRowCopy (GlobalOrdinal globalRow,
2493  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2494  size_t& numEntries) const
2495  {
2496  using Teuchos::ArrayView;
2497  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2498 
2499  // This does the right thing (reports an empty row) if the input
2500  // row is invalid.
2501  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2502  const size_t theNumEntries = rowinfo.numEntries;
2503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2504  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2505  "Specified storage (size==" << indices.size () << ") does not suffice "
2506  "to hold all " << theNumEntries << " entry/ies for this row.");
2507  numEntries = theNumEntries; // first side effect
2508 
2509  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2510  if (isLocallyIndexed ()) {
// Stored indices are local; translate each one through the column Map.
2511  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
2512  for (size_t j = 0; j < theNumEntries; ++j) {
2513  indices[j] = colMap_->getGlobalElement (lview[j]);
2514  }
2515  }
2516  else if (isGloballyIndexed ()) {
2517  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
2518  for (size_t j = 0; j < theNumEntries; ++j) {
2519  indices[j] = gview[j];
2520  }
2521  }
2522  }
2523  }
2524 
2525 
// Give the caller a read-only view of the local column indices of local row
// `localRow`.
//
// localRow: local index of the row.
// indices:  output; set to a view of exactly the row's numEntries entries,
//           or to null if the row is invalid or has no entries.
//
// Throws std::runtime_error if the graph is globally indexed. When debug_ is
// set, throws std::logic_error if the size of the resulting view disagrees
// with getNumEntriesInLocalRow(localRow).
2526  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2527  void
2529  getLocalRowView (const LocalOrdinal localRow,
2530  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2531  {
2532  const char tfecfFuncName[] = "getLocalRowView: ";
2533 
2534  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2535  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2536  "currently stored as global indices, so we cannot return a view with "
2537  "local column indices, whether or not the graph has a column Map. If "
2538  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2539 
2540  // This does the right thing (reports an empty row) if the input
2541  // row is invalid.
2542  const RowInfo rowInfo = getRowInfo (localRow);
2543  indices = Teuchos::null;
2544  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2545  rowInfo.numEntries > 0) {
2546  indices = this->getLocalView (rowInfo);
2547  // getLocalView returns a view of the _entire_ row, including
2548  // any extra space at the end (which 1-D unpacked storage
2549  // might have, for example). That's why we have to take a
2550  // subview of the returned view.
2551  indices = indices (0, rowInfo.numEntries);
2552  }
2553 
2554  if (debug_) {
2555  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2556  (static_cast<size_t> (indices.size ()) !=
2557  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2558  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
2559  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
2560  ". Please report this bug to the Tpetra developers.");
2561  }
2562  }
2563 
2564 
// Give the caller a read-only view of the global column indices of the row
// with global index `globalRow`.
//
// globalRow: global index of the row.
// indices:   output; set to a view of exactly the row's numEntries entries,
//            or to null if the row is invalid or has no entries.
//
// Throws std::runtime_error if the graph is locally indexed. When debug_ is
// set, throws std::logic_error if the size of the resulting view disagrees
// with getNumEntriesInGlobalRow(globalRow).
2565  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2566  void
2568  getGlobalRowView (const GlobalOrdinal globalRow,
2569  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
2570  {
2571  const char tfecfFuncName[] = "getGlobalRowView: ";
2572 
2573  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2574  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2575  "currently stored as local indices, so we cannot return a view with "
2576  "global column indices. Use getGlobalRowCopy() instead.");
2577 
2578  // This does the right thing (reports an empty row) if the input
2579  // row is invalid.
2580  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2581  indices = Teuchos::null;
2582  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2583  rowInfo.numEntries > 0) {
// Trim the row view to the entries actually in use.
2584  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2585  }
2586 
2587  if (debug_) {
2588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2589  (static_cast<size_t> (indices.size ()) !=
2590  getNumEntriesInGlobalRow (globalRow),
2591  std::logic_error, "indices.size() = " << indices.size ()
2592  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2593  << getNumEntriesInGlobalRow (globalRow)
2594  << ". Please report this bug to the Tpetra developers.");
2595  }
2596  }
2597 
2598 
// Public entry point for inserting local column indices into a local row.
//
// localRow: local index of the target row; must be in the row Map on the
//           calling process.
// indices:  local column indices to insert.
//
// Throws std::runtime_error if fill is not active, if the graph is globally
// indexed, if the graph has no column Map, or if localRow is not a local
// element of the row Map. Allocates local index storage on first use. When
// debug_ is set, every input index is checked against the column Map and
// std::invalid_argument is thrown, listing the offending indices, if any is
// not a local element of it. The actual insertion is delegated to
// insertLocalIndicesImpl.
2599  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2600  void
2602  insertLocalIndices (const LocalOrdinal localRow,
2603  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2604  {
2605  const char tfecfFuncName[] = "insertLocalIndices: ";
2606 
2607  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2608  (! isFillActive (), std::runtime_error, "Fill must be active.");
2609  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2610  (isGloballyIndexed (), std::runtime_error,
2611  "Graph indices are global; use insertGlobalIndices().");
2612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2613  (! hasColMap (), std::runtime_error,
2614  "Cannot insert local indices without a column Map.");
2615  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2616  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2617  "Local row index " << localRow << " is not in the row Map "
2618  "on the calling process.");
2619  if (! indicesAreAllocated ()) {
2620  allocateIndices (LocalIndices, verbose_);
2621  }
2622 
2623  if (debug_) {
2624  // In debug mode, if the graph has a column Map, test whether any
2625  // of the given column indices are not in the column Map. Keep
2626  // track of the invalid column indices so we can tell the user
2627  // about them.
2628  if (hasColMap ()) {
2629  using Teuchos::Array;
2630  using Teuchos::toString;
2631  using std::endl;
2632  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2633 
2634  const map_type& colMap = *colMap_;
2635  Array<LocalOrdinal> badColInds;
2636  bool allInColMap = true;
2637  for (size_type k = 0; k < indices.size (); ++k) {
2638  if (! colMap.isNodeLocalElement (indices[k])) {
2639  allInColMap = false;
2640  badColInds.push_back (indices[k]);
2641  }
2642  }
2643  if (! allInColMap) {
2644  std::ostringstream os;
2645  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2646  "entries in owned row " << localRow << ", at the following column "
2647  "indices: " << toString (indices) << "." << endl;
2648  os << "Of those, the following indices are not in the column Map on "
2649  "this process: " << toString (badColInds) << "." << endl << "Since "
2650  "the graph has a column Map already, it is invalid to insert entries "
2651  "at those locations.";
2652  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2653  }
2654  }
2655  }
2656 
2657  insertLocalIndicesImpl (localRow, indices);
2658 
2659  if (debug_) {
2660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2661  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2662  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2663  "! isLocallyIndexed() is true. Please report this bug to the "
2664  "Tpetra developers.");
2665  }
2666  }
2667 
2668  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2669  void
2671  insertLocalIndices (const LocalOrdinal localRow,
2672  const LocalOrdinal numEnt,
2673  const LocalOrdinal inds[])
2674  {
2675  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2676  this->insertLocalIndices (localRow, indsT);
2677  }
2678 
2679 
2680  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2681  void
2683  insertGlobalIndices (const GlobalOrdinal gblRow,
2684  const LocalOrdinal numInputInds,
2685  const GlobalOrdinal inputGblColInds[])
2686  {
2687  typedef LocalOrdinal LO;
2688  const char tfecfFuncName[] = "insertGlobalIndices: ";
2689 
2690  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2691  (this->isLocallyIndexed (), std::runtime_error,
2692  "graph indices are local; use insertLocalIndices().");
2693  // This can't really be satisfied for now, because if we are
2694  // fillComplete(), then we are local. In the future, this may
2695  // change. However, the rule that modification require active
2696  // fill will not change.
2697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2698  (! this->isFillActive (), std::runtime_error,
2699  "You are not allowed to call this method if fill is not active. "
2700  "If fillComplete has been called, you must first call resumeFill "
2701  "before you may insert indices.");
2702  if (! indicesAreAllocated ()) {
2703  allocateIndices (GlobalIndices, verbose_);
2704  }
2705  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2706  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2707  if (debug_) {
2708  if (this->hasColMap ()) {
2709  using std::endl;
2710  const map_type& colMap = * (this->colMap_);
2711  // In a debug build, keep track of the nonowned ("bad") column
2712  // indices, so that we can display them in the exception
2713  // message. In a release build, just ditch the loop early if
2714  // we encounter a nonowned column index.
2715  std::vector<GlobalOrdinal> badColInds;
2716  bool allInColMap = true;
2717  for (LO k = 0; k < numInputInds; ++k) {
2718  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2719  allInColMap = false;
2720  badColInds.push_back (inputGblColInds[k]);
2721  }
2722  }
2723  if (! allInColMap) {
2724  std::ostringstream os;
2725  os << "You attempted to insert entries in owned row " << gblRow
2726  << ", at the following column indices: [";
2727  for (LO k = 0; k < numInputInds; ++k) {
2728  os << inputGblColInds[k];
2729  if (k + static_cast<LO> (1) < numInputInds) {
2730  os << ",";
2731  }
2732  }
2733  os << "]." << endl << "Of those, the following indices are not in "
2734  "the column Map on this process: [";
2735  for (size_t k = 0; k < badColInds.size (); ++k) {
2736  os << badColInds[k];
2737  if (k + size_t (1) < badColInds.size ()) {
2738  os << ",";
2739  }
2740  }
2741  os << "]." << endl << "Since the matrix has a column Map already, "
2742  "it is invalid to insert entries at those locations.";
2743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2744  (true, std::invalid_argument, os.str ());
2745  }
2746  }
2747  } // debug_
2748  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2749  }
2750  else { // a nonlocal row
2751  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2752  numInputInds);
2753  }
2754  }
2755 
2756 
2757  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2758  void
2760  insertGlobalIndices (const GlobalOrdinal gblRow,
2761  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2762  {
2763  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2764  inputGblColInds.getRawPtr ());
2765  }
2766 
2767 
2768  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2769  void
2771  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2772  const GlobalOrdinal gblColInds[],
2773  const LocalOrdinal numGblColInds)
2774  {
2775  typedef LocalOrdinal LO;
2776  typedef GlobalOrdinal GO;
2777  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2778 
2779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2780  (this->isLocallyIndexed (), std::runtime_error,
2781  "Graph indices are local; use insertLocalIndices().");
2782  // This can't really be satisfied for now, because if we are
2783  // fillComplete(), then we are local. In the future, this may
2784  // change. However, the rule that modification require active
2785  // fill will not change.
2786  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2787  (! this->isFillActive (), std::runtime_error,
2788  "You are not allowed to call this method if fill is not active. "
2789  "If fillComplete has been called, you must first call resumeFill "
2790  "before you may insert indices.");
2791  if (! indicesAreAllocated ()) {
2792  allocateIndices (GlobalIndices, verbose_);
2793  }
2794 
2795  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2796  // If we have a column Map, use it to filter the entries.
2797  if (! colMap_.is_null ()) {
2798  const map_type& colMap = * (this->colMap_);
2799 
2800  LO curOffset = 0;
2801  while (curOffset < numGblColInds) {
2802  // Find a sequence of input indices that are in the column Map
2803  // on the calling process. Doing a sequence at a time,
2804  // instead of one at a time, amortizes some overhead.
2805  LO endOffset = curOffset;
2806  for ( ; endOffset < numGblColInds; ++endOffset) {
2807  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2808  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2809  break; // first entry, in current sequence, not in the column Map
2810  }
2811  }
2812  // curOffset, endOffset: half-exclusive range of indices in
2813  // the column Map on the calling process. If endOffset ==
2814  // curOffset, the range is empty.
2815  const LO numIndInSeq = (endOffset - curOffset);
2816  if (numIndInSeq != 0) {
2817  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2818  numIndInSeq);
2819  }
2820  // Invariant before this line: Either endOffset ==
2821  // numGblColInds, or gblColInds[endOffset] is not in the
2822  // column Map on the calling process.
2823  curOffset = endOffset + 1;
2824  }
2825  }
2826  else {
2827  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2828  gblColInds_av.size ());
2829  }
2830  }
2831 
2832  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2833  void
2835  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2836  const GlobalOrdinal gblColInds[],
2837  const LocalOrdinal numGblColInds)
2838  {
2839  // This creates the std::vector if it doesn't exist yet.
2840  // std::map's operator[] does a lookup each time, so it's better
2841  // to pull nonlocals_[grow] out of the loop.
2842  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2843  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2844  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2845  // order to avoid duplicates. globalAssemble() sorts these
2846  // anyway.
2847  nonlocalRow.push_back (gblColInds[k]);
2848  }
2849  }
2850 
2851  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2852  void
2854  removeLocalIndices (LocalOrdinal lrow)
2855  {
2856  const char tfecfFuncName[] = "removeLocalIndices: ";
2857  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2858  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2860  isStorageOptimized (), std::runtime_error,
2861  "cannot remove indices after optimizeStorage() has been called.");
2862  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2863  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2865  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2866  "Local row " << lrow << " is not in the row Map on the calling process.");
2867  if (! indicesAreAllocated ()) {
2868  allocateIndices (LocalIndices, verbose_);
2869  }
2870 
2871  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
2872  // all processes?
2873  clearGlobalConstants ();
2874 
2875  if (k_numRowEntries_.extent (0) != 0) {
2876  this->k_numRowEntries_(lrow) = 0;
2877  }
2878 
2879  if (debug_) {
2880  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2881  (getNumEntriesInLocalRow (lrow) != 0 ||
2882  ! indicesAreAllocated () ||
2883  ! isLocallyIndexed (), std::logic_error,
2884  "Violated stated post-conditions. Please contact Tpetra team.");
2885  }
2886  }
2887 
2888 
2889  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2890  void
2892  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
2893  const typename local_graph_type::entries_type::non_const_type& columnIndices)
2894  {
2895  const char tfecfFuncName[] = "setAllIndices: ";
2896  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2897  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2898  "The graph must have a column Map before you may call this method.");
2899  LocalOrdinal numLocalRows = this->getNodeNumRows ();
2900  {
2901  LocalOrdinal rowPtrLen = rowPointers.size();
2902  if(numLocalRows == 0) {
2903  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2904  rowPtrLen != 0 && rowPtrLen != 1,
2905  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2906  }
2907  else {
2908  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2909  rowPtrLen != numLocalRows + 1,
2910  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2911  " != this->getNodeNumRows()+1 = " << (numLocalRows + 1) << ".");
2912  }
2913  }
2914 
2915  if (debug_ && this->isSorted()) {
2916  // Verify that the local indices are actually sorted
2917  int notSorted = 0;
2918  using exec_space = typename local_graph_type::execution_space;
2919  using size_type = typename local_graph_type::size_type;
2920  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2921  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2922  {
2923  size_type rowBegin = rowPointers(i);
2924  size_type rowEnd = rowPointers(i + 1);
2925  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2926  {
2927  if(columnIndices(j - 1) > columnIndices(j))
2928  {
2929  lNotSorted = 1;
2930  }
2931  }
2932  }, notSorted);
2933  //All-reduce notSorted to avoid rank divergence
2934  int globalNotSorted = 0;
2935  auto comm = this->getComm();
2936  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2937  Teuchos::outArg (globalNotSorted));
2938  if (globalNotSorted)
2939  {
2940  std::string message;
2941  if (notSorted)
2942  {
2943  //Only print message from ranks with the problem
2944  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2945  }
2946  Details::gathervPrint(std::cout, message, *comm);
2947  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2948  }
2949  }
2950 
2951  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
2952  // since the future model will be allocation at construction, not
2953  // lazy allocation on first insert.
2954  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2955  ((this->k_lclInds1D_.extent (0) != 0 || this->k_gblInds1D_.extent (0) != 0),
2956  std::runtime_error, "You may not call this method if 1-D data "
2957  "structures are already allocated.");
2958 
2959  indicesAreAllocated_ = true;
2960  indicesAreLocal_ = true;
2961  indicesAreSorted_ = true;
2962  noRedundancies_ = true;
2963  k_lclInds1D_ = columnIndices;
2964  k_rowPtrs_ = rowPointers;
2965  // Storage MUST be packed, since the interface doesn't give any
2966  // way to indicate any extra space at the end of each row.
2967  storageStatus_ = Details::STORAGE_1D_PACKED;
2968 
2969  // Build the local graph.
2970  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
2971 
2972  // These normally get cleared out at the end of allocateIndices.
2973  // It makes sense to clear them out here, because at the end of
2974  // this method, the graph is allocated on the calling process.
2975  numAllocForAllRows_ = 0;
2976  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2977 
2978  checkInternalState ();
2979  }
2980 
2981 
2982  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2983  void
2985  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2986  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2987  {
2988  using Kokkos::View;
2989  typedef typename local_graph_type::row_map_type row_map_type;
2990  typedef typename row_map_type::array_layout layout_type;
2991  typedef typename row_map_type::non_const_value_type row_offset_type;
2992  typedef View<size_t*, layout_type , Kokkos::HostSpace,
2993  Kokkos::MemoryUnmanaged> input_view_type;
2994  typedef typename row_map_type::non_const_type nc_row_map_type;
2995 
2996  const size_t size = static_cast<size_t> (rowPointers.size ());
2997  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2998  input_view_type ptr_in (rowPointers.getRawPtr (), size);
2999 
3000  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3001 
3002  if (same) { // size_t == row_offset_type
3003  // This compile-time logic ensures that the compiler never sees
3004  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3005  // ...> unless size_t == row_offset_type.
3006  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3007  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3008  nc_row_map_type,
3009  input_view_type>::select (ptr_rot, ptr_decoy),
3010  ptr_in);
3011  }
3012  else { // size_t != row_offset_type
3013  // CudaUvmSpace != HostSpace, so this will be false in that case.
3014  constexpr bool inHostMemory =
3015  std::is_same<typename row_map_type::memory_space,
3016  Kokkos::HostSpace>::value;
3017  if (inHostMemory) {
3018  // Copy (with cast from size_t to row_offset_type, with bounds
3019  // checking if necessary) to ptr_rot.
3020  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3021  }
3022  else { // Copy input row offsets to device first.
3023  //
3024  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3025  // execution space would avoid the double copy.
3026  //
3027  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3028  Kokkos::deep_copy (ptr_st, ptr_in);
3029  // Copy on device (casting from size_t to row_offset_type,
3030  // with bounds checking if necessary) to ptr_rot. This
3031  // executes in the output View's execution space, which is the
3032  // same as execution_space.
3033  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3034  }
3035  }
3036 
3037  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3038  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3039  setAllIndices (ptr_rot, k_ind);
3040  }
3041 
3042 
3043  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3044  void
3046  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3047  size_t& boundForAllLocalRows,
3048  bool& boundSameForAllLocalRows) const
3049  {
3050  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3051  const char suffix[] = " Please report this bug to the Tpetra developers.";
3052 
3053  // The three output arguments. We assign them to the actual
3054  // output arguments at the end, in order to implement
3055  // transactional semantics.
3056  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3057  size_t numEntriesForAll = 0;
3058  bool allRowsSame = true;
3059 
3060  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3061 
3062  if (this->indicesAreAllocated ()) {
3063  if (this->isStorageOptimized ()) {
3064  // left with the case that we have optimized storage. in this
3065  // case, we have to construct a list of row sizes.
3066  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3067  (numRows != 0 && k_rowPtrs_.extent (0) == 0, std::logic_error,
3068  "The graph has " << numRows << " (> 0) row"
3069  << (numRows != 1 ? "s" : "") << " on the calling process, "
3070  "but the k_rowPtrs_ array has zero entries." << suffix);
3071  Teuchos::ArrayRCP<size_t> numEnt;
3072  if (numRows != 0) {
3073  numEnt = Teuchos::arcp<size_t> (numRows);
3074  }
3075 
3076  // We have to iterate through the row offsets anyway, so we
3077  // might as well check whether all rows' bounds are the same.
3078  bool allRowsReallySame = false;
3079  for (ptrdiff_t i = 0; i < numRows; ++i) {
3080  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3081  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3082  allRowsReallySame = false;
3083  }
3084  }
3085  if (allRowsReallySame) {
3086  if (numRows == 0) {
3087  numEntriesForAll = 0;
3088  } else {
3089  numEntriesForAll = numEnt[1] - numEnt[0];
3090  }
3091  allRowsSame = true;
3092  }
3093  else {
3094  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3095  allRowsSame = false; // conservatively; we don't check the array
3096  }
3097  }
3098  else if (k_numRowEntries_.extent (0) != 0) {
3099  // This is a shallow copy; the ArrayRCP wraps the View in a
3100  // custom destructor, which ensures correct deallocation if
3101  // that is the only reference to the View. Furthermore, this
3102  // View is a host View, so this doesn't assume UVM.
3103  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3104  allRowsSame = false; // conservatively; we don't check the array
3105  }
3106  else {
3107  numEntriesForAll = 0;
3108  allRowsSame = true;
3109  }
3110  }
3111  else { // indices not allocated
3112  if (k_numAllocPerRow_.extent (0) != 0) {
3113  // This is a shallow copy; the ArrayRCP wraps the View in a
3114  // custom destructor, which ensures correct deallocation if
3115  // that is the only reference to the View. Furthermore, this
3116  // View is a host View, so this doesn't assume UVM.
3117  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3118  allRowsSame = false; // conservatively; we don't check the array
3119  }
3120  else {
3121  numEntriesForAll = numAllocForAllRows_;
3122  allRowsSame = true;
3123  }
3124  }
3125 
3126  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3127  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3128  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3129  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3130  "size " << numEntriesPerRow.size () << "." << suffix);
3131  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3132  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3133  "numEntriesForAll and allRowsSame are not consistent. The former "
3134  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3135  << suffix);
3136  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3137  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3138  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3139  "nonzero length " << numEntriesForAll << ", but the latter is true."
3140  << suffix);
3141 
3142  boundPerLocalRow = numEntriesPerRow;
3143  boundForAllLocalRows = numEntriesForAll;
3144  boundSameForAllLocalRows = allRowsSame;
3145  }
3146 
3147 
3148  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3149  void
3152  {
3153  using Teuchos::Comm;
3154  using Teuchos::outArg;
3155  using Teuchos::RCP;
3156  using Teuchos::rcp;
3157  using Teuchos::REDUCE_MAX;
3158  using Teuchos::REDUCE_MIN;
3159  using Teuchos::reduceAll;
3160  using std::endl;
3161  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
3162  using LO = local_ordinal_type;
3163  using GO = global_ordinal_type;
3164  using size_type = typename Teuchos::Array<GO>::size_type;
3165  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3166 
3167  std::unique_ptr<std::string> prefix;
3168  if (verbose_) {
3169  prefix = this->createPrefix("CrsGraph", "globalAssemble");
3170  std::ostringstream os;
3171  os << *prefix << "Start" << endl;
3172  std::cerr << os.str();
3173  }
3174  RCP<const Comm<int> > comm = getComm ();
3175 
3176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3177  (! isFillActive (), std::runtime_error, "Fill must be active before "
3178  "you may call this method.");
3179 
3180  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3181 
3182  // If no processes have nonlocal rows, then we don't have to do
3183  // anything. Checking this is probably cheaper than constructing
3184  // the Map of nonlocal rows (see below) and noticing that it has
3185  // zero global entries.
3186  {
3187  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3188  int someoneHasNonlocalRows = 0;
3189  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3190  outArg (someoneHasNonlocalRows));
3191  if (someoneHasNonlocalRows == 0) {
3192  if (verbose_) {
3193  std::ostringstream os;
3194  os << *prefix << "Done: No nonlocal rows" << endl;
3195  std::cerr << os.str();
3196  }
3197  return;
3198  }
3199  else if (verbose_) {
3200  std::ostringstream os;
3201  os << *prefix << "At least 1 process has nonlocal rows"
3202  << endl;
3203  std::cerr << os.str();
3204  }
3205  }
3206 
3207  // 1. Create a list of the "nonlocal" rows on each process. this
3208  // requires iterating over nonlocals_, so while we do this,
3209  // deduplicate the entries and get a count for each nonlocal
3210  // row on this process.
3211  // 2. Construct a new row Map corresponding to those rows. This
3212  // Map is likely overlapping. We know that the Map is not
3213  // empty on all processes, because the above all-reduce and
3214  // return exclude that case.
3215 
3216  RCP<const map_type> nonlocalRowMap;
3217  // Keep this for CrsGraph's constructor.
3218  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3219  {
3220  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3221  size_type curPos = 0;
3222  for (auto mapIter = this->nonlocals_.begin ();
3223  mapIter != this->nonlocals_.end ();
3224  ++mapIter, ++curPos) {
3225  myNonlocalGblRows[curPos] = mapIter->first;
3226  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3227  std::sort (gblCols.begin (), gblCols.end ());
3228  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3229  gblCols.erase (vecLast, gblCols.end ());
3230  numEntPerNonlocalRow[curPos] = gblCols.size ();
3231  }
3232 
3233  // Currently, Map requires that its indexBase be the global min
3234  // of all its global indices. Map won't compute this for us, so
3235  // we must do it. If our process has no nonlocal rows, set the
3236  // "min" to the max possible GO value. This ensures that if
3237  // some process has at least one nonlocal row, then it will pick
3238  // that up as the min. We know that at least one process has a
3239  // nonlocal row, since the all-reduce and return at the top of
3240  // this method excluded that case.
3241  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3242  {
3243  auto iter = std::min_element (myNonlocalGblRows.begin (),
3244  myNonlocalGblRows.end ());
3245  if (iter != myNonlocalGblRows.end ()) {
3246  myMinNonlocalGblRow = *iter;
3247  }
3248  }
3249  GO gblMinNonlocalGblRow = 0;
3250  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3251  outArg (gblMinNonlocalGblRow));
3252  const GO indexBase = gblMinNonlocalGblRow;
3253  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3254  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3255  }
3256 
3257  if (verbose_) {
3258  std::ostringstream os;
3259  os << *prefix << "nonlocalRowMap->getIndexBase()="
3260  << nonlocalRowMap->getIndexBase() << endl;
3261  std::cerr << os.str();
3262  }
3263 
3264  // 3. Use the column indices for each nonlocal row, as stored in
3265  // nonlocals_, to construct a CrsGraph corresponding to
3266  // nonlocal rows. We need, but we have, exact counts of the
3267  // number of entries in each nonlocal row.
3268 
3269  RCP<crs_graph_type> nonlocalGraph =
3270  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow(),
3271  StaticProfile));
3272  {
3273  size_type curPos = 0;
3274  for (auto mapIter = this->nonlocals_.begin ();
3275  mapIter != this->nonlocals_.end ();
3276  ++mapIter, ++curPos) {
3277  const GO gblRow = mapIter->first;
3278  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3279  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3280  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3281  }
3282  }
3283  if (verbose_) {
3284  std::ostringstream os;
3285  os << *prefix << "Built nonlocal graph" << endl;
3286  std::cerr << os.str();
3287  }
3288  // There's no need to fill-complete the nonlocals graph.
3289  // We just use it as a temporary container for the Export.
3290 
3291  // 4. If the original row Map is one to one, then we can Export
3292  // directly from nonlocalGraph into this. Otherwise, we have
3293  // to create a temporary graph with a one-to-one row Map,
3294  // Export into that, then Import from the temporary graph into
3295  // *this.
3296 
3297  auto origRowMap = this->getRowMap ();
3298  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3299 
3300  if (origRowMapIsOneToOne) {
3301  if (verbose_) {
3302  std::ostringstream os;
3303  os << *prefix << "Original row Map is 1-to-1" << endl;
3304  std::cerr << os.str();
3305  }
3306  export_type exportToOrig (nonlocalRowMap, origRowMap);
3307  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3308  // We're done at this point!
3309  }
3310  else {
3311  if (verbose_) {
3312  std::ostringstream os;
3313  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3314  std::cerr << os.str();
3315  }
3316  // If you ask a Map whether it is one to one, it does some
3317  // communication and stashes intermediate results for later use
3318  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3319  // much more then the original cost of calling isOneToOne.
3320  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3321  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3322 
3323  // Create a temporary graph with the one-to-one row Map.
3324  //
3325  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3326  // row, to avoid reallocation during the Export operation.
3327  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3328 
3329  // Export from graph of nonlocals into the temp one-to-one graph.
3330  if (verbose_) {
3331  std::ostringstream os;
3332  os << *prefix << "Export nonlocal graph" << endl;
3333  std::cerr << os.str();
3334  }
3335  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3336 
3337  // We don't need the graph of nonlocals anymore, so get rid of
3338  // it, to keep the memory high-water mark down.
3339  nonlocalGraph = Teuchos::null;
3340 
3341  // Import from the one-to-one graph to the original graph.
3342  import_type importToOrig (oneToOneRowMap, origRowMap);
3343  if (verbose_) {
3344  std::ostringstream os;
3345  os << *prefix << "Import nonlocal graph" << endl;
3346  std::cerr << os.str();
3347  }
3348  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3349  }
3350 
3351  // It's safe now to clear out nonlocals_, since we've already
3352  // committed side effects to *this. The standard idiom for
3353  // clearing a Container like std::map, is to swap it with an empty
3354  // Container and let the swapped Container fall out of scope.
3355  decltype (this->nonlocals_) newNonlocals;
3356  std::swap (this->nonlocals_, newNonlocals);
3357 
3358  checkInternalState ();
3359  if (verbose_) {
3360  std::ostringstream os;
3361  os << *prefix << "Done" << endl;
3362  std::cerr << os.str();
3363  }
3364  }
3365 
3366 
3367  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3368  void
3370  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3371  {
3372  clearGlobalConstants();
3373  if (params != Teuchos::null) this->setParameterList (params);
3374  // either still sorted/merged or initially sorted/merged
3375  indicesAreSorted_ = true;
3376  noRedundancies_ = true;
3377  fillComplete_ = false;
3378  }
3379 
3380 
3381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3382  void
3384  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3385  {
3386  // If the graph already has domain and range Maps, don't clobber
3387  // them. If it doesn't, use the current row Map for both the
3388  // domain and range Maps.
3389  //
3390  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3391  // column Map, and column indices are inserted which are not in
3392  // the row Map on any process, this will cause troubles. However,
3393  // that is not a common case for most applications that we
3394  // encounter, and checking for it might require more
3395  // communication.
3396  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3397  if (domMap.is_null ()) {
3398  domMap = this->getRowMap ();
3399  }
3400  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3401  if (ranMap.is_null ()) {
3402  ranMap = this->getRowMap ();
3403  }
3404  this->fillComplete (domMap, ranMap, params);
3405  }
3406 
3407 
3408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3409  void
3411  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3412  const Teuchos::RCP<const map_type>& rangeMap,
3413  const Teuchos::RCP<Teuchos::ParameterList>& params)
3414  {
3415  using std::endl;
3416  const char tfecfFuncName[] = "fillComplete: ";
3417  const bool verbose = verbose_;
3418 
3419  std::unique_ptr<std::string> prefix;
3420  if (verbose) {
3421  prefix = this->createPrefix("CrsGraph", "fillComplete");
3422  std::ostringstream os;
3423  os << *prefix << "Start" << endl;
3424  std::cerr << os.str();
3425  }
3426 
3427  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3428  (! isFillActive () || isFillComplete (), std::runtime_error,
3429  "Graph fill state must be active (isFillActive() "
3430  "must be true) before calling fillComplete().");
3431 
3432  const int numProcs = getComm ()->getSize ();
3433 
3434  //
3435  // Read and set parameters
3436  //
3437 
3438  // Does the caller want to sort remote GIDs (within those owned by
3439  // the same process) in makeColMap()?
3440  if (! params.is_null ()) {
3441  if (params->isParameter ("sort column map ghost gids")) {
3442  sortGhostsAssociatedWithEachProcessor_ =
3443  params->get<bool> ("sort column map ghost gids",
3444  sortGhostsAssociatedWithEachProcessor_);
3445  }
3446  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3447  sortGhostsAssociatedWithEachProcessor_ =
3448  params->get<bool> ("Sort column Map ghost GIDs",
3449  sortGhostsAssociatedWithEachProcessor_);
3450  }
3451  }
3452 
3453  // If true, the caller promises that no process did nonlocal
3454  // changes since the last call to fillComplete.
3455  bool assertNoNonlocalInserts = false;
3456  if (! params.is_null ()) {
3457  assertNoNonlocalInserts =
3458  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3459  }
3460 
3461  //
3462  // Allocate indices, if they haven't already been allocated
3463  //
3464  if (! indicesAreAllocated ()) {
3465  if (hasColMap ()) {
3466  // We have a column Map, so use local indices.
3467  allocateIndices (LocalIndices, verbose);
3468  } else {
3469  // We don't have a column Map, so use global indices.
3470  allocateIndices (GlobalIndices, verbose);
3471  }
3472  }
3473 
3474  //
3475  // Do global assembly, if requested and if the communicator
3476  // contains more than one process.
3477  //
3478  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3479  if (mayNeedGlobalAssemble) {
3480  // This first checks if we need to do global assembly.
3481  // The check costs a single all-reduce.
3482  globalAssemble ();
3483  }
3484  else {
3485  const size_t numNonlocals = nonlocals_.size();
3486  if (verbose) {
3487  std::ostringstream os;
3488  os << *prefix << "Do not need to call globalAssemble; "
3489  "assertNoNonlocalInserts="
3490  << (assertNoNonlocalInserts ? "true" : "false")
3491  << "numProcs=" << numProcs
3492  << ", nonlocals_.size()=" << numNonlocals << endl;
3493  std::cerr << os.str();
3494  }
3495  const int lclNeededGlobalAssemble =
3496  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3497  if (lclNeededGlobalAssemble != 0 && verbose) {
3498  std::ostringstream os;
3499  os << *prefix;
3500  Details::Impl::verbosePrintMap(
3501  os, nonlocals_.begin(), nonlocals_.end(),
3502  nonlocals_.size(), "nonlocals_");
3503  std::cerr << os.str() << endl;
3504  }
3505 
3506  if (debug_) {
3507  auto map = this->getMap();
3508  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3509  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3510  if (! comm.is_null()) {
3511  using Teuchos::REDUCE_MAX;
3512  using Teuchos::reduceAll;
3513  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3514  Teuchos::outArg(gblNeededGlobalAssemble));
3515  }
3516  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3517  (gblNeededGlobalAssemble != 0, std::runtime_error,
3518  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3519  "least one process in the CrsGraph's communicator. This "
3520  "means either that you incorrectly set the "
3521  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3522  "or that you inserted invalid entries. "
3523  "Rerun with the environment variable TPETRA_VERBOSE="
3524  "CrsGraph set to see the entries of nonlocals_ on every "
3525  "MPI process (WARNING: lots of output).");
3526  }
3527  else {
3528  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3529  (lclNeededGlobalAssemble != 0, std::runtime_error,
3530  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3531  "calling process. This means either that you incorrectly "
3532  "set the \"No Nonlocal Changes\" fillComplete parameter "
3533  "to true, or that you inserted invalid entries. "
3534  "Rerun with the environment "
3535  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3536  "of nonlocals_ on every MPI process (WARNING: lots of "
3537  "output).");
3538  }
3539  }
3540 
3541  // Set domain and range Map. This may clear the Import / Export
3542  // objects if the new Maps differ from any old ones.
3543  setDomainRangeMaps (domainMap, rangeMap);
3544 
3545  // If the graph does not already have a column Map (either from
3546  // the user constructor calling the version of the constructor
3547  // that takes a column Map, or from a previous fillComplete call),
3548  // then create it.
3549  Teuchos::Array<int> remotePIDs (0);
3550  const bool mustBuildColMap = ! this->hasColMap ();
3551  if (mustBuildColMap) {
3552  this->makeColMap (remotePIDs); // resized on output
3553  }
3554 
3555  // Make indices local, if they aren't already.
3556  // The method doesn't do any work if the indices are already local.
3557  const std::pair<size_t, std::string> makeIndicesLocalResult =
3558  this->makeIndicesLocal(verbose);
3559  if (debug_) {
3560  using Details::gathervPrint;
3561  using Teuchos::RCP;
3562  using Teuchos::REDUCE_MIN;
3563  using Teuchos::reduceAll;
3564  using Teuchos::outArg;
3565 
3566  RCP<const map_type> map = this->getMap ();
3567  RCP<const Teuchos::Comm<int> > comm;
3568  if (! map.is_null ()) {
3569  comm = map->getComm ();
3570  }
3571  if (comm.is_null ()) {
3572  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3573  (makeIndicesLocalResult.first != 0, std::runtime_error,
3574  makeIndicesLocalResult.second);
3575  }
3576  else {
3577  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3578  int gblSuccess = 0; // output argument
3579  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3580  if (gblSuccess != 1) {
3581  std::ostringstream os;
3582  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3584  (true, std::runtime_error, os.str ());
3585  }
3586  }
3587  }
3588  else {
3589  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3590  // the error state to makeImportExport or
3591  // computeGlobalConstants, which may do all-reduces and thus may
3592  // have the opportunity to communicate that error state.
3593  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3594  (makeIndicesLocalResult.first != 0, std::runtime_error,
3595  makeIndicesLocalResult.second);
3596  }
3597 
3598  // If this process has no indices, then CrsGraph considers it
3599  // already trivially sorted and merged. Thus, this method need
3600  // not be called on all processes in the row Map's communicator.
3601  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3602 
3603  // Make Import and Export objects, if they haven't been made
3604  // already. If we made a column Map above, reuse information from
3605  // that process to avoid communiation in the Import setup.
3606  this->makeImportExport (remotePIDs, mustBuildColMap);
3607 
3608  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3609  this->fillLocalGraph (params);
3610 
3611  const bool callComputeGlobalConstants = params.get () == nullptr ||
3612  params->get ("compute global constants", true);
3613  if (callComputeGlobalConstants) {
3614  this->computeGlobalConstants ();
3615  }
3616  else {
3617  this->computeLocalConstants ();
3618  }
3619  this->fillComplete_ = true;
3620  this->checkInternalState ();
3621 
3622  if (verbose) {
3623  std::ostringstream os;
3624  os << *prefix << "Done" << endl;
3625  std::cerr << os.str();
3626  }
3627  }
3628 
3629 
3630  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3631  void
3633  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3634  const Teuchos::RCP<const map_type>& rangeMap,
3635  const Teuchos::RCP<const import_type>& importer,
3636  const Teuchos::RCP<const export_type>& exporter,
3637  const Teuchos::RCP<Teuchos::ParameterList>& params)
3638  {
3639  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3640 #ifdef HAVE_TPETRA_MMM_TIMINGS
3641  std::string label;
3642  if(!params.is_null())
3643  label = params->get("Timer Label",label);
3644  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3645  using Teuchos::TimeMonitor;
3646  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3647 #endif
3648 
3649 
3650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3651  domainMap.is_null () || rangeMap.is_null (),
3652  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3653  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3654  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3655  "call this method unless the graph has a column Map.");
3656  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3657  getNodeNumRows () > 0 && k_rowPtrs_.extent (0) == 0,
3658  std::runtime_error, "The calling process has getNodeNumRows() = "
3659  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3660  "been set.");
3661  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3662  static_cast<size_t> (k_rowPtrs_.extent (0)) != getNodeNumRows () + 1,
3663  std::runtime_error, "The row offsets array has length " <<
3664  k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = " <<
3665  (getNodeNumRows () + 1) << ".");
3666 
3667  // Note: We don't need to do the following things which are normally done in fillComplete:
3668  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3669 
3670  // Constants from allocateIndices
3671  //
3672  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3673  // away once the graph is allocated. expertStaticFillComplete
3674  // either presumes that the graph is allocated, or "allocates" it.
3675  //
3676  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3677  // version of CrsGraph is to allocate in the constructor, not
3678  // lazily on first insert. That will make both
3679  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3680  numAllocForAllRows_ = 0;
3681  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3682  indicesAreAllocated_ = true;
3683 
3684  // Constants from makeIndicesLocal
3685  //
3686  // The graph has a column Map, so its indices had better be local.
3687  indicesAreLocal_ = true;
3688  indicesAreGlobal_ = false;
3689 
3690  // set domain/range map: may clear the import/export objects
3691 #ifdef HAVE_TPETRA_MMM_TIMINGS
3692  MM = Teuchos::null;
3693  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3694 #endif
3695  setDomainRangeMaps (domainMap, rangeMap);
3696 
3697  // Presume the user sorted and merged the arrays first
3698  indicesAreSorted_ = true;
3699  noRedundancies_ = true;
3700 
3701  // makeImportExport won't create a new importer/exporter if I set one here first.
3702 #ifdef HAVE_TPETRA_MMM_TIMINGS
3703  MM = Teuchos::null;
3704  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3705 #endif
3706 
3707  importer_ = Teuchos::null;
3708  exporter_ = Teuchos::null;
3709  if (importer != Teuchos::null) {
3710  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3711  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3712  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3713  std::invalid_argument,": importer does not match matrix maps.");
3714  importer_ = importer;
3715 
3716  }
3717 
3718 #ifdef HAVE_TPETRA_MMM_TIMINGS
3719  MM = Teuchos::null;
3720  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3721 #endif
3722 
3723  if (exporter != Teuchos::null) {
3724  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3725  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3726  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3727  std::invalid_argument,": exporter does not match matrix maps.");
3728  exporter_ = exporter;
3729  }
3730 
3731 #ifdef HAVE_TPETRA_MMM_TIMINGS
3732  MM = Teuchos::null;
3733  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3734 #endif
3735  Teuchos::Array<int> remotePIDs (0); // unused output argument
3736  this->makeImportExport (remotePIDs, false);
3737 
3738 #ifdef HAVE_TPETRA_MMM_TIMINGS
3739  MM = Teuchos::null;
3740  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3741 #endif
3742  this->fillLocalGraph (params);
3743 
3744  const bool callComputeGlobalConstants = params.get () == nullptr ||
3745  params->get ("compute global constants", true);
3746 
3747  if (callComputeGlobalConstants) {
3748 #ifdef HAVE_TPETRA_MMM_TIMINGS
3749  MM = Teuchos::null;
3750  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3751 #endif // HAVE_TPETRA_MMM_TIMINGS
3752  this->computeGlobalConstants ();
3753  }
3754  else {
3755 #ifdef HAVE_TPETRA_MMM_TIMINGS
3756  MM = Teuchos::null;
3757  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3758 #endif // HAVE_TPETRA_MMM_TIMINGS
3759  this->computeLocalConstants ();
3760  }
3761 
3762  fillComplete_ = true;
3763 
3764 #ifdef HAVE_TPETRA_MMM_TIMINGS
3765  MM = Teuchos::null;
3766  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3767 #endif
3768  checkInternalState ();
3769  }
3770 
3771 
3772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3773  void
3775  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3776  {
3778  typedef decltype (k_numRowEntries_) row_entries_type;
3779  typedef typename local_graph_type::row_map_type row_map_type;
3780  typedef typename row_map_type::non_const_type non_const_row_map_type;
3781  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
3782  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3783  "expertStaticFillComplete): ";
3784  const size_t lclNumRows = this->getNodeNumRows ();
3785 
3786  // This method's goal is to fill in the two arrays (compressed
3787  // sparse row format) that define the sparse graph's structure.
3788  //
3789  // Use the nonconst version of row_map_type for ptr_d, because
3790  // the latter is const and we need to modify ptr_d here.
3791  non_const_row_map_type ptr_d;
3792  row_map_type ptr_d_const;
3793  lclinds_1d_type ind_d;
3794 
3795  bool requestOptimizedStorage = true;
3796  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3797  requestOptimizedStorage = false;
3798  }
3799 
3800  // The graph's column indices are currently stored in a 1-D
3801  // format, with row offsets in k_rowPtrs_ and local column indices
3802  // in k_lclInds1D_.
3803 
3804  if (debug_) {
3805  // The graph's array of row offsets must already be allocated.
3806  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3807  (k_rowPtrs_.extent (0) == 0, std::logic_error,
3808  "k_rowPtrs_ has size zero, but shouldn't");
3809  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3810  (k_rowPtrs_.extent (0) != lclNumRows + 1, std::logic_error,
3811  "k_rowPtrs_.extent(0) = "
3812  << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = "
3813  << (lclNumRows + 1) << ".");
3814  const size_t numOffsets = k_rowPtrs_.extent (0);
3815  const auto valToCheck =
3816  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
3817  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3818  (numOffsets != 0 &&
3819  k_lclInds1D_.extent (0) != valToCheck,
3820  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3821  " and k_lclInds1D_.extent(0)=" << k_lclInds1D_.extent(0)
3822  << " != k_rowPtrs_(" << numOffsets << ")=" << valToCheck
3823  << ".");
3824  }
3825 
3826  size_t allocSize = 0;
3827  try {
3828  allocSize = this->getNodeAllocationSize ();
3829  }
3830  catch (std::logic_error& e) {
3831  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3832  (true, std::logic_error, "getNodeAllocationSize threw "
3833  "std::logic_error: " << e.what ());
3834  }
3835  catch (std::runtime_error& e) {
3836  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3837  (true, std::runtime_error, "getNodeAllocationSize threw "
3838  "std::runtime_error: " << e.what ());
3839  }
3840  catch (std::exception& e) {
3841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3842  (true, std::runtime_error, "getNodeAllocationSize threw "
3843  "std::exception: " << e.what ());
3844  }
3845  catch (...) {
3846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3847  (true, std::runtime_error, "getNodeAllocationSize threw "
3848  "an exception not a subclass of std::exception.");
3849  }
3850 
3851  if (this->getNodeNumEntries () != allocSize) {
3852  // The graph's current 1-D storage is "unpacked." This means
3853  // the row offsets may differ from what the final row offsets
3854  // should be. This could happen, for example, if the user set
3855  // an upper bound on the number of entries in each row, but
3856  // didn't fill all those entries.
3857 
3858  if (debug_) {
3859  if (k_rowPtrs_.extent (0) != 0) {
3860  const size_t numOffsets =
3861  static_cast<size_t> (k_rowPtrs_.extent (0));
3862  const auto valToCheck =
3863  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
3864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3865  (valToCheck != size_t(k_lclInds1D_.extent(0)),
3866  std::logic_error, "(Unpacked branch) Before allocating "
3867  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3868  << valToCheck << " != k_lclInds1D_.extent(0)="
3869  << k_lclInds1D_.extent (0) << ".");
3870  }
3871  }
3872 
3873  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3874  // array of valid entry counts per row (k_numRowEntries_).
3875 
3876  // Total number of entries in the matrix on the calling
3877  // process. We will compute this in the loop below. It's
3878  // cheap to compute and useful as a sanity check.
3879  size_t lclTotalNumEntries = 0;
3880  {
3881  // Allocate the packed row offsets array.
3882  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3883  ptr_d_const = ptr_d;
3884 
3885  // It's ok that k_numRowEntries_ is a host View; the
3886  // function can handle this.
3887  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3888  if (debug_) {
3889  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3890  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3891  std::logic_error, "(Unpacked branch) "
3892  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3893  << " != getNodeNumRows()=" << lclNumRows << "");
3894  }
3895 
3896  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3897 
3898  if (debug_) {
3899  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3900  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3901  std::logic_error, "(Unpacked branch) After allocating "
3902  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3903  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3904  const auto valToCheck =
3905  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3906  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3907  (valToCheck != lclTotalNumEntries, std::logic_error,
3908  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3909  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3910  << ") = " << valToCheck << " != total number of entries "
3911  "on the calling process = " << lclTotalNumEntries
3912  << ".");
3913  }
3914  }
3915 
3916  // Allocate the array of packed column indices.
3917  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
3918 
3919  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
3920  // them, using the packed row offsets array ptr_d that we
3921  // created above.
3922  //
3923  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3924  // CrsMatrix?), we need to keep around the unpacked row
3925  // offsets and column indices.
3926 
3927  // Pack the column indices from unpacked k_lclInds1D_ into
3928  // packed ind_d. We will replace k_lclInds1D_ below.
3929  typedef pack_functor<
3930  typename local_graph_type::entries_type::non_const_type,
3931  row_map_type> inds_packer_type;
3932  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
3933  {
3934  typedef typename decltype (ind_d)::execution_space exec_space;
3935  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3936  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3937  }
3938 
3939  if (debug_) {
3940  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3941  (ptr_d.extent (0) == 0, std::logic_error,
3942  "(\"Optimize Storage\"=true branch) After packing, "
3943  "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
3944  "never allocated.");
3945  if (ptr_d.extent (0) != 0) {
3946  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3947  const auto valToCheck =
3948  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3949  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3950  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3951  std::logic_error, "(\"Optimize Storage\"=true branch) "
3952  "After packing, ptr_d(" << (numOffsets-1) << ")="
3953  << valToCheck << " != ind_d.extent(0)="
3954  << ind_d.extent(0) << ".");
3955  }
3956  }
3957  }
3958  else { // We don't have to pack, so just set the pointers.
3959  ptr_d_const = k_rowPtrs_;
3960  ind_d = k_lclInds1D_;
3961 
3962  if (debug_) {
3963  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3964  (ptr_d_const.extent (0) == 0, std::logic_error,
3965  "(\"Optimize Storage\"=false branch) "
3966  "ptr_d_const.extent(0) = 0. This probably means that "
3967  "k_rowPtrs_ was never allocated.");
3968  if (ptr_d_const.extent (0) != 0) {
3969  const size_t numOffsets =
3970  static_cast<size_t> (ptr_d_const.extent (0));
3971  const size_t valToCheck =
3972  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
3973  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3974  (valToCheck != size_t(ind_d.extent (0)),
3975  std::logic_error, "(\"Optimize Storage\"=false branch) "
3976  "ptr_d_const(" << (numOffsets-1) << ")=" << valToCheck
3977  << " != ind_d.extent(0)=" << ind_d.extent (0) << ".");
3978  }
3979  }
3980  }
3981 
3982  if (debug_) {
3983  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3984  (static_cast<size_t> (ptr_d_const.extent (0)) != lclNumRows + 1,
3985  std::logic_error, "After packing, ptr_d_const.extent(0) = " <<
3986  ptr_d_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3987  << ".");
3988  if (ptr_d_const.extent (0) != 0) {
3989  const size_t numOffsets = static_cast<size_t> (ptr_d_const.extent (0));
3990  const auto valToCheck =
3991  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
3992  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3993  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3994  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
3995  << ") = " << valToCheck << " != ind_d.extent(0) = "
3996  << ind_d.extent (0) << ".");
3997  }
3998  }
3999 
4000  if (requestOptimizedStorage) {
4001  // With optimized storage, we don't need to store
4002  // the array of row entry counts.
4003 
4004  // Free graph data structures that are only needed for
4005  // unpacked 1-D storage.
4006  k_numRowEntries_ = row_entries_type ();
4007 
4008  // Keep the new 1-D packed allocations.
4009  k_rowPtrs_ = ptr_d_const;
4010  k_lclInds1D_ = ind_d;
4011 
4012  storageStatus_ = Details::STORAGE_1D_PACKED;
4013  }
4014 
4015  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4016 
4017  // Build the local graph.
4018  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4019  }
4020 
4021  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4022  void
4023  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4024  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4025  {
4026  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4027  //
4028  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4029  // has no entries, because in that case, currently it is neither
4030  // locally nor globally indexed. This will change once we get rid
4031  // of lazy allocation (so that the constructor allocates indices
4032  // and therefore commits to local vs. global).
4033  const char tfecfFuncName[] = "replaceColMap: ";
4034  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4035  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4036  "Requires matching maps and non-static graph.");
4037  colMap_ = newColMap;
4038  }
4039 
4040  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4041  void
4043  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4044  const Teuchos::RCP<const import_type>& newImport,
4045  const bool sortIndicesInEachRow)
4046  {
4047  using Teuchos::REDUCE_MIN;
4048  using Teuchos::reduceAll;
4049  using Teuchos::RCP;
4050  typedef GlobalOrdinal GO;
4051  typedef LocalOrdinal LO;
4052  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4053  const char tfecfFuncName[] = "reindexColumns: ";
4054 
4055  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4056  isFillComplete (), std::runtime_error, "The graph is fill complete "
4057  "(isFillComplete() returns true). You must call resumeFill() before "
4058  "you may call this method.");
4059 
4060  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4061  // doesn't claim to do the work of an Import or Export. This
4062  // means that for all processes, the calling process MUST own all
4063  // column indices, in both the old column Map (if it exists) and
4064  // the new column Map. We check this via an all-reduce.
4065  //
4066  // Some processes may be globally indexed, others may be locally
4067  // indexed, and others (that have no graph entries) may be
4068  // neither. This method will NOT change the graph's current
4069  // state. If it's locally indexed, it will stay that way, and
4070  // vice versa. It would easy to add an option to convert indices
4071  // from global to local, so as to save a global-to-local
4072  // conversion pass. However, we don't do this here. The intended
4073  // typical use case is that the graph already has a column Map and
4074  // is locally indexed, and this is the case for which we optimize.
4075 
4076  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4077 
4078  // Attempt to convert indices to the new column Map's version of
4079  // local. This will fail if on the calling process, the graph has
4080  // indices that are not on that process in the new column Map.
4081  // After the local conversion attempt, we will do an all-reduce to
4082  // see if any processes failed.
4083 
4084  // If this is false, then either the graph contains a column index
4085  // which is invalid in the CURRENT column Map, or the graph is
4086  // locally indexed but currently has no column Map. In either
4087  // case, there is no way to convert the current local indices into
4088  // global indices, so that we can convert them into the new column
4089  // Map's local indices. It's possible for this to be true on some
4090  // processes but not others, due to replaceColMap.
4091  bool allCurColIndsValid = true;
4092  // On the calling process, are all valid current column indices
4093  // also in the new column Map on the calling process? In other
4094  // words, does local reindexing suffice, or should the user have
4095  // done an Import or Export instead?
4096  bool localSuffices = true;
4097 
4098  // Final arrays for the local indices. We will allocate exactly
4099  // one of these ONLY if the graph is locally indexed on the
4100  // calling process, and ONLY if the graph has one or more entries
4101  // (is not empty) on the calling process. In that case, we
4102  // allocate the first (1-D storage) if the graph has a static
4103  // profile, else we allocate the second (2-D storage).
4104  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4105  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4106 
4107  // If indices aren't allocated, that means the calling process
4108  // owns no entries in the graph. Thus, there is nothing to
4109  // convert, and it trivially succeeds locally.
4110  if (indicesAreAllocated ()) {
4111  if (isLocallyIndexed ()) {
4112  if (hasColMap ()) { // locally indexed, and currently has a column Map
4113  const map_type& oldColMap = * (getColMap ());
4114  // Allocate storage for the new local indices.
4115  const size_t allocSize = this->getNodeAllocationSize ();
4116  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4117  // Attempt to convert the new indices locally.
4118  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4119  const RowInfo rowInfo = this->getRowInfo (lclRow);
4120  const size_t beg = rowInfo.offset1D;
4121  const size_t end = beg + rowInfo.numEntries;
4122  for (size_t k = beg; k < end; ++k) {
4123  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4124  // use a DualView instead.
4125  const LO oldLclCol = k_lclInds1D_(k);
4126  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4127  allCurColIndsValid = false;
4128  break; // Stop at the first invalid index
4129  }
4130  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4131 
4132  // The above conversion MUST succeed. Otherwise, the
4133  // current local index is invalid, which means that
4134  // the graph was constructed incorrectly.
4135  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4136  allCurColIndsValid = false;
4137  break; // Stop at the first invalid index
4138  }
4139  else {
4140  const LO newLclCol = newColMap->getLocalElement (gblCol);
4141  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4142  localSuffices = false;
4143  break; // Stop at the first invalid index
4144  }
4145  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4146  // use a DualView instead.
4147  newLclInds1D(k) = newLclCol;
4148  }
4149  } // for each entry in the current row
4150  } // for each locally owned row
4151  }
4152  else { // locally indexed, but no column Map
4153  // This case is only possible if replaceColMap() was called
4154  // with a null argument on the calling process. It's
4155  // possible, but it means that this method can't possibly
4156  // succeed, since we have no way of knowing how to convert
4157  // the current local indices to global indices.
4158  allCurColIndsValid = false;
4159  }
4160  }
4161  else { // globally indexed
4162  // If the graph is globally indexed, we don't need to save
4163  // local indices, but we _do_ need to know whether the current
4164  // global indices are valid in the new column Map. We may
4165  // need to do a getRemoteIndexList call to find this out.
4166  //
4167  // In this case, it doesn't matter whether the graph currently
4168  // has a column Map. We don't need the old column Map to
4169  // convert from global indices to the _new_ column Map's local
4170  // indices. Furthermore, we can use the same code, whether
4171  // the graph is static or dynamic profile.
4172 
4173  // Test whether the current global indices are in the new
4174  // column Map on the calling process.
4175  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4176  const RowInfo rowInfo = this->getRowInfo (lclRow);
4177  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4178  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4179  const GO gblCol = oldGblRowView[k];
4180  if (! newColMap->isNodeGlobalElement (gblCol)) {
4181  localSuffices = false;
4182  break; // Stop at the first invalid index
4183  }
4184  } // for each entry in the current row
4185  } // for each locally owned row
4186  } // locally or globally indexed
4187  } // whether indices are allocated
4188 
4189  // Do an all-reduce to check both possible error conditions.
4190  int lclSuccess[2];
4191  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4192  lclSuccess[1] = localSuffices ? 1 : 0;
4193  int gblSuccess[2];
4194  gblSuccess[0] = 0;
4195  gblSuccess[1] = 0;
4196  RCP<const Teuchos::Comm<int> > comm =
4197  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4198  if (! comm.is_null ()) {
4199  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4200  }
4201 
4202  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4203  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4204  " The most likely reason is that the graph is locally indexed, but the "
4205  "column Map is missing (null) on some processes, due to a previous call "
4206  "to replaceColMap().");
4207 
4208  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4209  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4210  "contains column indices that are in the old column Map, but not in the "
4211  "new column Map (on that process). This method does NOT redistribute "
4212  "data; it does not claim to do the work of an Import or Export operation."
4213  " This means that for all processess, the calling process MUST own all "
4214  "column indices, in both the old column Map and the new column Map. In "
4215  "this case, you will need to do an Import or Export operation to "
4216  "redistribute data.");
4217 
4218  // Commit the results.
4219  if (isLocallyIndexed ()) {
4220  k_lclInds1D_ = newLclInds1D;
4221  // We've reindexed, so we don't know if the indices are sorted.
4222  //
4223  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4224  // since we're already going through all the indices above. We
4225  // could also sort each row in place; that way, we would only
4226  // have to make one pass over the rows.
4227  indicesAreSorted_ = false;
4228  if (sortIndicesInEachRow) {
4229  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4230  // order to call this method.
4231  //
4232  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4233  // guarantee. It would be better to sort the new index arrays
4234  // before committing them.
4235  const bool sorted = false; // need to resort
4236  const bool merged = true; // no need to merge, since no dups
4237  this->sortAndMergeAllIndices (sorted, merged);
4238  }
4239  }
4240  colMap_ = newColMap;
4241 
4242  if (newImport.is_null ()) {
4243  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4244  // check whether the input Import is null on any process.
4245  //
4246  // If the domain Map hasn't been set yet, we can't compute a new
4247  // Import object. Leave it what it is; it should be null, but
4248  // it doesn't matter. If the domain Map _has_ been set, then
4249  // compute a new Import object if necessary.
4250  if (! domainMap_.is_null ()) {
4251  if (! domainMap_->isSameAs (* newColMap)) {
4252  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4253  } else {
4254  importer_ = Teuchos::null; // don't need an Import
4255  }
4256  }
4257  } else {
4258  // The caller gave us an Import object. Assume that it's valid.
4259  importer_ = newImport;
4260  }
4261  }
4262 
4263 
4264  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4265  void
4267  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4268  const Teuchos::RCP<const import_type>& newImporter)
4269  {
4270  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4271  TEUCHOS_TEST_FOR_EXCEPTION(
4272  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4273  "this method unless the graph already has a column Map.");
4274  TEUCHOS_TEST_FOR_EXCEPTION(
4275  newDomainMap.is_null (), std::invalid_argument,
4276  prefix << "The new domain Map must be nonnull.");
4277 
4278  if (debug_) {
4279  if (newImporter.is_null ()) {
4280  // It's not a good idea to put expensive operations in a macro
4281  // clause, even if they are side effect - free, because macros
4282  // don't promise that they won't evaluate their arguments more
4283  // than once. It's polite for them to do so, but not required.
4284  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4285  TEUCHOS_TEST_FOR_EXCEPTION
4286  (colSameAsDom, std::invalid_argument, "If the new Import is null, "
4287  "then the new domain Map must be the same as the current column Map.");
4288  }
4289  else {
4290  const bool colSameAsTgt =
4291  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4292  const bool newDomSameAsSrc =
4293  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4294  TEUCHOS_TEST_FOR_EXCEPTION
4295  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4296  "new Import is nonnull, then the current column Map must be the same "
4297  "as the new Import's target Map, and the new domain Map must be the "
4298  "same as the new Import's source Map.");
4299  }
4300  }
4301 
4302  domainMap_ = newDomainMap;
4303  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4304  }
4305 
// Accessor: returns the lclGraph_ member.
//
// NOTE(review): the return type, class qualifier, and function name
// (listing lines 4307-4309) are missing from this extract; presumably
// this is the local-graph getter -- confirm against the class
// declaration before editing.
4306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4310  {
4311  return lclGraph_;
4312  }
4313 
// Compute the graph's communicator-wide constants.
//
// Always calls computeLocalConstants() first.  Then, unless
// haveGlobalConstants_ is already set, it does two all-reduces over
// this->getComm():
//   - a sum of getNodeNumEntries(), stored in globalNumEntries_;
//   - a max of nodeMaxNumRowEntries_, stored in
//     globalMaxNumRowEntries_;
// and finally sets haveGlobalConstants_.
//
// NOTE(review): the class qualifier and function name (listing lines
// 4316-4317) are missing from this extract; the name is taken from the
// ProfilingRegion label below.
4314  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4315  void
4318  {
4319  using ::Tpetra::Details::ProfilingRegion;
4320  using Teuchos::ArrayView;
4321  using Teuchos::outArg;
4322  using Teuchos::reduceAll;
4323  typedef global_size_t GST;
4324 
4325  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4326 
4327  this->computeLocalConstants ();
4328 
4329  // Compute global constants from local constants. Processes that
4330  // already have local constants still participate in the
4331  // all-reduces, using their previously computed values.
// NOTE(review): the all-reduces below are guarded by
// haveGlobalConstants_, so this flag presumably must agree on all
// processes for the collectives to match -- confirm.
4332  if (! this->haveGlobalConstants_) {
4333  const Teuchos::Comm<int>& comm = * (this->getComm ());
4334  // Promote all the nodeNum* and nodeMaxNum* quantities from
4335  // size_t to global_size_t, when doing the all-reduces for
4336  // globalNum* / globalMaxNum* results.
4337  //
4338  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4339  // this in two all-reduces (one for the sum and the other for
4340  // the max), or use a custom MPI_Op that combines the sum and
4341  // the max. The latter might even be slower than two
4342  // all-reduces on modern network hardware. It would also be a
4343  // good idea to use nonblocking all-reduces (MPI 3), so that we
4344  // don't have to wait around for the first one to finish before
4345  // starting the second one.
4346  GST lcl, gbl;
4347  lcl = static_cast<GST> (this->getNodeNumEntries ());
4348 
// First all-reduce: global number of entries (sum over processes).
4349  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4350  this->globalNumEntries_ = gbl;
4351 
// Second all-reduce: global max number of entries in any row.
4352  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4353  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4354  outArg (this->globalMaxNumRowEntries_));
4355  this->haveGlobalConstants_ = true;
4356  }
4357  }
4358 
4359 
// Compute the graph's per-process constants (currently just
// nodeMaxNumRowEntries_) from the local graph's row offsets.
//
// Returns immediately if haveLocalConstants_ is already set.
// Otherwise it resets nodeMaxNumRowEntries_ to the "invalid" value,
// fences the execution space, derives the local row count from
// lclGraph_.row_map, computes the largest difference between
// consecutive row offsets, and sets haveLocalConstants_.
//
// NOTE(review): the class qualifier and function name (listing lines
// 4362-4363) are missing from this extract; the name is taken from the
// ProfilingRegion label below.
4360  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4361  void
4364  {
4365  using ::Tpetra::Details::ProfilingRegion;
4366 
4367  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4368  if (this->haveLocalConstants_) {
4369  return;
4370  }
4371 
4372  // Reset local properties
4373  this->nodeMaxNumRowEntries_ =
4374  Teuchos::OrdinalTraits<size_t>::invalid();
4375 
4376  using LO = local_ordinal_type;
4377 
4378  // KJ: This one is a bit different from the above. Conservatively thinking,
4379  // we also need the fence here as lclGraph_.row_map is on UVM and it can be
4380  // still updated. In practice, the local graph construction should be done
4381  // before this is called. This routine is computeLocalConstants. If we want
4382  // a better code, we need a flag stating that the local graph is completed
4383  // and safe to use it without fence.
4384  // For now, I recommend to put the fence. Defining the state of local
4385  // object can be improvements in the code.
4386  execution_space().fence ();
4387 
// The row offsets array has (number of rows + 1) entries when
// nonempty; an empty array is treated as zero rows.
4388  auto ptr = this->lclGraph_.row_map;
4389  const LO lclNumRows = ptr.extent(0) == 0 ?
4390  static_cast<LO> (0) :
4391  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4392 
4393  const LO lclMaxNumRowEnt =
4394  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4395  ptr, lclNumRows);
4396  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4397  this->haveLocalConstants_ = true;
4398  }
4399 
4400 
// Convert the graph's column indices from global to local, using the
// graph's column Map.
//
// verbose [in] If true, print progress messages to std::cerr.
//
// Returns a pair (lclNumErrs, errStr):
//   - lclNumErrs is the number of column indices that could not be
//     converted on the calling process (or the "invalid" size_t value
//     on the early-return path where k_rowPtrs_ is empty);
//   - errStr is the accumulated error text (empty if nothing was
//     written to errStrm).
//
// Throws std::logic_error if the graph has no column Map.
//
// On the normal path it rebuilds lclGraph_ from k_lclInds1D_ and
// k_rowPtrs_, marks the indices as local, and calls
// checkInternalState().
//
// NOTE(review): this listing skips several source lines (4403, 4406,
// 4467, 4526) -- e.g. the class qualifier, a using-declaration, and
// one template argument of if_c -- so the block does not compile as
// shown; consult the original file before changing code here.
4401  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4402  std::pair<size_t, std::string>
4404  makeIndicesLocal (const bool verbose)
4405  {
4407  using Teuchos::arcp;
4408  using Teuchos::Array;
4409  using std::endl;
4410  typedef LocalOrdinal LO;
4411  typedef GlobalOrdinal GO;
4412  typedef device_type DT;
4413  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
4414  typedef decltype (k_numRowEntries_) row_entries_type;
4415  typedef typename row_entries_type::non_const_value_type num_ent_type;
4416  typedef typename local_graph_type::entries_type::non_const_type
4417  lcl_col_inds_type;
4418  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
4419  device_type> gbl_col_inds_type;
4420  const char tfecfFuncName[] = "makeIndicesLocal: ";
4421  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4422 
4423  std::unique_ptr<std::string> prefix;
4424  if (verbose) {
4425  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4426  std::ostringstream os;
4427  os << *prefix << "lclNumRows: " << getNodeNumRows() << endl;
4428  std::cerr << os.str();
4429  }
4430 
4431  // These are somewhat global properties, so it's safe to have
4432  // exception checks for them, rather than returning an error code.
4433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4434  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4435  "column Map yet. This method should never be called in that case. "
4436  "Please report this bug to the Tpetra developers.");
4437  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4438  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4439  "that it has a column Map, because hasColMap() returns true. However, "
4440  "the result of getColMap() is null. This should never happen. Please "
4441  "report this bug to the Tpetra developers.");
4442 
4443  // Return value 1: The number of column indices (counting
4444  // duplicates) that could not be converted to local indices,
4445  // because they were not in the column Map on the calling process.
4446  size_t lclNumErrs = 0;
4447  std::ostringstream errStrm; // for return value 2 (error string)
4448 
4449  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4450  const map_type& colMap = * (this->getColMap ());
4451 
// Conversion is only needed if the graph currently stores global
// indices and has at least one local row.
4452  if (this->isGloballyIndexed () && lclNumRows != 0) {
4453  // This is a host-accessible View.
4454  typename row_entries_type::const_type h_numRowEnt =
4455  this->k_numRowEntries_;
4456 
4457  // Allocate space for local indices.
4458  // If GO and LO are the same size, we can reuse the existing
4459  // array of 1-D index storage to convert column indices from
4460  // GO to LO. Otherwise, we'll just allocate a new buffer.
4461  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
4462  if (LO_GO_same) {
4463  // This prevents a build error (illegal assignment) if
4464  // LO_GO_same is _not_ true. Only the first branch
4465  // (returning k_gblInds1D_) should ever get taken.
4466  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
4468  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
4469  }
4470  else {
4471  if (k_rowPtrs_.extent (0) == 0) {
4472  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4473  "happen here. Please report this bug to the Tpetra developers."
4474  << endl;
4475  // Need to return early.
4476  return std::make_pair (Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4477  errStrm.str ());
4478  }
// The last row offset is the total number of stored entries.
4479  const auto numEnt = ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
4480 
4481  // mfh 17 Dec 2016: We don't need initial zero-fill of
4482  // k_lclInds1D_, because we will fill it below anyway.
4483  // AllowPadding would only help for aligned access (e.g.,
4484  // for vectorization) if we also were to pad each row to the
4485  // same alignment, so we'll skip AllowPadding for now.
4486 
4487  // using Kokkos::AllowPadding;
4488  using Kokkos::view_alloc;
4489  using Kokkos::WithoutInitializing;
4490 
4491  // When giving the label as an argument to
4492  // Kokkos::view_alloc, the label must be a string and not a
4493  // char*, else the code won't compile. This is because
4494  // view_alloc also allows a raw pointer as its first
4495  // argument. See
4496  // https://github.com/kokkos/kokkos/issues/434. This is a
4497  // large allocation typically, so the overhead of creating
4498  // an std::string is minor.
4499  const std::string label ("Tpetra::CrsGraph::lclind");
4500  if (verbose) {
4501  std::ostringstream os;
4502  os << *prefix << "(Re)allocate k_lclInds1D_: old="
4503  << k_lclInds1D_.extent(0) << ", new=" << numEnt << endl;
4504  std::cerr << os.str();
4505  }
4506  k_lclInds1D_ =
4507  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
4508  }
4509 
4510  auto lclColMap = colMap.getLocalMap ();
4511  // This is a "device mirror" of the host View h_numRowEnt.
4512  //
4513  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4514  // Device instance is to use its default constructor. See the
4515  // following Kokkos issue:
4516  //
4517  // https://github.com/kokkos/kokkos/issues/442
4518  if (verbose) {
4519  std::ostringstream os;
4520  os << *prefix << "Allocate device mirror k_numRowEnt: "
4521  << h_numRowEnt.extent(0) << endl;
4522  std::cerr << os.str();
4523  }
4524  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
4525 
// Do the actual conversion; the return value is the number of
// indices that could not be converted on this process.
4527  lclNumErrs =
4528  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
4529  k_gblInds1D_,
4530  k_rowPtrs_,
4531  lclColMap,
4532  k_numRowEnt);
4533  if (lclNumErrs != 0) {
// Find this process's rank for the message, falling back to 0 if
// the Map or its communicator is null.
4534  const int myRank = [this] () {
4535  auto map = this->getMap ();
4536  if (map.is_null ()) {
4537  return 0;
4538  }
4539  else {
4540  auto comm = map->getComm ();
4541  return comm.is_null () ? 0 : comm->getRank ();
4542  }
4543  } ();
4544  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
// NOTE(review): the verb selection below looks inverted -- the plural
// case yields "indices that does" and the singular case "index that
// do".  It is a runtime string, so it is left unchanged here.
4545  errStrm << "(Process " << myRank << ") When converting column "
4546  "indices from global to local, we encountered " << lclNumErrs
4547  << " ind" << (pluralNumErrs ? "ices" : "ex")
4548  << " that do" << (pluralNumErrs ? "es" : "")
4549  << " not live in the column Map on this process." << endl;
4550  }
4551 
4552  // We've converted column indices from global to local, so we
4553  // can deallocate the global column indices (which we know are
4554  // in 1-D storage, because the graph has static profile).
4555  if (verbose) {
4556  std::ostringstream os;
4557  os << *prefix << "Free k_gblInds1D_: "
4558  << k_gblInds1D_.extent(0) << endl;
4559  std::cerr << os.str();
4560  }
4561  k_gblInds1D_ = gbl_col_inds_type ();
4562  } // globallyIndexed() && lclNumRows > 0
4563 
// Rebuild the local graph from the local index array and the row
// offsets, and flip the indexing state flags.  This runs even when no
// conversion was needed above.
4564  this->lclGraph_ = local_graph_type (this->k_lclInds1D_, this->k_rowPtrs_);
4565  this->indicesAreLocal_ = true;
4566  this->indicesAreGlobal_ = false;
4567  this->checkInternalState ();
4568 
4569  return std::make_pair (lclNumErrs, errStrm.str ());
4570  }
4571 
4572  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4573  void
4575  makeColMap (Teuchos::Array<int>& remotePIDs)
4576  {
4578  using std::endl;
4579  const char tfecfFuncName[] = "makeColMap";
4580 
4581  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4582  std::unique_ptr<std::string> prefix;
4583  if (verbose_) {
4584  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4585  std::ostringstream os;
4586  os << *prefix << "Start" << endl;
4587  std::cerr << os.str();
4588  }
4589 
4590  // this->colMap_ should be null at this point, but we accept the
4591  // future possibility that it might not be (esp. if we decide
4592  // later to support graph structure changes after first
4593  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4594  // 2017) support).
4595  Teuchos::RCP<const map_type> colMap = this->colMap_;
4596  const bool sortEachProcsGids =
4597  this->sortGhostsAssociatedWithEachProcessor_;
4598 
4599  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4600  // per-process error code. If an error does occur on a process,
4601  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4602  // notice that error. This is the caller's responsibility. For
4603  // now, we only propagate (to all processes) and report the error
4604  // in debug mode. In the future, we need to add the local/global
4605  // error handling scheme used in BlockCrsMatrix to this class.
4606  if (debug_) {
4607  using Teuchos::outArg;
4608  using Teuchos::REDUCE_MIN;
4609  using Teuchos::reduceAll;
4610 
4611  std::ostringstream errStrm;
4612  const int lclErrCode =
4613  Details::makeColMap (colMap, remotePIDs,
4614  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4615  auto comm = this->getComm ();
4616  if (! comm.is_null ()) {
4617  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4618  int gblSuccess = 0; // output argument
4619  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4620  outArg (gblSuccess));
4621  if (gblSuccess != 1) {
4622  std::ostringstream os;
4623  Details::gathervPrint (os, errStrm.str (), *comm);
4624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4625  (true, std::runtime_error, ": An error happened on at "
4626  "least one process in the CrsGraph's communicator. "
4627  "Here are all processes' error messages:" << std::endl
4628  << os.str ());
4629  }
4630  }
4631  }
4632  else {
4633  (void) Details::makeColMap (colMap, remotePIDs,
4634  getDomainMap (), *this, sortEachProcsGids, nullptr);
4635  }
4636  // See above. We want to admit the possibility of makeColMap
4637  // actually revising an existing column Map, even though that
4638  // doesn't currently (as of 10 May 2017) happen.
4639  this->colMap_ = colMap;
4640 
4641  checkInternalState ();
4642  if (verbose_) {
4643  std::ostringstream os;
4644  os << *prefix << "Done" << endl;
4645  std::cerr << os.str();
4646  }
4647  }
4648 
4649 
4650  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4651  void
4653  sortAndMergeAllIndices (const bool sorted, const bool merged)
4654  {
4655  using std::endl;
4656  using LO = LocalOrdinal;
4657  using host_execution_space =
4658  typename Kokkos::View<LO*, device_type>::HostMirror::
4659  execution_space;
4660  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4661  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4662  Details::ProfilingRegion regionSortAndMerge
4663  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4664 
4665  std::unique_ptr<std::string> prefix;
4666  if (verbose_) {
4667  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4668  std::ostringstream os;
4669  os << *prefix << "Start: "
4670  << "sorted=" << (sorted ? "true" : "false")
4671  << ", merged=" << (merged ? "true" : "false") << endl;
4672  std::cerr << os.str();
4673  }
4674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4675  (this->isGloballyIndexed(), std::logic_error,
4676  "This method may only be called after makeIndicesLocal." );
4677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4678  (! merged && this->isStorageOptimized(), std::logic_error,
4679  "The graph is already storage optimized, so we shouldn't be "
4680  "merging any indices. "
4681  "Please report this bug to the Tpetra developers.");
4682 
4683  if (! sorted || ! merged) {
4684  const LO lclNumRows(this->getNodeNumRows());
4685  auto range = range_type(0, lclNumRows);
4686 
4687  // FIXME (mfh 08 May 2017) Loops below assume CUDA UVM.
4688  if (verbose_) {
4689  size_t totalNumDups = 0;
4690  Kokkos::parallel_reduce(range,
4691  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4692  {
4693  const RowInfo rowInfo = this->getRowInfo(lclRow);
4694  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4695  },
4696  totalNumDups);
4697  std::ostringstream os;
4698  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4699  std::cerr << os.str();
4700  }
4701  else {
4702  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
4703  Kokkos::parallel_for(range,
4704  [this, sorted, merged] (const LO lclRow)
4705  {
4706  const RowInfo rowInfo = this->getRowInfo(lclRow);
4707  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4708  });
4709  }
4710  this->indicesAreSorted_ = true; // we just sorted every row
4711  this->noRedundancies_ = true; // we just merged every row
4712  }
4713 
4714  if (verbose_) {
4715  std::ostringstream os;
4716  os << *prefix << "Done" << endl;
4717  std::cerr << os.str();
4718  }
4719  }
4720 
4721  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4722  void
4723  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4724  makeImportExport (Teuchos::Array<int>& remotePIDs,
4725  const bool useRemotePIDs)
4726  {
4727  using ::Tpetra::Details::ProfilingRegion;
4728  using Teuchos::ParameterList;
4729  using Teuchos::RCP;
4730  using Teuchos::rcp;
4731  const char tfecfFuncName[] = "makeImportExport: ";
4732  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4733 
4734  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4735  (! this->hasColMap (), std::logic_error,
4736  "This method may not be called unless the graph has a column Map.");
4737  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4738 
4739  // Don't do any checks to see if we need to create the Import, if
4740  // it exists already.
4741  //
4742  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4743  // change CrsGraph in the future to allow changing the column
4744  // Map after fillComplete. For now, the column Map is fixed
4745  // after the first fillComplete call.
4746  if (importer_.is_null ()) {
4747  // Create the Import instance if necessary.
4748  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4749  if (params.is_null () || ! params->isSublist ("Import")) {
4750  if (useRemotePIDs) {
4751  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4752  }
4753  else {
4754  importer_ = rcp (new import_type (domainMap_, colMap_));
4755  }
4756  }
4757  else {
4758  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4759  if (useRemotePIDs) {
4760  RCP<import_type> newImp =
4761  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4762  importSublist));
4763  importer_ = newImp;
4764  }
4765  else {
4766  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4767  }
4768  }
4769  }
4770  }
4771 
4772  // Don't do any checks to see if we need to create the Export, if
4773  // it exists already.
4774  if (exporter_.is_null ()) {
4775  // Create the Export instance if necessary.
4776  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4777  if (params.is_null () || ! params->isSublist ("Export")) {
4778  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4779  }
4780  else {
4781  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4782  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4783  }
4784  }
4785  }
4786  }
4787 
4788 
// Return a one-line description of the graph.
//
// The string is dist_object_type::description() followed by a
// brace-enclosed status summary.  If the graph is fill complete, the
// summary has the global row, column, and entry counts; otherwise it
// has only the global row count.
//
// NOTE(review): the class qualifier and function name (listing lines
// 4791-4792) are missing from this extract; presumably this is
// description() -- confirm against the class declaration.
4789  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4790  std::string
4793  {
4794  std::ostringstream oss;
4795  oss << dist_object_type::description ();
4796  if (isFillComplete ()) {
4797  oss << "{status = fill complete"
4798  << ", global rows = " << getGlobalNumRows()
4799  << ", global cols = " << getGlobalNumCols()
4800  << ", global num entries = " << getGlobalNumEntries()
4801  << "}";
4802  }
4803  else {
4804  oss << "{status = fill not complete"
4805  << ", global rows = " << getGlobalNumRows()
4806  << "}";
4807  }
4808  return oss.str();
4809  }
4810 
4811 
// Print a description of the graph to the given output stream.
//
// out [out] Output stream to which to print.
// verbLevel [in] How much to print.  VERB_DEFAULT is treated as
//   VERB_LOW.  VERB_NONE prints nothing.
//
// Processes take turns printing in rank order, with barriers on the
// graph's communicator between turns, so every process in that
// communicator must call this method.
//
// NOTE(review): comm is dereferenced without a null check; presumably
// getComm() is never null here -- confirm.
4812  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4813  void
4815  describe (Teuchos::FancyOStream &out,
4816  const Teuchos::EVerbosityLevel verbLevel) const
4817  {
4818  using Teuchos::ArrayView;
4819  using Teuchos::Comm;
4820  using Teuchos::RCP;
4821  using Teuchos::VERB_DEFAULT;
4822  using Teuchos::VERB_NONE;
4823  using Teuchos::VERB_LOW;
4824  using Teuchos::VERB_MEDIUM;
4825  using Teuchos::VERB_HIGH;
4826  using Teuchos::VERB_EXTREME;
4827  using std::endl;
4828  using std::setw;
4829 
4830  Teuchos::EVerbosityLevel vl = verbLevel;
4831  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4832  RCP<const Comm<int> > comm = this->getComm();
4833  const int myImageID = comm->getRank(),
4834  numImages = comm->getSize();
// Column width for the per-row table: the number of decimal digits in
// the global row count, but at least 11, plus 2 for spacing.
4835  size_t width = 1;
4836  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4837  ++width;
4838  }
4839  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4840  Teuchos::OSTab tab (out);
4841  // none: print nothing
4842  // low: print O(1) info from node 0
4843  // medium: print O(P) info, num entries per node
4844  // high: print O(N) info, num entries per row
4845  // extreme: print O(NNZ) info: print graph indices
4846  //
4847  // for medium and higher, print constituent objects at specified verbLevel
4848  if (vl != VERB_NONE) {
4849  if (myImageID == 0) out << this->description() << std::endl;
4850  // O(1) globals, minus what was already printed by description()
4851  if (isFillComplete() && myImageID == 0) {
4852  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4853  }
4854  // constituent objects
4855  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4856  if (myImageID == 0) out << "\nRow map: " << std::endl;
4857  rowMap_->describe(out,vl);
4858  if (colMap_ != Teuchos::null) {
4859  if (myImageID == 0) out << "\nColumn map: " << std::endl;
4860  colMap_->describe(out,vl);
4861  }
4862  if (domainMap_ != Teuchos::null) {
4863  if (myImageID == 0) out << "\nDomain map: " << std::endl;
4864  domainMap_->describe(out,vl);
4865  }
4866  if (rangeMap_ != Teuchos::null) {
4867  if (myImageID == 0) out << "\nRange map: " << std::endl;
4868  rangeMap_->describe(out,vl);
4869  }
4870  }
4871  // O(P) data
4872  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
// Each process prints on its own turn; all processes hit the barriers
// on every iteration.
4873  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4874  if (myImageID == imageCtr) {
4875  out << "Node ID = " << imageCtr << std::endl
4876  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
4877  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4878  if (! indicesAreAllocated ()) {
4879  out << "Indices are not allocated." << std::endl;
4880  }
4881  }
// NOTE(review): three barriers per turn; presumably to give output
// more time to appear in rank order -- confirm before removing any.
4882  comm->barrier();
4883  comm->barrier();
4884  comm->barrier();
4885  }
4886  }
4887  // O(N) and O(NNZ) data
4888  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4889  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4890  if (myImageID == imageCtr) {
4891  out << std::setw(width) << "Node ID"
4892  << std::setw(width) << "Global Row"
4893  << std::setw(width) << "Num Entries";
4894  if (vl == VERB_EXTREME) {
4895  out << " Entries";
4896  }
4897  out << std::endl;
4898  const LocalOrdinal lclNumRows =
4899  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4900  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4901  const RowInfo rowinfo = this->getRowInfo (r);
4902  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4903  out << std::setw(width) << myImageID
4904  << std::setw(width) << gid
4905  << std::setw(width) << rowinfo.numEntries;
4906  if (vl == VERB_EXTREME) {
4907  out << " ";
// Entries are always printed as global column indices: directly if
// the graph is globally indexed, else translated through colMap_.
4908  if (isGloballyIndexed()) {
4909  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
4910  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
4911  }
4912  else if (isLocallyIndexed()) {
4913  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
4914  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
4915  }
4916  }
4917  out << std::endl;
4918  }
4919  }
4920  comm->barrier();
4921  comm->barrier();
4922  comm->barrier();
4923  }
4924  }
4925  }
4926  }
4927 
4928 
4929  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4930  bool
4932  checkSizes (const SrcDistObject& /* source */)
4933  {
4934  // It's not clear what kind of compatibility checks on sizes can
4935  // be performed here. Epetra_CrsGraph doesn't check any sizes for
4936  // compatibility.
4937  return true;
4938  }
4939 
// Copy and permute rows from the source object into this graph, as
// part of an Import or Export.
//
// source [in] The source object; cast below to row_graph_type.
// numSameIDs [in] Number of leading local rows copied without
//   permutation: source local row i goes to the target row with the
//   same global index, for i in [0, numSameIDs).
// permuteToLIDs [in] Target local row indices of the permuted rows.
// permuteFromLIDs [in] Source local row indices of the permuted rows;
//   must have the same length as permuteToLIDs.
//
// Throws std::runtime_error if the two permute lists differ in
// length.
//
// NOTE(review): the class qualifier and function name (listing lines
// 4942-4943) are missing from this extract; the name is taken from
// tfecfFuncName below.
4940  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4941  void
4944  (const SrcDistObject& source,
4945  const size_t numSameIDs,
4946  const Kokkos::DualView<const local_ordinal_type*,
4947  buffer_device_type>& permuteToLIDs,
4948  const Kokkos::DualView<const local_ordinal_type*,
4949  buffer_device_type>& permuteFromLIDs)
4950  {
4951  using std::endl;
4952  using LO = local_ordinal_type;
4953  using GO = global_ordinal_type;
4954  using this_type = CrsGraph<LO, GO, node_type>;
4955  using row_graph_type = RowGraph<LO, GO, node_type>;
4956  const char tfecfFuncName[] = "copyAndPermute: ";
4957  const bool verbose = verbose_;
4958 
4959  std::unique_ptr<std::string> prefix;
4960  if (verbose) {
4961  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4962  std::ostringstream os;
4963  os << *prefix << endl;
4964  std::cerr << os.str ();
4965  }
4966 
4967  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4968  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4969  std::runtime_error, "permuteToLIDs.extent(0) = "
4970  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4971  << permuteFromLIDs.extent (0) << ".");
4972 
4973  // We know from checkSizes that the source object is a
4974  // row_graph_type, so we don't need to check again.
// NOTE(review): checkSizes in this file ignores its argument and
// always returns true, so the type is not actually verified there; a
// reference dynamic_cast throws std::bad_cast on a type mismatch.
4975  const row_graph_type& srcRowGraph =
4976  dynamic_cast<const row_graph_type&> (source);
4977 
4978  if (verbose) {
4979  std::ostringstream os;
4980  os << *prefix << "Compute padding" << endl;
4981  std::cerr << os.str ();
4982  }
// Compute and apply padding before inserting any indices.
4983  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4984  permuteToLIDs, permuteFromLIDs, verbose);
4985  applyCrsPadding(*padding, verbose);
4986 
4987  // If the source object is actually a CrsGraph, we can use view
4988  // mode instead of copy mode to access the entries in each row,
4989  // if the graph is not fill complete.
4990  const this_type* srcCrsGraph =
4991  dynamic_cast<const this_type*> (&source);
4992 
4993  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4994  const map_type& tgtRowMap = *(getRowMap());
4995  const bool src_filled = srcRowGraph.isFillComplete();
// Scratch buffer reused for every row accessed in copy mode.
4996  Teuchos::Array<GO> row_copy;
4997  LO myid = 0;
4998 
4999  //
5000  // "Copy" part of "copy and permute."
5001  //
5002  if (src_filled || srcCrsGraph == nullptr) {
5003  if (verbose) {
5004  std::ostringstream os;
5005  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5006  std::cerr << os.str ();
5007  }
5008  // If the source graph is fill complete, we can't use view mode,
5009  // because the data might be stored in a different format not
5010  // compatible with the expectations of view mode. Also, if the
5011  // source graph is not a CrsGraph, we can't use view mode,
5012  // because RowGraph only provides copy mode access to the data.
5013  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5014  const GO gid = srcRowMap.getGlobalElement (myid);
5015  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5016  row_copy.resize (row_length);
// check_row_length receives the count written by getGlobalRowCopy; it
// is not read afterwards.
5017  size_t check_row_length = 0;
5018  srcRowGraph.getGlobalRowCopy (gid, row_copy (), check_row_length);
5019  this->insertGlobalIndices (gid, row_copy ());
5020  }
5021  } else {
5022  if (verbose) {
5023  std::ostringstream os;
5024  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5025  std::cerr << os.str ();
5026  }
// View mode: insert directly from the source graph's row view.
5027  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5028  const GO gid = srcRowMap.getGlobalElement (myid);
5029  Teuchos::ArrayView<const GO> row;
5030  srcCrsGraph->getGlobalRowView (gid, row);
5031  this->insertGlobalIndices (gid, row);
5032  }
5033  }
5034 
5035  //
5036  // "Permute" part of "copy and permute."
5037  //
5038  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5039  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5040 
// Same copy-mode / view-mode split as above, but source and target
// rows may have different global indices.
5041  if (src_filled || srcCrsGraph == nullptr) {
5042  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5043  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5044  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5045  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5046  row_copy.resize (row_length);
5047  size_t check_row_length = 0;
5048  srcRowGraph.getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5049  this->insertGlobalIndices (mygid, row_copy ());
5050  }
5051  } else {
5052  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5053  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5054  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5055  Teuchos::ArrayView<const GO> row;
5056  srcCrsGraph->getGlobalRowView (srcgid, row);
5057  this->insertGlobalIndices (mygid, row);
5058  }
5059  }
5060 
5061  if (verbose) {
5062  std::ostringstream os;
5063  os << *prefix << "Done" << endl;
5064  std::cerr << os.str ();
5065  }
5066  }
5067 
// Apply a previously computed CrsPadding to this graph's row storage.
//
// Visible steps:
//   1. If indices are not yet allocated, allocate them as global indices.
//   2. Copy k_rowPtrs_ into a writable "row begin" array and build a matching
//      "row end" array (from k_numRowEntries_ when it is non-empty, otherwise
//      from the next row's begin offset).
//   3. Hand both arrays, the 1-D index array that matches the current
//      indexing mode, and `padding` to Details::padCrsArrays.
//   4. If k_numRowEntries_ is non-empty, rewrite it as (end - begin) per row.
//   5. Replace k_rowPtrs_ with the "row begin" array.
//
// \param padding [in] Padding description forwarded to padCrsArrays.
// \param verbose [in] Whether to print debugging output to std::cerr.
//
// NOTE(review): what padCrsArrays does to its arguments is not visible in
// this function; presumably it reallocates/shifts them -- confirm there.
5068  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5069  void
5070  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5071  applyCrsPadding(const padding_type& padding,
5072  const bool verbose)
5073  {
5074  using Details::ProfilingRegion;
5075  using Details::padCrsArrays;
5076  using std::endl;
5077  using LO = local_ordinal_type;
5078  using execution_space = typename device_type::execution_space;
5079  using row_ptrs_type =
5080  typename local_graph_type::row_map_type::non_const_type;
5081  using range_policy =
5082  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5083  const char tfecfFuncName[] = "applyCrsPadding";
5084  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
5085 
5086  std::unique_ptr<std::string> prefix;
5087  if (verbose) {
5088  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5089  std::ostringstream os;
5090  os << *prefix << "padding: ";
5091  padding.print(os);
5092  os << endl;
5093  std::cerr << os.str();
5094  }
      // The rank is only looked up in verbose mode.  It stays -1 when not
      // verbose, or when the Map or its communicator is null.  It is
      // forwarded to padCrsArrays below.
5095  const int myRank = ! verbose ? -1 : [&] () {
5096  auto map = this->getMap();
5097  if (map.is_null()) {
5098  return -1;
5099  }
5100  auto comm = map->getComm();
5101  if (comm.is_null()) {
5102  return -1;
5103  }
5104  return comm->getRank();
5105  } ();
5106 
5107  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
5108  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
5109  // size needs to increase. That should be the job of
5110  // padCrsArrays.
5111 
5112  // Assume global indexing if we don't have any indices yet.
5113  if (! indicesAreAllocated()) {
5114  if (verbose) {
5115  std::ostringstream os;
5116  os << *prefix << "Call allocateIndices" << endl;
5117  std::cerr << os.str();
5118  }
5119  allocateIndices(GlobalIndices, verbose);
5120  }
5121  TEUCHOS_ASSERT( indicesAreAllocated() );
5122 
5123  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5124  // would use it directly.
5125 
5126  if (verbose) {
5127  std::ostringstream os;
5128  os << *prefix << "Allocate row_ptrs_beg: "
5129  << k_rowPtrs_.extent(0) << endl;
5130  std::cerr << os.str();
5131  }
5132  using Kokkos::view_alloc;
5133  using Kokkos::WithoutInitializing;
      // Uninitialized allocation is fine: the deep_copy below overwrites
      // every entry of row_ptrs_beg.
5134  row_ptrs_type row_ptrs_beg(
5135  view_alloc("row_ptrs_beg", WithoutInitializing),
5136  k_rowPtrs_.extent(0));
5137  Kokkos::deep_copy(row_ptrs_beg, k_rowPtrs_);
5138 
      // N is one less than the number of row offsets (or 0 if there are no
      // offsets at all); it is the length of row_ptrs_end and the trip
      // count of every parallel_for below.
5139  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
5140  size_t(row_ptrs_beg.extent(0) - 1);
5141  if (verbose) {
5142  std::ostringstream os;
5143  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
5144  std::cerr << os.str();
5145  }
5146  row_ptrs_type row_ptrs_end(
5147  view_alloc("row_ptrs_end", WithoutInitializing), N);
5148 
      // A non-empty k_numRowEntries_ means per-row entry counts are kept
      // separately; they are read here and written back after padding.
5149  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5150  if (refill_num_row_entries) { // Case 1: Unpacked storage
5151  // We can't assume correct *this capture until C++17, and it's
5152  // likely more efficient just to capture what we need anyway.
5153  auto num_row_entries = this->k_numRowEntries_;
5154  Kokkos::parallel_for
5155  ("Fill end row pointers", range_policy(0, N),
5156  KOKKOS_LAMBDA (const size_t i) {
5157  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5158  });
5159  }
5160  else {
5161  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5162  // storage, we don't need row_ptr_end to be separate allocation;
5163  // could just have it alias row_ptr_beg+1.
      // No separate counts: row i ends where row i+1 begins.
5164  Kokkos::parallel_for
5165  ("Fill end row pointers", range_policy(0, N),
5166  KOKKOS_LAMBDA (const size_t i) {
5167  row_ptrs_end(i) = row_ptrs_beg(i+1);
5168  });
5169  }
5170 
      // Pad whichever 1-D index array matches the current indexing mode.
5171  if (isGloballyIndexed()) {
5172  padCrsArrays(row_ptrs_beg, row_ptrs_end, k_gblInds1D_,
5173  padding, myRank, verbose);
5174  }
5175  else {
5176  padCrsArrays(row_ptrs_beg, row_ptrs_end, k_lclInds1D_,
5177  padding, myRank, verbose);
5178  }
5179 
      // Write the per-row counts back from the begin/end arrays as they
      // stand after padCrsArrays.
5180  if (refill_num_row_entries) {
5181  auto num_row_entries = this->k_numRowEntries_;
5182  Kokkos::parallel_for
5183  ("Fill num entries", range_policy(0, N),
5184  KOKKOS_LAMBDA (const size_t i) {
5185  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5186  });
5187  }
5188  if (verbose) {
5189  std::ostringstream os;
5190  os << *prefix << "Reassign k_rowPtrs_; old size: "
5191  << k_rowPtrs_.extent(0) << ", new size: "
5192  << row_ptrs_beg.extent(0) << endl;
5193  std::cerr << os.str();
      // Note: this size check only runs in verbose mode.
5194  TEUCHOS_ASSERT( k_rowPtrs_.extent(0) == row_ptrs_beg.extent(0) );
5195  }
5196  this->k_rowPtrs_ = row_ptrs_beg;
5197  }
5198 
5199  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5200  std::unique_ptr<
5201  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5202  >
5203  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5204  computeCrsPadding(
5205  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5206  const size_t numSameIDs,
5207  const Kokkos::DualView<const local_ordinal_type*,
5208  buffer_device_type>& permuteToLIDs,
5209  const Kokkos::DualView<const local_ordinal_type*,
5210  buffer_device_type>& permuteFromLIDs,
5211  const bool verbose) const
5212  {
5213  using LO = local_ordinal_type;
5214  using std::endl;
5215 
5216  std::unique_ptr<std::string> prefix;
5217  if (verbose) {
5218  prefix = this->createPrefix("CrsGraph",
5219  "computeCrsPadding(same & permute)");
5220  std::ostringstream os;
5221  os << *prefix << "{numSameIDs: " << numSameIDs
5222  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5223  << endl;
5224  std::cerr << os.str();
5225  }
5226 
5227  const int myRank = [&] () {
5228  auto comm = rowMap_.is_null() ? Teuchos::null :
5229  rowMap_->getComm();
5230  return comm.is_null() ? -1 : comm->getRank();
5231  } ();
5232  std::unique_ptr<padding_type> padding(
5233  new padding_type(myRank, numSameIDs,
5234  permuteFromLIDs.extent(0)));
5235 
5236  // We're accessing data on host, so make sure all device
5237  // computations on the graphs' data are done.
5238  //
5239  // NOTE (mfh 08 Feb 2020) If we ever get rid of this fence, look
5240  // carefully in computeCrsPaddingFor{Same,Permuted}IDs to see if
5241  // we need a fence there.
5242  Kokkos::fence();
5243 
5244  computeCrsPaddingForSameIDs(*padding, source,
5245  static_cast<LO>(numSameIDs));
5246  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5247  permuteFromLIDs);
5248  return padding;
5249  }
5250 
5251  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5252  void
5253  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5254  computeCrsPaddingForSameIDs(
5255  padding_type& padding,
5256  const RowGraph<local_ordinal_type, global_ordinal_type,
5257  node_type>& source,
5258  const local_ordinal_type numSameIDs) const
5259  {
5260  using LO = local_ordinal_type;
5261  using GO = global_ordinal_type;
5262  using Details::Impl::getRowGraphGlobalRow;
5263  using std::endl;
5264  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5265 
5266  std::unique_ptr<std::string> prefix;
5267  const bool verbose = verbose_;
5268  if (verbose) {
5269  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5270  std::ostringstream os;
5271  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5272  std::cerr << os.str();
5273  }
5274 
5275  if (numSameIDs == 0) {
5276  return;
5277  }
5278 
5279  const map_type& srcRowMap = *(source.getRowMap());
5280  const map_type& tgtRowMap = *rowMap_;
5281  using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5282  const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5283  const bool src_is_unique =
5284  srcCrs == nullptr ? false : srcCrs->isMerged();
5285  const bool tgt_is_unique = this->isMerged();
5286 
5287  std::vector<GO> srcGblColIndsScratch;
5288  std::vector<GO> tgtGblColIndsScratch;
5289  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5290  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5291  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5292  auto srcGblColInds = getRowGraphGlobalRow(
5293  srcGblColIndsScratch, source, srcGblRowInd);
5294  auto tgtGblColInds = getRowGraphGlobalRow(
5295  tgtGblColIndsScratch, *this, tgtGblRowInd);
5296  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5297  tgtGblColInds.size(), tgt_is_unique,
5298  srcGblColInds.getRawPtr(),
5299  srcGblColInds.size(), src_is_unique);
5300  }
5301  if (verbose) {
5302  std::ostringstream os;
5303  os << *prefix << "Done" << endl;
5304  std::cerr << os.str();
5305  }
5306  }
5307 
5308  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5309  void
5310  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5311  computeCrsPaddingForPermutedIDs(
5312  padding_type& padding,
5313  const RowGraph<local_ordinal_type, global_ordinal_type,
5314  node_type>& source,
5315  const Kokkos::DualView<const local_ordinal_type*,
5316  buffer_device_type>& permuteToLIDs,
5317  const Kokkos::DualView<const local_ordinal_type*,
5318  buffer_device_type>& permuteFromLIDs) const
5319  {
5320  using LO = local_ordinal_type;
5321  using GO = global_ordinal_type;
5322  using Details::Impl::getRowGraphGlobalRow;
5323  using std::endl;
5324  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5325 
5326  std::unique_ptr<std::string> prefix;
5327  const bool verbose = verbose_;
5328  if (verbose) {
5329  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5330  std::ostringstream os;
5331  os << *prefix << "permuteToLIDs.extent(0): "
5332  << permuteToLIDs.extent(0)
5333  << ", permuteFromLIDs.extent(0): "
5334  << permuteFromLIDs.extent(0) << endl;
5335  std::cerr << os.str();
5336  }
5337 
5338  if (permuteToLIDs.extent(0) == 0) {
5339  return;
5340  }
5341 
5342  const map_type& srcRowMap = *(source.getRowMap());
5343  const map_type& tgtRowMap = *rowMap_;
5344  using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5345  const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5346  const bool src_is_unique =
5347  srcCrs == nullptr ? false : srcCrs->isMerged();
5348  const bool tgt_is_unique = this->isMerged();
5349 
5350  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5351  auto permuteToLIDs_h = permuteToLIDs.view_host();
5352  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5353  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5354 
5355  std::vector<GO> srcGblColIndsScratch;
5356  std::vector<GO> tgtGblColIndsScratch;
5357  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5358  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5359  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5360  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5361  auto srcGblColInds = getRowGraphGlobalRow(
5362  srcGblColIndsScratch, source, srcGblRowInd);
5363  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5364  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5365  auto tgtGblColInds = getRowGraphGlobalRow(
5366  tgtGblColIndsScratch, *this, tgtGblRowInd);
5367  padding.update_permute(whichPermute, tgtLclRowInd,
5368  tgtGblColInds.getRawPtr(),
5369  tgtGblColInds.size(), tgt_is_unique,
5370  srcGblColInds.getRawPtr(),
5371  srcGblColInds.size(), src_is_unique);
5372  }
5373 
5374  if (verbose) {
5375  std::ostringstream os;
5376  os << *prefix << "Done" << endl;
5377  std::cerr << os.str();
5378  }
5379  }
5380 
5381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5382  std::unique_ptr<
5383  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5384  >
5385  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5386  computeCrsPaddingForImports(
5387  const Kokkos::DualView<const local_ordinal_type*,
5388  buffer_device_type>& importLIDs,
5389  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5390  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5391  const bool verbose) const
5392  {
5393  using Details::Impl::getRowGraphGlobalRow;
5394  using std::endl;
5395  using LO = local_ordinal_type;
5396  using GO = global_ordinal_type;
5397  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5398 
5399  std::unique_ptr<std::string> prefix;
5400  if (verbose) {
5401  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5402  std::ostringstream os;
5403  os << *prefix << "importLIDs.extent(0): "
5404  << importLIDs.extent(0)
5405  << ", imports.extent(0): "
5406  << imports.extent(0)
5407  << ", numPacketsPerLID.extent(0): "
5408  << numPacketsPerLID.extent(0) << endl;
5409  std::cerr << os.str();
5410  }
5411 
5412  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5413  const int myRank = [&] () {
5414  auto comm = rowMap_.is_null() ? Teuchos::null :
5415  rowMap_->getComm();
5416  return comm.is_null() ? -1 : comm->getRank();
5417  } ();
5418  std::unique_ptr<padding_type> padding(
5419  new padding_type(myRank, numImports));
5420  Kokkos::fence(); // Make sure device sees changes made by host
5421  if (imports.need_sync_host()) {
5422  imports.sync_host();
5423  }
5424  auto imports_h = imports.view_host();
5425  if (numPacketsPerLID.need_sync_host ()) {
5426  numPacketsPerLID.sync_host();
5427  }
5428  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5429 
5430  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5431  auto importLIDs_h = importLIDs.view_host();
5432 
5433  const map_type& tgtRowMap = *rowMap_;
5434  // Always merge source column indices, since isMerged() is
5435  // per-process state, and we don't know its value on other
5436  // processes that sent us data.
5437  constexpr bool src_is_unique = false;
5438  const bool tgt_is_unique = isMerged();
5439 
5440  std::vector<GO> tgtGblColIndsScratch;
5441  size_t offset = 0;
5442  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5443  // CrsGraph packs just global column indices, while CrsMatrix
5444  // packs bytes (first the number of entries in the row, then the
5445  // global column indices, then other stuff like the matrix
5446  // values in that row).
5447  const LO origSrcNumEnt =
5448  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5449  GO* const srcGblColInds = imports_h.data() + offset;
5450 
5451  const LO tgtLclRowInd = importLIDs_h[whichImport];
5452  const GO tgtGblRowInd =
5453  tgtRowMap.getGlobalElement(tgtLclRowInd);
5454  auto tgtGblColInds = getRowGraphGlobalRow(
5455  tgtGblColIndsScratch, *this, tgtGblRowInd);
5456  const size_t origTgtNumEnt(tgtGblColInds.size());
5457 
5458  padding->update_import(whichImport, tgtLclRowInd,
5459  tgtGblColInds.getRawPtr(),
5460  origTgtNumEnt, tgt_is_unique,
5461  srcGblColInds,
5462  origSrcNumEnt, src_is_unique);
5463  offset += origSrcNumEnt;
5464  }
5465 
5466  if (verbose) {
5467  std::ostringstream os;
5468  os << *prefix << "Done" << endl;
5469  std::cerr << os.str();
5470  }
5471  return padding;
5472  }
5473 
5474  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5475  std::unique_ptr<
5476  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5477  >
5478  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5479  computePaddingForCrsMatrixUnpack(
5480  const Kokkos::DualView<const local_ordinal_type*,
5481  buffer_device_type>& importLIDs,
5482  Kokkos::DualView<char*, buffer_device_type> imports,
5483  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5484  const bool verbose) const
5485  {
5486  using Details::Impl::getRowGraphGlobalRow;
5487  using Details::PackTraits;
5488  using std::endl;
5489  using LO = local_ordinal_type;
5490  using GO = global_ordinal_type;
5491  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5492 
5493  std::unique_ptr<std::string> prefix;
5494  if (verbose) {
5495  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5496  std::ostringstream os;
5497  os << *prefix << "importLIDs.extent(0): "
5498  << importLIDs.extent(0)
5499  << ", imports.extent(0): "
5500  << imports.extent(0)
5501  << ", numPacketsPerLID.extent(0): "
5502  << numPacketsPerLID.extent(0) << endl;
5503  std::cerr << os.str();
5504  }
5505  const bool extraVerbose =
5506  verbose && Details::Behavior::verbose("CrsPadding");
5507 
5508  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5509  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5510  const int myRank = [&] () {
5511  auto comm = rowMap_.is_null() ? Teuchos::null :
5512  rowMap_->getComm();
5513  return comm.is_null() ? -1 : comm->getRank();
5514  } ();
5515  std::unique_ptr<padding_type> padding(
5516  new padding_type(myRank, numImports));
5517  Kokkos::fence(); // Make sure host sees changes made by device
5518  if (imports.need_sync_host()) {
5519  imports.sync_host();
5520  }
5521  auto imports_h = imports.view_host();
5522  if (numPacketsPerLID.need_sync_host ()) {
5523  numPacketsPerLID.sync_host();
5524  }
5525  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5526 
5527  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5528  auto importLIDs_h = importLIDs.view_host();
5529 
5530  const map_type& tgtRowMap = *rowMap_;
5531  // Always merge source column indices, since isMerged() is
5532  // per-process state, and we don't know its value on other
5533  // processes that sent us data.
5534  constexpr bool src_is_unique = false;
5535  const bool tgt_is_unique = isMerged();
5536 
5537  std::vector<GO> srcGblColIndsScratch;
5538  std::vector<GO> tgtGblColIndsScratch;
5539  size_t offset = 0;
5540  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5541  // CrsGraph packs just global column indices, while CrsMatrix
5542  // packs bytes (first the number of entries in the row, then the
5543  // global column indices, then other stuff like the matrix
5544  // values in that row).
5545  const size_t numBytes = numPacketsPerLID_h[whichImport];
5546  if (extraVerbose) {
5547  std::ostringstream os;
5548  os << *prefix << "whichImport=" << whichImport
5549  << ", numImports=" << numImports
5550  << ", numBytes=" << numBytes << endl;
5551  std::cerr << os.str();
5552  }
5553  if (numBytes == 0) {
5554  continue; // special case: no entries to unpack for this row
5555  }
5556  LO origSrcNumEnt = 0;
5557  const size_t numEntBeg = offset;
5558  const size_t numEntLen =
5559  PackTraits<LO>::packValueCount(origSrcNumEnt);
5560  TEUCHOS_ASSERT( numBytes >= numEntLen );
5561  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5562  PackTraits<LO>::unpackValue(origSrcNumEnt,
5563  imports_h.data() + numEntBeg);
5564  if (extraVerbose) {
5565  std::ostringstream os;
5566  os << *prefix << "whichImport=" << whichImport
5567  << ", numImports=" << numImports
5568  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5569  std::cerr << os.str();
5570  }
5571  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5572  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5573  const size_t gidsBeg = numEntBeg + numEntLen;
5574  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5575  srcGblColIndsScratch.resize(origSrcNumEnt);
5576  }
5577  GO* const srcGblColInds = srcGblColIndsScratch.data();
5578  PackTraits<GO>::unpackArray(srcGblColInds,
5579  imports_h.data() + gidsBeg,
5580  origSrcNumEnt);
5581  const LO tgtLclRowInd = importLIDs_h[whichImport];
5582  const GO tgtGblRowInd =
5583  tgtRowMap.getGlobalElement(tgtLclRowInd);
5584  auto tgtGblColInds = getRowGraphGlobalRow(
5585  tgtGblColIndsScratch, *this, tgtGblRowInd);
5586  const size_t origNumTgtEnt(tgtGblColInds.size());
5587 
5588  if (extraVerbose) {
5589  std::ostringstream os;
5590  os << *prefix << "whichImport=" << whichImport
5591  << ", numImports=" << numImports
5592  << ": Call padding->update_import" << endl;
5593  std::cerr << os.str();
5594  }
5595  padding->update_import(whichImport, tgtLclRowInd,
5596  tgtGblColInds.getRawPtr(),
5597  origNumTgtEnt, tgt_is_unique,
5598  srcGblColInds,
5599  origSrcNumEnt, src_is_unique);
5600  offset += numBytes;
5601  }
5602 
5603  if (verbose) {
5604  std::ostringstream os;
5605  os << *prefix << "Done" << endl;
5606  std::cerr << os.str();
5607  }
5608  return padding;
5609  }
5610 
// Pack the rows of `source` listed in exportLIDs into `exports`.
//
// Checks performed first (each raises the indicated exception type):
//   - exportLIDs and numPacketsPerLID must have the same extent
//     (std::runtime_error);
//   - `source` must be a RowGraph with the same template parameters
//     (std::invalid_argument);
//   - *this must not be fill complete (std::runtime_error).
//
// One of three packing paths is then taken:
//   1. `source` is a RowGraph but not a CrsGraph: call RowGraph::pack
//      through Teuchos array types, then deep-copy its output into
//      `exports` (reallocating `exports` if its size differs).
//   2. `source` is a CrsGraph and the "valid local graph" condition
//      below holds: call packCrsGraphNew on the source.
//   3. Otherwise: call packFillActiveNew on the source.
//
// \param source [in] Source object of the Import or Export.
// \param exportLIDs [in] Local row indices to pack; on path 1 it must
//   already be valid on host (asserted).
// \param exports [in/out] Pack buffer.
// \param numPacketsPerLID [out] Per-row packet counts.
// \param constantNumPackets [out] Forwarded to the pack routine.
// \param distor [in] Forwarded to the pack routine.
5611  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5612  void
5613  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5614  packAndPrepare
5615  (const SrcDistObject& source,
5616  const Kokkos::DualView<const local_ordinal_type*,
5617  buffer_device_type>& exportLIDs,
5618  Kokkos::DualView<packet_type*,
5619  buffer_device_type>& exports,
5620  Kokkos::DualView<size_t*,
5621  buffer_device_type> numPacketsPerLID,
5622  size_t& constantNumPackets,
5623  Distributor& distor)
5624  {
5626  using GO = global_ordinal_type;
5627  using std::endl;
5628  using crs_graph_type =
5629  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5630  using row_graph_type =
5631  RowGraph<local_ordinal_type, global_ordinal_type, node_type>;
5632  const char tfecfFuncName[] = "packAndPrepare: ";
5633  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5634 
5635  const bool verbose = verbose_;
5636  std::unique_ptr<std::string> prefix;
5637  if (verbose) {
5638  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5639  std::ostringstream os;
5640  os << *prefix << "Start" << endl;
5641  std::cerr << os.str();
5642  }
5643 
5644  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5645  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5646  std::runtime_error,
5647  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5648  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5649  << ".");
5650  const row_graph_type* srcRowGraphPtr =
5651  dynamic_cast<const row_graph_type*> (&source);
5652  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5653  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5654  "or Import operation to a CrsGraph must be a RowGraph with the same "
5655  "template parameters.");
5656  // We don't check whether src_graph has had fillComplete called,
5657  // because it doesn't matter whether the *source* graph has been
5658  // fillComplete'd. The target graph can not be fillComplete'd yet.
5659  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5660  (this->isFillComplete (), std::runtime_error,
5661  "The target graph of an Import or Export must not be fill complete.");
5662 
      // A second cast decides between the generic RowGraph path and the
      // CrsGraph-specific paths.
5663  const crs_graph_type* srcCrsGraphPtr =
5664  dynamic_cast<const crs_graph_type*> (&source);
5665 
5666  if (srcCrsGraphPtr == nullptr) {
5667  using Teuchos::ArrayView;
5668  using LO = local_ordinal_type;
5669 
5670  if (verbose) {
5671  std::ostringstream os;
5672  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5673  << endl;
5674  std::cerr << os.str();
5675  }
5676  // RowGraph::pack serves the "old" DistObject interface. It
5677  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5678  // entails deep-copying the exports buffer on output. RowGraph
5679  // is a convenience interface when not a CrsGraph, so we accept
5680  // the performance hit.
5681  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5682  auto exportLIDs_h = exportLIDs.view_host ();
5683  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5684  exportLIDs_h.extent (0));
5685  Teuchos::Array<GO> exports_a;
5686 
      // numPacketsPerLID is written on host by RowGraph::pack, so mark
      // the host copy as the modified one.
5687  numPacketsPerLID.clear_sync_state ();
5688  numPacketsPerLID.modify_host ();
5689  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5690  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5691  numPacketsPerLID_h.extent (0));
5692  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5693  constantNumPackets, distor);
      // Reallocate `exports` only if its size differs from what
      // RowGraph::pack produced.
5694  const size_t newSize = static_cast<size_t> (exports_a.size ());
5695  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5696  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5697  exports = exports_dv_type ("exports", newSize);
5698  }
      // Wrap the Teuchos::Array in an unmanaged host View so that it can
      // be deep-copied into the host side of `exports`.
5699  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5700  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5701  exports.clear_sync_state ();
5702  exports.modify_host ();
5703  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5704  }
5705  // packCrsGraphNew requires a valid localGraph.
      // NOTE(review): this condition inspects *this (getColMap(),
      // lclGraph_, getRowMap()), while the graph handed to
      // packCrsGraphNew is *srcCrsGraphPtr -- confirm that checking the
      // target rather than the source is intended.
5706  else if (! getColMap ().is_null () &&
5707  (lclGraph_.row_map.extent (0) != 0 ||
5708  getRowMap ()->getNodeNumElements () == 0)) {
5709  if (verbose) {
5710  std::ostringstream os;
5711  os << *prefix << "packCrsGraphNew path" << endl;
5712  std::cerr << os.str();
5713  }
5714  using export_pids_type =
5715  Kokkos::DualView<const int*, buffer_device_type>;
5716  export_pids_type exportPIDs; // not filling it; needed for syntax
5717  using LO = local_ordinal_type;
5718  using NT = node_type;
5720  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5721  exports, numPacketsPerLID,
5722  constantNumPackets, false, distor);
5723  }
5724  else {
      // Fallback path; unlike the two branches above, it prints no
      // verbose message of its own here.
5725  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5726  constantNumPackets, distor);
5727  }
5728 
5729  if (verbose) {
5730  std::ostringstream os;
5731  os << *prefix << "Done" << endl;
5732  std::cerr << os.str();
5733  }
5734  }
5735 
// Pack the rows of this graph listed in exportLIDs into `exports`,
// using the Teuchos::ArrayView / Teuchos::Array interface.
//
// If this graph has a column Map and either its local graph's row
// offsets are non-empty or its row Map has no elements on this process,
// packing is delegated to packCrsGraph; otherwise it is delegated to
// packFillActive.
//
// \param exportLIDs [in] Local row indices to pack.
// \param exports [out] Pack buffer.
// \param numPacketsPerLID [out] Per-row packet counts.
// \param constantNumPackets [out] Forwarded to the pack routine.
// \param distor [in] Forwarded to the pack routine.
5736  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5737  void
5738  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5739  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5740  Teuchos::Array<GlobalOrdinal>& exports,
5741  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5742  size_t& constantNumPackets,
5743  Distributor& distor) const
5744  {
5745  auto col_map = this->getColMap();
5746  // packCrsGraph requires a valid localGraph.
5747  if( !col_map.is_null() && (lclGraph_.row_map.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
      // Note the argument order: packCrsGraph takes numPacketsPerLID
      // before exportLIDs, unlike packFillActive below.
5749  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5750  exportLIDs, constantNumPackets, distor);
5751  }
5752  else {
5753  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5754  constantNumPackets, distor);
5755  }
5756  }
5757 
5758  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5759  void
5761  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5762  Teuchos::Array<GlobalOrdinal>& exports,
5763  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5764  size_t& constantNumPackets,
5765  Distributor& /* distor */) const
5766  {
5767  using std::endl;
5768  using LO = LocalOrdinal;
5769  using GO = GlobalOrdinal;
5770  using host_execution_space =
5771  typename Kokkos::View<size_t*, device_type>::
5772  HostMirror::execution_space;
5773  using device_execution_space =
5774  typename device_type::execution_space;
5775  const char tfecfFuncName[] = "packFillActive: ";
5776  const bool verbose = verbose_;
5777 
5778  const auto numExportLIDs = exportLIDs.size ();
5779  std::unique_ptr<std::string> prefix;
5780  if (verbose) {
5781  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5782  std::ostringstream os;
5783  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5784  std::cerr << os.str();
5785  }
5786  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5787  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5788  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5789  " = " << numPacketsPerLID.size () << ".");
5790 
5791  // We may be accessing UVM data on host below, so ensure that the
5792  // device is done accessing it.
5793  device_execution_space().fence ();
5794 
5795  const map_type& rowMap = * (this->getRowMap ());
5796  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5797  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5798  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5799  "This graph claims to be locally indexed, but its column Map is nullptr. "
5800  "This should never happen. Please report this bug to the Tpetra "
5801  "developers.");
5802 
5803  // We may pack different amounts of data for different rows.
5804  constantNumPackets = 0;
5805 
5806  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5807  // it might be now, but we might as well be safe).
5808  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5809  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5810 
5811  // Count the total number of packets (column indices, in the case
5812  // of a CrsGraph) to pack. While doing so, set
5813  // numPacketsPerLID[i] to the number of entries owned by the
5814  // calling process in (local) row exportLIDs[i] of the graph, that
5815  // the caller wants us to send out.
5816  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5817  size_t totalNumPackets = 0;
5818  size_t errCount = 0;
5819  // lambdas turn what they capture const, so we can't
5820  // atomic_add(&errCount,1). Instead, we need a View to modify.
5821  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5822  host_device_type;
5823  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5824  constexpr size_t ONE = 1;
5825 
5826  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5827  inputRange,
5828  [=] (const LO& i, size_t& curTotalNumPackets) {
5829  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5830  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5831  Kokkos::atomic_add (&errCountView(), ONE);
5832  numPacketsPerLID_raw[i] = 0;
5833  }
5834  else {
5835  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5836  numPacketsPerLID_raw[i] = numEnt;
5837  curTotalNumPackets += numEnt;
5838  }
5839  },
5840  totalNumPackets);
5841 
5842  if (verbose) {
5843  std::ostringstream os;
5844  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5845  std::cerr << os.str();
5846  }
5847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5848  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5849  "one or more errors! errCount = " << errCount
5850  << ", totalNumPackets = " << totalNumPackets << ".");
5851  errCount = 0;
5852 
5853  // Allocate space for all the column indices to pack.
5854  exports.resize (totalNumPackets);
5855 
5856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5857  (! this->supportsRowViews (), std::logic_error,
5858  "this->supportsRowViews() returns false; this should never happen. "
5859  "Please report this bug to the Tpetra developers.");
5860 
5861  // Loop again over the rows to export, and pack rows of indices
5862  // into the output buffer.
5863 
5864  if (verbose) {
5865  std::ostringstream os;
5866  os << *prefix << "Pack into exports" << endl;
5867  std::cerr << os.str();
5868  }
5869 
5870  // Teuchos::ArrayView may not be thread safe, or may not be
5871  // efficiently thread safe. Better to use the raw pointer.
5872  GO* const exports_raw = exports.getRawPtr ();
5873  errCount = 0;
5874  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5875  inputRange, [=, &prefix]
5876  (const LO i, size_t& exportsOffset, const bool final) {
5877  const size_t curOffset = exportsOffset;
5878  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5879  const RowInfo rowInfo =
5880  this->getRowInfoFromGlobalRowIndex (gblRow);
5881 
5882  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5883  if (rowInfo.localRow == TDO::invalid ()) {
5884  if (verbose) {
5885  std::ostringstream os;
5886  os << *prefix << ": INVALID rowInfo: i=" << i
5887  << ", lclRow=" << exportLIDs_raw[i] << endl;
5888  std::cerr << os.str();
5889  }
5890  Kokkos::atomic_add (&errCountView(), ONE);
5891  }
5892  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5893  if (verbose) {
5894  std::ostringstream os;
5895  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5896  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5897  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5898  << ") > totalNumPackets (= " << totalNumPackets << ")."
5899  << endl;
5900  std::cerr << os.str();
5901  }
5902  Kokkos::atomic_add (&errCountView(), ONE);
5903  }
5904  else {
5905  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5906  if (this->isLocallyIndexed ()) {
5907  const LO* lclColInds = nullptr;
5908  LO capacity = 0;
5909  const LO errCode =
5910  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
5911  if (errCode == 0) {
5912  if (final) {
5913  for (LO k = 0; k < numEnt; ++k) {
5914  const LO lclColInd = lclColInds[k];
5915  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5916  // Pack it, even if it's wrong. Let the receiving
5917  // process deal with it. Otherwise, we'll miss out
5918  // on any correct data.
5919  exports_raw[curOffset + k] = gblColInd;
5920  } // for each entry in the row
5921  } // final pass?
5922  exportsOffset = curOffset + numEnt;
5923  }
5924  else { // error in getting local row view
5925  Kokkos::atomic_add (&errCountView(), ONE);
5926  }
5927  }
5928  else if (this->isGloballyIndexed ()) {
5929  const GO* gblColInds = nullptr;
5930  LO capacity = 0;
5931  const LO errCode =
5932  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
5933  if (errCode == 0) {
5934  if (final) {
5935  for (LO k = 0; k < numEnt; ++k) {
5936  const GO gblColInd = gblColInds[k];
5937  // Pack it, even if it's wrong. Let the receiving
5938  // process deal with it. Otherwise, we'll miss out
5939  // on any correct data.
5940  exports_raw[curOffset + k] = gblColInd;
5941  } // for each entry in the row
5942  } // final pass?
5943  exportsOffset = curOffset + numEnt;
5944  }
5945  else { // error in getting global row view
5946  Kokkos::atomic_add (&errCountView(), ONE);
5947  }
5948  }
5949  // If neither globally nor locally indexed, then the graph
5950  // has no entries in this row (or indeed, in any row on this
5951  // process) to pack.
5952  }
5953  });
5954 
5955  // We may have accessed UVM data on host above, so ensure that the
5956  // device sees these changes.
5957  device_execution_space().fence ();
5958 
5959  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5960  (errCount != 0, std::logic_error, "Packing encountered "
5961  "one or more errors! errCount = " << errCount
5962  << ", totalNumPackets = " << totalNumPackets << ".");
5963 
5964  if (verbose) {
5965  std::ostringstream os;
5966  os << *prefix << "Done" << endl;
5967  std::cerr << os.str();
5968  }
5969  }
5970 
5971  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5972  void
5973  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5974  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5975  buffer_device_type>& exportLIDs,
5976  Kokkos::DualView<packet_type*,
5977  buffer_device_type>& exports,
5978  Kokkos::DualView<size_t*,
5979  buffer_device_type> numPacketsPerLID,
5980  size_t& constantNumPackets,
5981  Distributor& distor) const
5982  {
5983  using std::endl;
5984  using LO = local_ordinal_type;
5985  using GO = global_ordinal_type;
5986  using host_execution_space = typename Kokkos::View<size_t*,
5987  device_type>::HostMirror::execution_space;
5988  using host_device_type =
5989  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5990  using device_execution_space = typename device_type::execution_space;
5991  using exports_dv_type =
5992  Kokkos::DualView<packet_type*, buffer_device_type>;
5993  const char tfecfFuncName[] = "packFillActiveNew: ";
5994  const bool verbose = verbose_;
5995 
5996  const auto numExportLIDs = exportLIDs.extent (0);
5997  std::unique_ptr<std::string> prefix;
5998  if (verbose) {
5999  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
6000  std::ostringstream os;
6001  os << *prefix << "numExportLIDs: " << numExportLIDs
6002  << ", numPacketsPerLID.extent(0): "
6003  << numPacketsPerLID.extent(0) << endl;
6004  std::cerr << os.str();
6005  }
6006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6007  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6008  "exportLIDs.extent(0) = " << numExportLIDs
6009  << " != numPacketsPerLID.extent(0) = "
6010  << numPacketsPerLID.extent (0) << ".");
6011  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6012  auto exportLIDs_h = exportLIDs.view_host ();
6013 
6014  // We may be accessing UVM data on host below, so ensure that the
6015  // device is done accessing it.
6016  device_execution_space().fence ();
6017 
6018  const map_type& rowMap = * (this->getRowMap ());
6019  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6021  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6022  "This graph claims to be locally indexed, but its column Map is nullptr. "
6023  "This should never happen. Please report this bug to the Tpetra "
6024  "developers.");
6025 
6026  // We may pack different amounts of data for different rows.
6027  constantNumPackets = 0;
6028 
6029  numPacketsPerLID.clear_sync_state ();
6030  numPacketsPerLID.modify_host ();
6031  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6032 
6033  // Count the total number of packets (column indices, in the case
6034  // of a CrsGraph) to pack. While doing so, set
6035  // numPacketsPerLID[i] to the number of entries owned by the
6036  // calling process in (local) row exportLIDs[i] of the graph, that
6037  // the caller wants us to send out.
6038  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6039  range_type inputRange (0, numExportLIDs);
6040  size_t totalNumPackets = 0;
6041  size_t errCount = 0;
6042  // lambdas turn what they capture const, so we can't
6043  // atomic_add(&errCount,1). Instead, we need a View to modify.
6044  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6045  constexpr size_t ONE = 1;
6046 
6047  if (verbose) {
6048  std::ostringstream os;
6049  os << *prefix << "Compute totalNumPackets" << endl;
6050  std::cerr << os.str ();
6051  }
6052 
6053  Kokkos::parallel_reduce
6054  ("Tpetra::CrsGraph::pack: totalNumPackets",
6055  inputRange,
6056  [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
6057  const LO lclRow = exportLIDs_h[i];
6058  const GO gblRow = rowMap.getGlobalElement (lclRow);
6059  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6060  if (verbose) {
6061  std::ostringstream os;
6062  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6063  << " not in row Map on this process" << endl;
6064  std::cerr << os.str();
6065  }
6066  Kokkos::atomic_add (&errCountView(), ONE);
6067  numPacketsPerLID_h(i) = 0;
6068  }
6069  else {
6070  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6071  numPacketsPerLID_h(i) = numEnt;
6072  curTotalNumPackets += numEnt;
6073  }
6074  },
6075  totalNumPackets);
6076 
6077  if (verbose) {
6078  std::ostringstream os;
6079  os << *prefix << "totalNumPackets: " << totalNumPackets
6080  << ", errCount: " << errCount << endl;
6081  std::cerr << os.str ();
6082  }
6083  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6084  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6085  "one or more errors! totalNumPackets: " << totalNumPackets
6086  << ", errCount: " << errCount << ".");
6087 
6088  // Allocate space for all the column indices to pack.
6089  if (size_t(exports.extent (0)) < totalNumPackets) {
6090  // FIXME (mfh 09 Apr 2019) Create without initializing.
6091  exports = exports_dv_type ("exports", totalNumPackets);
6092  }
6093 
6094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6095  (! this->supportsRowViews (), std::logic_error,
6096  "this->supportsRowViews() returns false; this should never happen. "
6097  "Please report this bug to the Tpetra developers.");
6098 
6099  // Loop again over the rows to export, and pack rows of indices
6100  // into the output buffer.
6101 
6102  if (verbose) {
6103  std::ostringstream os;
6104  os << *prefix << "Pack into exports buffer" << endl;
6105  std::cerr << os.str();
6106  }
6107 
6108  exports.clear_sync_state ();
6109  exports.modify_host ();
6110  auto exports_h = exports.view_host ();
6111 
6112  // The graph may store its data in UVM memory, so make sure that
6113  // any device kernels are done modifying the graph's data before
6114  // reading the data.
6115  device_execution_space().fence ();
6116 
6117  errCount = 0;
6118  Kokkos::parallel_scan
6119  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6120  inputRange, [=, &prefix]
6121  (const LO i, size_t& exportsOffset, const bool final) {
6122  const size_t curOffset = exportsOffset;
6123  const LO lclRow = exportLIDs_h(i);
6124  const GO gblRow = rowMap.getGlobalElement (lclRow);
6125  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6126  if (verbose) {
6127  std::ostringstream os;
6128  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6129  << " not in row Map on this process" << endl;
6130  std::cerr << os.str();
6131  }
6132  Kokkos::atomic_add (&errCountView(), ONE);
6133  return;
6134  }
6135 
6136  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6137  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6138  if (verbose) {
6139  std::ostringstream os;
6140  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6141  << ", gblRow=" << gblRow << ": invalid rowInfo"
6142  << endl;
6143  std::cerr << os.str();
6144  }
6145  Kokkos::atomic_add (&errCountView(), ONE);
6146  return;
6147  }
6148 
6149  if (curOffset + rowInfo.numEntries > totalNumPackets) {
6150  if (verbose) {
6151  std::ostringstream os;
6152  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6153  << ", gblRow=" << gblRow << ", curOffset (= "
6154  << curOffset << ") + numEnt (= " << rowInfo.numEntries
6155  << ") > totalNumPackets (= " << totalNumPackets
6156  << ")." << endl;
6157  std::cerr << os.str();
6158  }
6159  Kokkos::atomic_add (&errCountView(), ONE);
6160  return;
6161  }
6162 
6163  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6164  if (this->isLocallyIndexed ()) {
6165  const LO* lclColInds = nullptr;
6166  LO capacity = 0;
6167  const LO errCode =
6168  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6169  if (errCode == 0) {
6170  if (final) {
6171  for (LO k = 0; k < numEnt; ++k) {
6172  const LO lclColInd = lclColInds[k];
6173  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6174  // Pack it, even if it's wrong. Let the receiving
6175  // process deal with it. Otherwise, we'll miss out
6176  // on any correct data.
6177  exports_h(curOffset + k) = gblColInd;
6178  } // for each entry in the row
6179  } // final pass?
6180  exportsOffset = curOffset + numEnt;
6181  }
6182  else { // error in getting local row view
6183  if (verbose) {
6184  std::ostringstream os;
6185  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6186  << ", gblRow=" << gblRow << ": getLocalViewRawConst"
6187  "returned nonzero error code " << errCode << endl;
6188  std::cerr << os.str();
6189  }
6190  Kokkos::atomic_add (&errCountView(), ONE);
6191  }
6192  }
6193  else if (this->isGloballyIndexed ()) {
6194  const GO* gblColInds = nullptr;
6195  LO capacity = 0;
6196  const LO errCode =
6197  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6198  if (errCode == 0) {
6199  if (final) {
6200  for (LO k = 0; k < numEnt; ++k) {
6201  const GO gblColInd = gblColInds[k];
6202  // Pack it, even if it's wrong. Let the receiving
6203  // process deal with it. Otherwise, we'll miss out
6204  // on any correct data.
6205  exports_h(curOffset + k) = gblColInd;
6206  } // for each entry in the row
6207  } // final pass?
6208  exportsOffset = curOffset + numEnt;
6209  }
6210  else { // error in getting global row view
6211  if (verbose) {
6212  std::ostringstream os;
6213  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6214  << ", gblRow=" << gblRow << ": "
6215  "getGlobalViewRawConst returned nonzero error code "
6216  << errCode << endl;
6217  std::cerr << os.str();
6218  }
6219  Kokkos::atomic_add (&errCountView(), ONE);
6220  }
6221  }
6222  // If neither globally nor locally indexed, then the graph
6223  // has no entries in this row (or indeed, in any row on this
6224  // process) to pack.
6225  });
6226 
6227  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6228  // (errCount != 0, std::logic_error, "Packing encountered "
6229  // "one or more errors! errCount = " << errCount
6230  // << ", totalNumPackets = " << totalNumPackets << ".");
6231 
6232  if (verbose) {
6233  std::ostringstream os;
6234  os << *prefix << "errCount=" << errCount << "; Done" << endl;
6235  std::cerr << os.str();
6236  }
6237  }
6238 
6239  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6240  void
6241  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6242  unpackAndCombine
6243  (const Kokkos::DualView<const local_ordinal_type*,
6244  buffer_device_type>& importLIDs,
6245  Kokkos::DualView<packet_type*,
6246  buffer_device_type> imports,
6247  Kokkos::DualView<size_t*,
6248  buffer_device_type> numPacketsPerLID,
6249  const size_t /* constantNumPackets */,
6250  Distributor& /* distor */,
6251  const CombineMode /* combineMode */ )
6252  {
6253  using Details::ProfilingRegion;
6254  using std::endl;
6255  using LO = local_ordinal_type;
6256  using GO = global_ordinal_type;
6257  const char tfecfFuncName[] = "unpackAndCombine";
6258 
6259  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6260  const bool verbose = verbose_;
6261 
6262  std::unique_ptr<std::string> prefix;
6263  if (verbose) {
6264  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6265  std::ostringstream os;
6266  os << *prefix << "Start" << endl;
6267  std::cerr << os.str ();
6268  }
6269  {
6270  auto padding = computeCrsPaddingForImports(
6271  importLIDs, imports, numPacketsPerLID, verbose);
6272  applyCrsPadding(*padding, verbose);
6273  if (verbose) {
6274  std::ostringstream os;
6275  os << *prefix << "Done computing & applying padding" << endl;
6276  std::cerr << os.str ();
6277  }
6278  }
6279 
6280  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6281  // reasonable meaning, whether or not the matrix is fill complete.
6282  // It's just more work to implement.
6283 
6284  // We are not checking the value of the CombineMode input
6285  // argument. For CrsGraph, we only support import/export
6286  // operations if fillComplete has not yet been called. Any
6287  // incoming column-indices are inserted into the target graph. In
6288  // this context, CombineMode values of ADD vs INSERT are
6289  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6290  // duplicate column-index is inserted, it will be compressed out
6291  // when fillComplete is called.
6292  //
6293  // Note: I think REPLACE means that an existing row is replaced by
6294  // the imported row, i.e., the existing indices are cleared. CGB,
6295  // 6/17/2010
6296 
6297  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6298  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6299  std::runtime_error, ": importLIDs.extent(0) = "
6300  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6301  << numPacketsPerLID.extent (0) << ".");
6302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6303  (isFillComplete (), std::runtime_error,
6304  ": Import or Export operations are not allowed on a target "
6305  "CrsGraph that is fillComplete.");
6306 
6307  const size_t numImportLIDs(importLIDs.extent(0));
6308  if (numPacketsPerLID.need_sync_host()) {
6309  numPacketsPerLID.sync_host();
6310  }
6311  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6312  if (imports.need_sync_host()) {
6313  imports.sync_host();
6314  }
6315  auto imports_h = imports.view_host();
6316  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6317  auto importLIDs_h = importLIDs.view_host();
6318 
6319  // If we're inserting in local indices, let's pre-allocate
6320  Teuchos::Array<LO> lclColInds;
6321  if (isLocallyIndexed()) {
6322  if (verbose) {
6323  std::ostringstream os;
6324  os << *prefix << "Preallocate local indices scratch" << endl;
6325  std::cerr << os.str();
6326  }
6327  size_t maxNumInserts = 0;
6328  for (size_t i = 0; i < numImportLIDs; ++i) {
6329  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6330  }
6331  if (verbose) {
6332  std::ostringstream os;
6333  os << *prefix << "Local indices scratch size: "
6334  << maxNumInserts << endl;
6335  std::cerr << os.str();
6336  }
6337  lclColInds.resize (maxNumInserts);
6338  }
6339  else {
6340  if (verbose) {
6341  std::ostringstream os;
6342  os << *prefix;
6343  if (isGloballyIndexed()) {
6344  os << "Graph is globally indexed";
6345  }
6346  else {
6347  os << "Graph is neither locally nor globally indexed";
6348  }
6349  os << endl;
6350  std::cerr << os.str();
6351  }
6352  }
6353 
6354  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6355  const map_type& rowMap = *rowMap_;
6356 
6357  try {
6358  size_t importsOffset = 0;
6359  for (size_t i = 0; i < numImportLIDs; ++i) {
6360  if (verbose) {
6361  std::ostringstream os;
6362  os << *prefix << "i=" << i << ", numImportLIDs="
6363  << numImportLIDs << endl;
6364  std::cerr << os.str();
6365  }
6366  // We can only unpack into owned rows, since we only have
6367  // local row indices.
6368  const LO lclRow = importLIDs_h[i];
6369  const GO gblRow = rowMap.getGlobalElement(lclRow);
6370  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6371  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6372  std::logic_error, "importLIDs[i=" << i << "]="
6373  << lclRow << " is not in the row Map on the calling "
6374  "process.");
6375  const LO numEnt = numPacketsPerLID_h[i];
6376  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6377  imports_h.data() + importsOffset;
6378  if (! isLocallyIndexed()) {
6379  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6380  }
6381  else {
6382  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6383  // column Map filtering.
6384  for (LO j = 0; j < numEnt; j++) {
6385  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6386  }
6387  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6388  }
6389  importsOffset += numEnt;
6390  }
6391  }
6392  catch (std::exception& e) {
6393  TEUCHOS_TEST_FOR_EXCEPTION
6394  (true, std::runtime_error,
6395  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6396  "exception: " << endl << e.what());
6397  }
6398 
6399  if (verbose) {
6400  std::ostringstream os;
6401  os << *prefix << "Done" << endl;
6402  std::cerr << os.str();
6403  }
6404  }
6405 
6406  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6407  void
6409  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6410  {
6411  using Teuchos::Comm;
6412  using Teuchos::null;
6413  using Teuchos::ParameterList;
6414  using Teuchos::RCP;
6415 
6416  // We'll set all the state "transactionally," so that this method
6417  // satisfies the strong exception guarantee. This object's state
6418  // won't be modified until the end of this method.
6419  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6420  RCP<import_type> importer;
6421  RCP<export_type> exporter;
6422 
6423  rowMap = newMap;
6424  RCP<const Comm<int> > newComm =
6425  (newMap.is_null ()) ? null : newMap->getComm ();
6426 
6427  if (! domainMap_.is_null ()) {
6428  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6429  // Common case: original domain and row Maps are identical.
6430  // In that case, we need only replace the original domain Map
6431  // with the new Map. This ensures that the new domain and row
6432  // Maps _stay_ identical.
6433  domainMap = newMap;
6434  } else {
6435  domainMap = domainMap_->replaceCommWithSubset (newComm);
6436  }
6437  }
6438  if (! rangeMap_.is_null ()) {
6439  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6440  // Common case: original range and row Maps are identical. In
6441  // that case, we need only replace the original range Map with
6442  // the new Map. This ensures that the new range and row Maps
6443  // _stay_ identical.
6444  rangeMap = newMap;
6445  } else {
6446  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6447  }
6448  }
6449  if (! colMap.is_null ()) {
6450  colMap = colMap_->replaceCommWithSubset (newComm);
6451  }
6452 
6453  // (Re)create the Export and / or Import if necessary.
6454  if (! newComm.is_null ()) {
6455  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6456  //
6457  // The operations below are collective on the new communicator.
6458  //
6459  // (Re)create the Export object if necessary. If I haven't
6460  // called fillComplete yet, I don't have a rangeMap, so I must
6461  // first check if the _original_ rangeMap is not null. Ditto
6462  // for the Import object and the domain Map.
6463  if (! rangeMap_.is_null () &&
6464  rangeMap != rowMap &&
6465  ! rangeMap->isSameAs (*rowMap)) {
6466  if (params.is_null () || ! params->isSublist ("Export")) {
6467  exporter = rcp (new export_type (rowMap, rangeMap));
6468  }
6469  else {
6470  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6471  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6472  }
6473  }
6474  // (Re)create the Import object if necessary.
6475  if (! domainMap_.is_null () &&
6476  domainMap != colMap &&
6477  ! domainMap->isSameAs (*colMap)) {
6478  if (params.is_null () || ! params->isSublist ("Import")) {
6479  importer = rcp (new import_type (domainMap, colMap));
6480  } else {
6481  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6482  importer = rcp (new import_type (domainMap, colMap, importSublist));
6483  }
6484  }
6485  } // if newComm is not null
6486 
6487  // Defer side effects until the end. If no destructors throw
6488  // exceptions (they shouldn't anyway), then this method satisfies
6489  // the strong exception guarantee.
6490  exporter_ = exporter;
6491  importer_ = importer;
6492  rowMap_ = rowMap;
6493  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6494  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6495  // the same object. We might want to get rid of this redundant
6496  // pointer sometime, but for now, we'll leave it alone and just
6497  // set map_ to the same object.
6498  this->map_ = rowMap;
6499  domainMap_ = domainMap;
6500  rangeMap_ = rangeMap;
6501  colMap_ = colMap;
6502  }
6503 
6504  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6505  void
6507  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6508  {
6509  using std::endl;
6510  using LO = LocalOrdinal;
6511  using GO = GlobalOrdinal;
6512  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6513  const bool verbose = verbose_;
6514 
6515  std::unique_ptr<std::string> prefix;
6516  if (verbose) {
6517  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6518  std::ostringstream os;
6519  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6520  << endl;
6521  std::cerr << os.str();
6522  }
6523 
6524  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6525  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6526  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6527  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6528  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6529  std::invalid_argument, "offsets.extent(0) = " <<
6530  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6531 
6532  const map_type& rowMap = * (this->getRowMap ());
6533  const map_type& colMap = * (this->getColMap ());
6534 
6535  // We only use these in debug mode, but since debug mode is a
6536  // run-time option, they need to exist here. That's why we create
6537  // the vector with explicit size zero, to avoid overhead if debug
6538  // mode is off.
6539  bool allRowMapDiagEntriesInColMap = true;
6540  bool allDiagEntriesFound = true;
6541  bool allOffsetsCorrect = true;
6542  bool noOtherWeirdness = true;
6543  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6544  wrong_offsets_type wrongOffsets(0);
6545 
6546  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6547  // the subset of Map functionality that we need below.
6548  auto lclRowMap = rowMap.getLocalMap ();
6549  auto lclColMap = colMap.getLocalMap ();
6550 
6551  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6552  // setup, at least on the host. For CUDA, we have to use LocalMap
6553  // (that comes from each of the two Maps).
6554 
6555  const bool sorted = this->isSorted ();
6556  if (isFillComplete ()) {
6557  auto lclGraph = this->getLocalGraph ();
6558  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6559  lclGraph.row_map,
6560  lclGraph.entries, sorted);
6561  }
6562  else {
6563  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6564  // since the graph is not fill complete. The previous version
6565  // of this code assumed UVM; this version does not.
6566  auto offsets_h = Kokkos::create_mirror_view (offsets);
6567 
6568  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6569  // Find the diagonal entry. Since the row Map and column Map
6570  // may differ, we have to compare global row and column
6571  // indices, not local.
6572  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6573  const GO gblColInd = gblRowInd;
6574  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6575 
6576  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6577  allRowMapDiagEntriesInColMap = false;
6578  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6579  }
6580  else {
6581  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6582  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6583  rowInfo.numEntries > 0) {
6584 
6585  auto colInds = this->getLocalKokkosRowView (rowInfo);
6586  const size_t hint = 0; // not needed for this algorithm
6587  const size_t offset =
6588  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6589  lclColInd, hint, sorted);
6590  offsets_h(lclRowInd) = offset;
6591 
6592  if (debug_) {
6593  // Now that we have what we think is an offset, make sure
6594  // that it really does point to the diagonal entry. Offsets
6595  // are _relative_ to each row, not absolute (for the whole
6596  // (local) graph).
6597  Teuchos::ArrayView<const LO> lclColInds;
6598  try {
6599  this->getLocalRowView (lclRowInd, lclColInds);
6600  }
6601  catch (...) {
6602  noOtherWeirdness = false;
6603  }
6604  // Don't continue with error checking if the above failed.
6605  if (noOtherWeirdness) {
6606  const size_t numEnt = lclColInds.size ();
6607  if (offset >= numEnt) {
6608  // Offsets are relative to each row, so this means that
6609  // the offset is out of bounds.
6610  allOffsetsCorrect = false;
6611  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6612  } else {
6613  const LO actualLclColInd = lclColInds[offset];
6614  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6615  if (actualGblColInd != gblColInd) {
6616  allOffsetsCorrect = false;
6617  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6618  }
6619  }
6620  }
6621  } // debug_
6622  }
6623  else { // either row is empty, or something went wrong w/ getRowInfo()
6624  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6625  allDiagEntriesFound = false;
6626  }
6627  } // whether lclColInd is a valid local column index
6628  } // for each local row
6629 
6630  Kokkos::deep_copy (offsets, offsets_h);
6631  } // whether the graph is fill complete
6632 
6633  if (verbose && wrongOffsets.size () != 0) {
6634  std::ostringstream os;
6635  os << *prefix << "Wrong offsets: [";
6636  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6637  os << "(" << wrongOffsets[k].first << ","
6638  << wrongOffsets[k].second << ")";
6639  if (k + 1 < wrongOffsets.size ()) {
6640  os << ", ";
6641  }
6642  }
6643  os << "]" << endl;
6644  std::cerr << os.str();
6645  }
6646 
6647  if (debug_) {
6648  using Teuchos::reduceAll;
6649  using std::endl;
6650  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6651  const bool localSuccess =
6652  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6653  const int numResults = 5;
6654  int lclResults[5];
6655  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6656  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6657  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6658  lclResults[3] = noOtherWeirdness ? 1 : 0;
6659  // min-all-reduce will compute least rank of all the processes
6660  // that didn't succeed.
6661  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6662 
6663  int gblResults[5];
6664  gblResults[0] = 0;
6665  gblResults[1] = 0;
6666  gblResults[2] = 0;
6667  gblResults[3] = 0;
6668  gblResults[4] = 0;
6669  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6670  numResults, lclResults, gblResults);
6671 
6672  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6673  || gblResults[3] != 1) {
6674  std::ostringstream os; // build error message
6675  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6676  "possibly among others): " << endl;
6677  if (gblResults[0] == 0) {
6678  os << " - The column Map does not contain at least one diagonal entry "
6679  "of the graph." << endl;
6680  }
6681  if (gblResults[1] == 0) {
6682  os << " - On one or more processes, some row does not contain a "
6683  "diagonal entry." << endl;
6684  }
6685  if (gblResults[2] == 0) {
6686  os << " - On one or more processes, some offsets are incorrect."
6687  << endl;
6688  }
6689  if (gblResults[3] == 0) {
6690  os << " - One or more processes had some other error."
6691  << endl;
6692  }
6693  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6694  }
6695  } // debug_
6696  }
6697 
6698  namespace { // (anonymous)
6699 
6700  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6701  // below). The point is to avoid the deep copy between the input
6702  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6703  // can't use UVM to avoid the deep copy with CUDA, because the
6704  // ArrayRCP is a host pointer, while the input to the graph's
6705  // getLocalDiagOffsets method is a device pointer. Assigning a
6706  // host pointer to a device pointer is incorrect unless the host
6707  // pointer points to host pinned memory. The goal is to get rid
6708  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6709  // copy for backwards compatibility.
6710  //
6711  // We have to use template magic because
6712  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6713  // if device_type::memory_space is not Kokkos::HostSpace (as is
6714  // the case with CUDA).
6715 
6716  template<class DeviceType,
6717  const bool memSpaceIsHostSpace =
6718  std::is_same<typename DeviceType::memory_space,
6719  Kokkos::HostSpace>::value>
6720  struct HelpGetLocalDiagOffsets {};
6721 
6722  template<class DeviceType>
6723  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6724  typedef DeviceType device_type;
6725  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6726  Kokkos::MemoryUnmanaged> device_offsets_type;
6727  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6728  Kokkos::MemoryUnmanaged> host_offsets_type;
6729 
6730  static device_offsets_type
6731  getDeviceOffsets (const host_offsets_type& hostOffsets)
6732  {
6733  // Host and device are the same; no need to allocate a
6734  // temporary device View.
6735  return hostOffsets;
6736  }
6737 
6738  static void
6739  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6740  const device_offsets_type& /* deviceOffsets */)
6741  { /* copy back not needed; host and device are the same */ }
6742  };
6743 
6744  template<class DeviceType>
6745  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6746  typedef DeviceType device_type;
6747  // We have to do a deep copy, since host memory space != device
6748  // memory space. Thus, the device View is managed (we need to
6749  // allocate a temporary device View).
6750  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6751  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6752  Kokkos::MemoryUnmanaged> host_offsets_type;
6753 
6754  static device_offsets_type
6755  getDeviceOffsets (const host_offsets_type& hostOffsets)
6756  {
6757  // Host memory space != device memory space, so we must
6758  // allocate a temporary device View for the graph.
6759  return device_offsets_type ("offsets", hostOffsets.extent (0));
6760  }
6761 
6762  static void
6763  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6764  const device_offsets_type& deviceOffsets)
6765  {
6766  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6767  }
6768  };
6769  } // namespace (anonymous)
6770 
6771 
6772  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6773  void
6774  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6775  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6776  {
6777  typedef LocalOrdinal LO;
6778  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6780  (! this->hasColMap (), std::runtime_error,
6781  "The graph does not yet have a column Map.");
6782  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
6783  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6784  // NOTE (mfh 21 Jan 2016) This means that the method does not
6785  // satisfy the strong exception guarantee (no side effects
6786  // unless successful).
6787  offsets.resize (myNumRows);
6788  }
6789 
6790  // mfh 21 Jan 2016: This method unfortunately takes a
6791  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6792  // device pointer. We can't access host memory from the device;
6793  // that's the wrong direction for UVM. (It's the right direction
6794  // for inefficient host pinned memory, but we don't want to use
6795  // that here.) Thus, if device memory space != host memory space,
6796  // we allocate and use a temporary device View to get the offsets.
6797  // If the two spaces are equal, the template magic makes the deep
6798  // copy go away.
6799  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6800  typedef typename helper_type::host_offsets_type host_offsets_type;
6801  // Unmanaged host View that views the output array.
6802  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6803  // Allocate temp device View if host != device, else reuse host array.
6804  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6805  // NOT recursion; this calls the overload that takes a device View.
6806  this->getLocalDiagOffsets (deviceOffsets);
6807  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6808  }
6809 
6810  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6811  bool
6814  return true;
6815  }
6816 
// Redistribute the entries of this graph according to 'rowTransfer',
// place them in 'destGraph', and finish by calling
// expertStaticFillComplete() on the destination graph.
//
// NOTE(review): The listing lines that carry the method name and its
// first parameter are missing here.  The error-message prefix below
// names the method "Tpetra::CrsGraph::transferAndFillComplete", and
// the body uses an RCP-like handle 'destGraph' that is not declared
// in the visible parameter list -- presumably it is the missing first
// parameter.  Confirm against the complete source.
//
// Visible parameters:
//  - rowTransfer: must be an Import or an Export (checked with
//    dynamic_cast); otherwise std::invalid_argument is thrown.
//  - domainTransfer: may be null.  If nonnull, it must be an Import or
//    an Export, and of the same kind as rowTransfer.
//  - domainMap, rangeMap: if null, getDomainMap() resp. getRangeMap()
//    of this graph are used instead.
//  - params: may be null.  Entries read here: "Reverse Mode" (bool,
//    default false), "Restrict Communicator" (bool, default false),
//    sublist "CrsGraph" (passed to the constructor of a newly created
//    destination graph), "compute global constants" (default true,
//    forwarded to expertStaticFillComplete), and, only when
//    HAVE_TPETRA_MMM_TIMINGS is defined, "Timer Label".
//
// Exceptions thrown directly by this code: std::invalid_argument for
// failed argument checks, std::logic_error for branches marked
// "Should never get here!".
//
// On processes where ReducedComm ends up null (only possible when
// "Restrict Communicator" is true), the method returns right after
// replacing the destination graph's column Map.
6817  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6818  void
6821  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6822  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6823  const Teuchos::RCP<const map_type>& domainMap,
6824  const Teuchos::RCP<const map_type>& rangeMap,
6825  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6826  {
6831  using Teuchos::ArrayRCP;
6832  using Teuchos::ArrayView;
6833  using Teuchos::Comm;
6834  using Teuchos::ParameterList;
6835  using Teuchos::rcp;
6836  using Teuchos::RCP;
6837 #ifdef HAVE_TPETRA_MMM_TIMINGS
6838  using std::string;
6839  using Teuchos::TimeMonitor;
6840 #endif
6841 
6842  using LO = LocalOrdinal;
6843  using GO = GlobalOrdinal;
6844  using NT = node_type;
6845  using this_type = CrsGraph<LO, GO, NT>;
6846  using ivector_type = Vector<int, LO, GO, NT>;
6847  using packet_type = typename this_type::packet_type;
6848 
      // Prefix for all exception messages raised in this method.
6849  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6850 
6851 #ifdef HAVE_TPETRA_MMM_TIMINGS
6852  string label;
6853  if(!params.is_null()) label = params->get("Timer Label", label);
6854  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6855  RCP<TimeMonitor> MM =
6856  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6857 #endif
6858 
6859  // Make sure that the input argument rowTransfer is either an
6860  // Import or an Export. Import and Export are the only two
6861  // subclasses of Transfer that we defined, but users might
6862  // (unwisely, for now at least) decide to implement their own
6863  // subclasses. Exclude this possibility.
6864  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6865  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6866  TEUCHOS_TEST_FOR_EXCEPTION(
6867  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6868  prefix << "The 'rowTransfer' input argument must be either an Import or "
6869  "an Export, and its template parameters must match the corresponding "
6870  "template parameters of the CrsGraph.");
6871 
6872  // Make sure that the input argument domainTransfer is either an
6873  // Import or an Export. Import and Export are the only two
6874  // subclasses of Transfer that we defined, but users might
6875  // (unwisely, for now at least) decide to implement their own
6876  // subclasses. Exclude this possibility.
6877  Teuchos::RCP<const import_type> xferDomainAsImport =
6878  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6879  Teuchos::RCP<const export_type> xferDomainAsExport =
6880  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6881 
6882  if(! domainTransfer.is_null()) {
6883 
6884  TEUCHOS_TEST_FOR_EXCEPTION(
6885  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6886  prefix << "The 'domainTransfer' input argument must be either an "
6887  "Import or an Export, and its template parameters must match the "
6888  "corresponding template parameters of the CrsGraph.");
6889 
      // Throw if exactly one of rowTransfer / domainTransfer is an Import.
6890  TEUCHOS_TEST_FOR_EXCEPTION(
6891  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6892  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6893  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6894  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6895  "must be of the same type (either Import or Export).");
6896 
      // Throw if exactly one of rowTransfer / domainTransfer is an Export.
6897  TEUCHOS_TEST_FOR_EXCEPTION(
6898  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6899  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6900  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6901  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6902  "must be of the same type (either Import or Export).");
6903 
6904  } // domainTransfer != null
6905 
6906 
6907  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6908  // if the source Map is not distributed but the target Map is?
6909  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6910 
6911  //
6912  // Get the caller's parameters
6913  //
6914 
6915  bool reverseMode = false; // Are we in reverse mode?
6916  bool restrictComm = false; // Do we need to restrict the communicator?
6917  RCP<ParameterList> graphparams; // parameters for the destination graph
6918  if (! params.is_null()) {
6919  reverseMode = params->get("Reverse Mode", reverseMode);
6920  restrictComm = params->get("Restrict Communicator", restrictComm);
6921  graphparams = sublist(params, "CrsGraph");
6922  }
6923 
6924  // Get the new domain and range Maps. We need some of them for error
6925  // checking, now that we have the reverseMode parameter.
6926  RCP<const map_type> MyRowMap = reverseMode ?
6927  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6928  RCP<const map_type> MyColMap; // create this below
6929  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6930  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
      // The "Base" Maps keep the unrestricted Maps; the "My" Maps may be
      // replaced by restricted-communicator versions further down.
6931  RCP<const map_type> BaseRowMap = MyRowMap;
6932  RCP<const map_type> BaseDomainMap = MyDomainMap;
6933 
6934  // If the user gave us a nonnull destGraph, then check whether it's
6935  // "pristine." That means that it has no entries.
6936  //
6937  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6938  // then this exception test may hang. It would be better to
6939  // forward an error flag to the next communication phase.
6940  if (! destGraph.is_null()) {
6941  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6942  // whether a graph or matrix has no entries on the calling
6943  // process, is that it is neither locally nor globally indexed.
6944  // This may change eventually with the Kokkos refactor version
6945  // of Tpetra, so it would be better just to check the quantity
6946  // of interest directly. Note that with the Kokkos refactor
6947  // version of Tpetra, asking for the total number of entries in
6948  // a graph or matrix that is not fill complete might require
6949  // computation (kernel launch), since it is not thread scalable
6950  // to update a count every time an entry is inserted.
6951  const bool NewFlag =
6952  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6953  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6954  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6955  "if its graph is empty (neither locally nor globally indexed).");
6956 
6957  // FIXME (mfh 15 May 2014) At some point, we want to change
6958  // graphs and matrices so that their DistObject Map
6959  // (this->getMap()) may differ from their row Map. This will
6960  // make redistribution for 2-D distributions more efficient. I
6961  // hesitate to change this check, because I'm not sure how much
6962  // the code here depends on getMap() and getRowMap() being the
6963  // same.
6964  TEUCHOS_TEST_FOR_EXCEPTION(
6965  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6966  prefix << "The (row) Map of the input argument 'destGraph' is not the "
6967  "same as the (row) Map specified by the input argument 'rowTransfer'.");
6968 
      // NOTE(review): the message below contains a duplicated word
      // ("legal legal"); fixing it would change a runtime string.
6969  TEUCHOS_TEST_FOR_EXCEPTION(
6970  ! destGraph->checkSizes(*this), std::invalid_argument,
6971  prefix << "You provided a nonnull destination graph, but checkSizes() "
6972  "indicates that it is not a legal legal target for redistribution from "
6973  "the source graph (*this). This may mean that they do not have the "
6974  "same dimensions.");
6975  }
6976 
6977  // If forward mode (the default), then *this's (row) Map must be
6978  // the same as the source Map of the Transfer. If reverse mode,
6979  // then *this's (row) Map must be the same as the target Map of
6980  // the Transfer.
6981  //
6982  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6983  // and matrices so that their DistObject Map (this->getMap()) may
6984  // differ from their row Map. This will make redistribution for
6985  // 2-D distributions more efficient. I hesitate to change this
6986  // check, because I'm not sure how much the code here depends on
6987  // getMap() and getRowMap() being the same.
6988  TEUCHOS_TEST_FOR_EXCEPTION(
6989  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6990  std::invalid_argument, prefix <<
6991  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6992 
6993  TEUCHOS_TEST_FOR_EXCEPTION(
6994  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6995  std::invalid_argument, prefix <<
6996  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6997 
6998  // checks for domainTransfer
      // NOTE(review): both checks below dereference the 'domainMap'
      // argument itself, not MyDomainMap (which falls back to
      // getDomainMap() when 'domainMap' is null).  Presumably callers
      // always pass a nonnull domainMap together with a nonnull
      // domainTransfer -- confirm.
6999  TEUCHOS_TEST_FOR_EXCEPTION(
7000  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7001  std::invalid_argument,
7002  prefix << "The target map of the 'domainTransfer' input argument must be "
7003  "the same as the rebalanced domain map 'domainMap'");
7004 
7005  TEUCHOS_TEST_FOR_EXCEPTION(
7006  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7007  std::invalid_argument,
7008  prefix << "The source map of the 'domainTransfer' input argument must be "
7009  "the same as the rebalanced domain map 'domainMap'");
7010 
7011  // The basic algorithm here is:
7012  //
7013  // 1. Call the moral equivalent of "distor.do" to handle the import.
7014  // 2. Copy all the Imported and Copy/Permuted data into the raw
7015  // CrsGraph pointers, still using GIDs.
7016  // 3. Call an optimized version of MakeColMap that avoids the
7017  // Directory lookups (since the importer knows who owns all the
7018  // GIDs) AND reindexes to LIDs.
7019  // 4. Call expertStaticFillComplete()
7020 
7021  // Get information from the Importer
      // In reverse mode the roles are swapped: the transfer's remote
      // LIDs serve as export LIDs (and vice versa), and likewise for
      // the permute-to / permute-from lists.
7022  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7023  ArrayView<const LO> ExportLIDs = reverseMode ?
7024  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7025  ArrayView<const LO> RemoteLIDs = reverseMode ?
7026  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7027  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7028  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7029  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7030  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7031  Distributor& Distor = rowTransfer.getDistributor();
7032 
7033  // Owning PIDs
7034  Teuchos::Array<int> SourcePids;
7035  Teuchos::Array<int> TargetPids;
7036  int MyPID = getComm()->getRank();
7037 
7038  // Temp variables for sub-communicators
7039  RCP<const map_type> ReducedRowMap, ReducedColMap,
7040  ReducedDomainMap, ReducedRangeMap;
7041  RCP<const Comm<int> > ReducedComm;
7042 
7043  // If the user gave us a null destGraph, then construct the new
7044  // destination graph. We will replace its column Map later.
7045  if (destGraph.is_null()) {
7046  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7047  }
7048 
7049  /***************************************************/
7050  /***** 1) First communicator restriction phase ****/
7051  /***************************************************/
7052  if (restrictComm) {
7053  ReducedRowMap = MyRowMap->removeEmptyProcesses();
7054  ReducedComm = ReducedRowMap.is_null() ?
7055  Teuchos::null :
7056  ReducedRowMap->getComm();
7057  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7058 
      // Reuse ReducedRowMap when the domain (resp. range) Map is the
      // very same object as the row Map (pointer comparison).
7059  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7060  ReducedRowMap :
7061  MyDomainMap->replaceCommWithSubset(ReducedComm);
7062  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7063  ReducedRowMap :
7064  MyRangeMap->replaceCommWithSubset(ReducedComm);
7065 
7066  // Reset the "my" maps
7067  MyRowMap = ReducedRowMap;
7068  MyDomainMap = ReducedDomainMap;
7069  MyRangeMap = ReducedRangeMap;
7070 
7071  // Update my PID, if we've restricted the communicator
7072  if (! ReducedComm.is_null()) {
7073  MyPID = ReducedComm->getRank();
7074  }
7075  else {
7076  MyPID = -2; // For debugging
7077  }
7078  }
7079  else {
7080  ReducedComm = MyRowMap->getComm();
7081  }
7082 
7083  /***************************************************/
7084  /***** 2) From Tpetra::DistObject::doTransfer() ****/
7085  /***************************************************/
7086 #ifdef HAVE_TPETRA_MMM_TIMINGS
7087  MM = Teuchos::null;
7088  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7089 #endif
7090  // Get the owning PIDs
7091  RCP<const import_type> MyImporter = getImporter();
7092 
7093  // Check whether the source graph's domain Map is the same as the base domain Map.
7094  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7095 
      // The branches below fill SourcePids with one owning process rank
      // per entry of this graph's column Map.
7096  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7097  // Same domain map as source graph
7098  //
7099  // NOTE: This won't work for restrictComm (because the Import
7100  // doesn't know the restricted PIDs), though writing an
7101  // optimized version for that case would be easy (Import an
7102  // IntVector of the new PIDs). Might want to add this later.
7103  Import_Util::getPids(*MyImporter, SourcePids, false);
7104  }
7105  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7106  // Same domain map as source graph (restricted communicator)
7107  // We need one import from the domain to the column map
7108  ivector_type SourceDomain_pids(getDomainMap(),true);
7109  ivector_type SourceCol_pids(getColMap());
7110  // SourceDomain_pids contains the restricted pids
7111  SourceDomain_pids.putScalar(MyPID);
7112 
7113  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7114  SourcePids.resize(getColMap()->getNodeNumElements());
7115  SourceCol_pids.get1dCopy(SourcePids());
7116  }
7117  else if (MyImporter.is_null() && bSameDomainMap) {
7118  // Graph has no off-process entries
7119  SourcePids.resize(getColMap()->getNodeNumElements());
7120  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7121  }
7122  else if ( ! MyImporter.is_null() &&
7123  ! domainTransfer.is_null() ) {
7124  // general implementation for rectangular matrices with
7125  // domain map different than SourceGraph domain map.
7126  // User has to provide a DomainTransfer object. We need
7127  // two communications (import/export)
7128 
7129  // TargetDomain_pids lives on the rebalanced new domain map
7130  ivector_type TargetDomain_pids(domainMap);
7131  TargetDomain_pids.putScalar(MyPID);
7132 
7133  // SourceDomain_pids lives on the non-rebalanced old domain map
7134  ivector_type SourceDomain_pids(getDomainMap());
7135 
7136  // SourceCol_pids lives on the non-rebalanced old column map
7137  ivector_type SourceCol_pids(getColMap());
7138 
7139  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7140  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7141  }
7142  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7143  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7144  }
7145  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7146  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7147  }
7148  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7149  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7150  }
7151  else {
7152  TEUCHOS_TEST_FOR_EXCEPTION(
7153  true, std::logic_error,
7154  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7155  }
7156  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7157  SourcePids.resize(getColMap()->getNodeNumElements());
7158  SourceCol_pids.get1dCopy(SourcePids());
7159  }
7160  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7161  getDomainMap()->isSameAs(*getRowMap())) {
7162  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7163  ivector_type TargetRow_pids(domainMap);
7164  ivector_type SourceRow_pids(getRowMap());
7165  ivector_type SourceCol_pids(getColMap());
7166 
7167  TargetRow_pids.putScalar(MyPID);
7168  if (! reverseMode && xferAsImport != nullptr) {
7169  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7170  }
7171  else if (reverseMode && xferAsExport != nullptr) {
7172  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7173  }
7174  else if (! reverseMode && xferAsExport != nullptr) {
7175  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7176  }
7177  else if (reverseMode && xferAsImport != nullptr) {
7178  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7179  }
7180  else {
7181  TEUCHOS_TEST_FOR_EXCEPTION(
7182  true, std::logic_error,
7183  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7184  }
7185  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7186  SourcePids.resize(getColMap()->getNodeNumElements());
7187  SourceCol_pids.get1dCopy(SourcePids());
7188  }
7189  else {
7190  TEUCHOS_TEST_FOR_EXCEPTION(
7191  true, std::invalid_argument,
7192  prefix << "This method only allows either domainMap == getDomainMap(), "
7193  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7194  }
7195 
7196  // Tpetra-specific stuff
      // A value of zero is treated as "variable number of packets per LID".
7197  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7198  if (constantNumPackets == 0) {
7199  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7200  RemoteLIDs.size());
7201  }
7202  else {
7203  // There are a constant number of packets per element. We
7204  // already know (from the number of "remote" (incoming)
7205  // elements) how many incoming elements we expect, so we can
7206  // resize the buffer accordingly.
7207  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7208  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7209  }
7210 
7211  {
7212  // packAndPrepare* methods modify numExportPacketsPerLID_.
7213  destGraph->numExportPacketsPerLID_.modify_host();
7214  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7215  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7216 
7217  // Pack & Prepare w/ owning PIDs
7218  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7219  numExportPacketsPerLID, ExportLIDs,
7220  SourcePids, constantNumPackets, Distor);
7221  }
7222 
7223  // Do the exchange of remote data.
7224 #ifdef HAVE_TPETRA_MMM_TIMINGS
7225  MM = Teuchos::null;
7226  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7227 #endif
7228 
7229  if (communication_needed) {
7230  if (reverseMode) {
7231  if (constantNumPackets == 0) { // variable number of packets per LID
7232  // Make sure that host has the latest version, since we're
7233  // using the version on host. If host has the latest
7234  // version, syncing to host does nothing.
7235  destGraph->numExportPacketsPerLID_.sync_host();
7236  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7237  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7238  destGraph->numImportPacketsPerLID_.sync_host();
7239  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7240  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
      // First exchange: per-LID packet counts, so that the import
      // buffer can be sized before the packets themselves are sent.
7241  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7242  numImportPacketsPerLID);
7243  size_t totalImportPackets = 0;
7244  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7245  totalImportPackets += numImportPacketsPerLID[i];
7246  }
7247 
7248  // Reallocation MUST go before setting the modified flag,
7249  // because it may clear out the flags.
7250  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7251  destGraph->imports_.modify_host();
7252  Teuchos::ArrayView<packet_type> hostImports =
7253  getArrayViewFromDualView(destGraph->imports_);
7254  // This is a legacy host pack/unpack path, so use the host
7255  // version of exports_.
7256  destGraph->exports_.sync_host();
7257  Teuchos::ArrayView<const packet_type> hostExports =
7258  getArrayViewFromDualView(destGraph->exports_);
      // Second exchange: the packed data itself.
7259  Distor.doReversePostsAndWaits(hostExports,
7260  numExportPacketsPerLID,
7261  hostImports,
7262  numImportPacketsPerLID);
7263  }
7264  else { // constant number of packets per LID
7265  destGraph->imports_.modify_host();
7266  Teuchos::ArrayView<packet_type> hostImports =
7267  getArrayViewFromDualView(destGraph->imports_);
7268  // This is a legacy host pack/unpack path, so use the host
7269  // version of exports_.
7270  destGraph->exports_.sync_host();
7271  Teuchos::ArrayView<const packet_type> hostExports =
7272  getArrayViewFromDualView(destGraph->exports_);
7273  Distor.doReversePostsAndWaits(hostExports,
7274  constantNumPackets,
7275  hostImports);
7276  }
7277  }
7278  else { // forward mode (the default)
7279  if (constantNumPackets == 0) { // variable number of packets per LID
7280  // Make sure that host has the latest version, since we're
7281  // using the version on host. If host has the latest
7282  // version, syncing to host does nothing.
7283  destGraph->numExportPacketsPerLID_.sync_host();
7284  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7285  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7286  destGraph->numImportPacketsPerLID_.sync_host();
7287  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7288  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
      // First exchange: per-LID packet counts, so that the import
      // buffer can be sized before the packets themselves are sent.
7289  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7290  numImportPacketsPerLID);
7291  size_t totalImportPackets = 0;
7292  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7293  totalImportPackets += numImportPacketsPerLID[i];
7294  }
7295 
7296  // Reallocation MUST go before setting the modified flag,
7297  // because it may clear out the flags.
7298  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7299  destGraph->imports_.modify_host();
7300  Teuchos::ArrayView<packet_type> hostImports =
7301  getArrayViewFromDualView(destGraph->imports_);
7302  // This is a legacy host pack/unpack path, so use the host
7303  // version of exports_.
7304  destGraph->exports_.sync_host();
7305  Teuchos::ArrayView<const packet_type> hostExports =
7306  getArrayViewFromDualView(destGraph->exports_);
      // Second exchange: the packed data itself.
7307  Distor.doPostsAndWaits(hostExports,
7308  numExportPacketsPerLID,
7309  hostImports,
7310  numImportPacketsPerLID);
7311  }
7312  else { // constant number of packets per LID
7313  destGraph->imports_.modify_host();
7314  Teuchos::ArrayView<packet_type> hostImports =
7315  getArrayViewFromDualView(destGraph->imports_);
7316  // This is a legacy host pack/unpack path, so use the host
7317  // version of exports_.
7318  destGraph->exports_.sync_host();
7319  Teuchos::ArrayView<const packet_type> hostExports =
7320  getArrayViewFromDualView(destGraph->exports_);
7321  Distor.doPostsAndWaits(hostExports,
7322  constantNumPackets,
7323  hostImports);
7324  }
7325  }
7326  }
7327 
7328  /*********************************************************************/
7329  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7330  /*********************************************************************/
7331 
7332 #ifdef HAVE_TPETRA_MMM_TIMINGS
7333  MM = Teuchos::null;
7334  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7335 #endif
7336 
7337  // Backwards compatibility measure. We'll use this again below.
7338  destGraph->numImportPacketsPerLID_.sync_host();
7339  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7340  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7341  destGraph->imports_.sync_host();
7342  Teuchos::ArrayView<const packet_type> hostImports =
7343  getArrayViewFromDualView(destGraph->imports_);
      // mynnz is used below as the length of the column-index arrays.
7344  size_t mynnz =
7345  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7346  numImportPacketsPerLID,
7347  constantNumPackets, Distor, INSERT,
7348  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
      // N is taken from BaseRowMap, i.e. the row Map as it was before
      // any communicator restriction.
7349  size_t N = BaseRowMap->getNodeNumElements();
7350 
7351  // Allocations
7352  ArrayRCP<size_t> CSR_rowptr(N+1);
7353  ArrayRCP<GO> CSR_colind_GID;
7354  ArrayRCP<LO> CSR_colind_LID;
7355  CSR_colind_GID.resize(mynnz);
7356 
7357  // If LO and GO are the same, we can reuse memory when
7358  // converting the column indices from global to local indices.
7359  if (typeid(LO) == typeid(GO)) {
7360  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7361  }
7362  else {
7363  CSR_colind_LID.resize(mynnz);
7364  }
7365 
7366  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7367  // unpackAndCombine method on a "CrsArrays" object? This passing
7368  // in a huge list of arrays is icky. Can't we have a bit of an
7369  // abstraction? Implementing a concrete DistObject subclass only
7370  // takes five methods.
7371  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7372  numImportPacketsPerLID, constantNumPackets,
7373  Distor, INSERT, NumSameIDs, PermuteToLIDs,
7374  PermuteFromLIDs, N, mynnz, MyPID,
7375  CSR_rowptr(), CSR_colind_GID(),
7376  SourcePids(), TargetPids);
7377 
7378  /**************************************************************/
7379  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7380  /**************************************************************/
7381 #ifdef HAVE_TPETRA_MMM_TIMINGS
7382  MM = Teuchos::null;
7383  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7384 #endif
7385  // Call an optimized version of makeColMap that avoids the
7386  // Directory lookups (since the Import object knows who owns all
7387  // the GIDs).
7388  Teuchos::Array<int> RemotePids;
7389  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7390  CSR_colind_LID(),
7391  CSR_colind_GID(),
7392  BaseDomainMap,
7393  TargetPids, RemotePids,
7394  MyColMap);
7395 
7396  /*******************************************************/
7397  /**** 4b) Second communicator restriction phase ****/
7398  /*******************************************************/
7399  if (restrictComm) {
7400  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7401  ReducedRowMap :
7402  MyColMap->replaceCommWithSubset(ReducedComm);
7403  MyColMap = ReducedColMap; // Reset the "my" maps
7404  }
7405 
7406  // Replace the col map
7407  destGraph->replaceColMap(MyColMap);
7408 
7409  // Short circuit if the processor is no longer in the communicator
7410  //
7411  // NOTE: Epetra modifies all "removed" processes so they
7412  // have a dummy (serial) Map that doesn't touch the original
7413  // communicator. Duplicating that here might be a good idea.
7414  if (ReducedComm.is_null()) {
7415  return;
7416  }
7417 
7418  /***************************************************/
7419  /**** 5) Sort ****/
7420  /***************************************************/
      // Import in forward mode or Export in reverse mode: sort only.
      // Export in forward mode or Import in reverse mode: sort and
      // merge, then shrink the column-index array if entries were merged.
7421  if ((! reverseMode && xferAsImport != nullptr) ||
7422  (reverseMode && xferAsExport != nullptr)) {
7423  Import_Util::sortCrsEntries(CSR_rowptr(),
7424  CSR_colind_LID());
7425  }
7426  else if ((! reverseMode && xferAsExport != nullptr) ||
7427  (reverseMode && xferAsImport != nullptr)) {
7428  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7429  CSR_colind_LID());
7430  if (CSR_rowptr[N] != mynnz) {
7431  CSR_colind_LID.resize(CSR_rowptr[N]);
7432  }
7433  }
7434  else {
7435  TEUCHOS_TEST_FOR_EXCEPTION(
7436  true, std::logic_error,
7437  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7438  }
7439  /***************************************************/
7440  /**** 6) Reset the colmap and the arrays ****/
7441  /***************************************************/
7442 
7443  // Hand the CSR arrays (row offsets and local column indices) to
7444  // the destination graph (restricted as needed).
7445  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7446 
7447  /***************************************************/
7448  /**** 7) Build Importer & Call ESFC ****/
7449  /***************************************************/
7450  // Pre-build the importer using the existing PIDs
7451  Teuchos::ParameterList esfc_params;
7452 #ifdef HAVE_TPETRA_MMM_TIMINGS
7453  MM = Teuchos::null;
7454  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7455 #endif
7456  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7457 #ifdef HAVE_TPETRA_MMM_TIMINGS
7458  MM = Teuchos::null;
7459  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7460 
7461  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7462 #endif
7463  if(!params.is_null())
7464  esfc_params.set("compute global constants",params->get("compute global constants",true));
7465 
      // esfc_params lives on the stack, so it is wrapped in a
      // non-owning RCP (second argument 'false').
7466  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7467  MyImport, Teuchos::null, rcp(&esfc_params,false));
7468 
7469  }
7470 
7471  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7472  void
7473  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
7474  importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7475  const import_type& importer,
7476  const Teuchos::RCP<const map_type>& domainMap,
7477  const Teuchos::RCP<const map_type>& rangeMap,
7478  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7479  {
7480  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7481  }
7482 
// Two-Import forwarding wrapper: calls transferAndFillComplete() with
// rowImporter in the second argument position and domainImporter, wrapped
// by Teuchos::rcpFromRef, in the third.  domainMap, rangeMap and params are
// passed through as given.
// NOTE(review): this listing jumps from line 7484 to 7487, so the qualified
// function name and the destGraph parameter are not shown here -- confirm
// them against the class declaration.
7483  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7484  void
7487  const import_type& rowImporter,
7488  const import_type& domainImporter,
7489  const Teuchos::RCP<const map_type>& domainMap,
7490  const Teuchos::RCP<const map_type>& rangeMap,
7491  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7492  {
      // NOTE(review): rcpFromRef presumably yields a non-owning handle, so
      // domainImporter has to outlive this call -- confirm.
7493  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7494  }
7495 
// Single-Export forwarding wrapper: calls transferAndFillComplete() with
// exporter in the second argument position and Teuchos::null in the third.
// domainMap, rangeMap and params are passed through as given.
// NOTE(review): this listing jumps from line 7497 to 7500, so the qualified
// function name and the destGraph parameter are not shown here -- confirm
// them against the class declaration.
7496  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7497  void
7500  const export_type& exporter,
7501  const Teuchos::RCP<const map_type>& domainMap,
7502  const Teuchos::RCP<const map_type>& rangeMap,
7503  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7504  {
7505  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7506  }
7507 
// Two-Export forwarding wrapper: calls transferAndFillComplete() with
// rowExporter in the second argument position and domainExporter, wrapped
// by Teuchos::rcpFromRef, in the third.  domainMap, rangeMap and params are
// passed through as given.
// NOTE(review): this listing jumps from line 7509 to 7512, so the qualified
// function name and the destGraph parameter are not shown here -- confirm
// them against the class declaration.
7508  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7509  void
7512  const export_type& rowExporter,
7513  const export_type& domainExporter,
7514  const Teuchos::RCP<const map_type>& domainMap,
7515  const Teuchos::RCP<const map_type>& rangeMap,
7516  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7517  {
      // NOTE(review): rcpFromRef presumably yields a non-owning handle, so
      // domainExporter has to outlive this call -- confirm.
7518  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7519  }
7520 
7521 
// Member-wise exchange between `graph` and `*this`: every data member named
// below is swapped with std::swap.  Only the members listed here are
// exchanged by this code.
// NOTE(review): this listing jumps from line 7523 to 7526, so the qualified
// function name and the declaration of the `graph` parameter are not shown
// here -- confirm them against the class declaration.  Also confirm whether
// any state outside this list (e.g. in a base class) needs exchanging.
7522  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7523  void
7526  {
      // The four Maps.
7527  std::swap(graph.rowMap_, this->rowMap_);
7528  std::swap(graph.colMap_, this->colMap_);
7529  std::swap(graph.rangeMap_, this->rangeMap_);
7530  std::swap(graph.domainMap_, this->domainMap_);
7531 
      // Import / Export handles.
7532  std::swap(graph.importer_, this->importer_);
7533  std::swap(graph.exporter_, this->exporter_);
7534 
      // Local graph object.
7535  std::swap(graph.lclGraph_, this->lclGraph_);
7536 
      // Cached entry counts and allocation size.
7537  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7538 
7539  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7540  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7541 
7542  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7543 
      // Row offsets and local/global column-index arrays.
7544  std::swap(graph.k_rowPtrs_, this->k_rowPtrs_);
7545 
7546  std::swap(graph.k_lclInds1D_, this->k_lclInds1D_);
7547  std::swap(graph.k_gblInds1D_, this->k_gblInds1D_);
7548 
7549  std::swap(graph.storageStatus_, this->storageStatus_);
7550 
      // Boolean state flags.
7551  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7552  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7553  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7554  std::swap(graph.fillComplete_, this->fillComplete_);
7555  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7556  std::swap(graph.noRedundancies_, this->noRedundancies_);
7557  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7558  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7559 
7560  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7561 
      // Per-row bookkeeping and the nonlocal-insert table.
7562  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7563  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7564  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7565  }
7566 
7567 
7568  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7569  bool
7572  {
7573  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7574  bool output = true;
7575  output = m1.size() == m2.size() ? output : false;
7576  for(auto & it_m: m1)
7577  {
7578  size_t key = it_m.first;
7579  output = m2.find(key) != m2.end() ? output : false;
7580  if(output)
7581  {
7582  auto v1 = m1.find(key)->second;
7583  auto v2 = m2.find(key)->second;
7584  std::sort(v1.begin(), v1.end());
7585  std::sort(v2.begin(), v2.end());
7586 
7587  output = v1.size() == v2.size() ? output : false;
7588  for(size_t i=0; output && i<v1.size(); i++)
7589  {
7590  output = v1[i]==v2[i] ? output : false;
7591  }
7592  }
7593  }
7594  return output;
7595  };
7596 
7597  bool output = true;
7598 
7599  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7600  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7601  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7602  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7603 
7604  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7605 
7606  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7607  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7608 
7609  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7610 
7611  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7612 
7613  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7614  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7615  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7616  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7617  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7618  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7619  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7620  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7621  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7622 
7623  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7624  // nonlocals_ isa std::map<GO, std::vector<GO> >
7625  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7626 
7627  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7628  // - since this is a HostMirror type, it should be in host memory already
7629  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7630  if(output && this->k_numAllocPerRow_.extent(0) > 0)
7631  {
7632  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7633  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7634  }
7635 
7636  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7637  // - since this is a HostMirror type, it should be in host memory already
7638  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7639  if(output && this->k_numRowEntries_.extent(0) > 0)
7640  {
7641  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7642  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7643  }
7644 
7645  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7646  output = this->k_rowPtrs_.extent(0) == graph.k_rowPtrs_.extent(0) ? output : false;
7647  if(output && this->k_rowPtrs_.extent(0) > 0)
7648  {
7649  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_this = Kokkos::create_mirror_view(this->k_rowPtrs_);
7650  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_graph= Kokkos::create_mirror_view(graph.k_rowPtrs_);
7651  Kokkos::deep_copy(k_rowPtrs_host_this, this->k_rowPtrs_);
7652  Kokkos::deep_copy(k_rowPtrs_host_graph, graph.k_rowPtrs_);
7653  for(size_t i=0; output && i<k_rowPtrs_host_this.extent(0); i++)
7654  output = k_rowPtrs_host_this(i) == k_rowPtrs_host_graph(i) ? output : false;
7655  }
7656 
7657  // Compare k_lclInds1D_ isa Kokkos::View<LocalOrdinal*, ...>
7658  output = this->k_lclInds1D_.extent(0) == graph.k_lclInds1D_.extent(0) ? output : false;
7659  if(output && this->k_lclInds1D_.extent(0) > 0)
7660  {
7661  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_this = Kokkos::create_mirror_view(this->k_lclInds1D_);
7662  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_graph= Kokkos::create_mirror_view(graph.k_lclInds1D_);
7663  Kokkos::deep_copy(k_lclInds1D_host_this, this->k_lclInds1D_);
7664  Kokkos::deep_copy(k_lclInds1D_host_graph, graph.k_lclInds1D_);
7665  for(size_t i=0; output && i < k_lclInds1D_host_this.extent(0); i++)
7666  output = k_lclInds1D_host_this(i) == k_lclInds1D_host_graph(i) ? output : false;
7667  }
7668 
7669  // Compare k_gblInds1D_ isa Kokkos::View<GlobalOrdinal*, ...>
7670  output = this->k_gblInds1D_.extent(0) == graph.k_gblInds1D_.extent(0) ? output : false;
7671  if(output && this->k_gblInds1D_.extent(0) > 0)
7672  {
7673  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_this = Kokkos::create_mirror_view(this->k_gblInds1D_);
7674  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_graph = Kokkos::create_mirror_view(graph.k_gblInds1D_);
7675  Kokkos::deep_copy(k_gblInds1D_host_this, this->k_gblInds1D_);
7676  Kokkos::deep_copy(k_gblInds1D_host_graph, graph.k_gblInds1D_);
7677  for(size_t i=0; output && i<k_gblInds1D_host_this.extent(0); i++)
7678  output = k_gblInds1D_host_this(i) == k_gblInds1D_host_graph(i) ? output : false;
7679  }
7680 
7681  // Check lclGraph_ // isa Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7682  // Kokkos::StaticCrsGraph has 3 data members in it:
7683  // Kokkos::View<size_type*, ...> row_map (local_graph_type::row_map_type)
7684  // Kokkos::View<data_type*, ...> entries (local_graph_type::entries_type)
7685  // Kokkos::View<size_type*, ...> row_block_offsets (local_graph_type::row_block_type)
7686  // There is currently no Kokkos::StaticCrsGraph comparison function that's built-in, so we will just compare
7687  // the three data items here. This can be replaced if Kokkos ever puts in its own comparison routine.
7688  output = this->lclGraph_.row_map.extent(0) == graph.lclGraph_.row_map.extent(0) ? output : false;
7689  if(output && this->lclGraph_.row_map.extent(0) > 0)
7690  {
7691  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_map);
7692  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_map);
7693  Kokkos::deep_copy(lclGraph_rowmap_host_this, this->lclGraph_.row_map);
7694  Kokkos::deep_copy(lclGraph_rowmap_host_graph, graph.lclGraph_.row_map);
7695  for(size_t i=0; output && i<lclGraph_rowmap_host_this.extent(0); i++)
7696  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i) ? output : false;
7697  }
7698 
7699  output = this->lclGraph_.entries.extent(0) == graph.lclGraph_.entries.extent(0) ? output : false;
7700  if(output && this->lclGraph_.entries.extent(0) > 0)
7701  {
7702  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_this = Kokkos::create_mirror_view(this->lclGraph_.entries);
7703  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.entries);
7704  Kokkos::deep_copy(lclGraph_entries_host_this, this->lclGraph_.entries);
7705  Kokkos::deep_copy(lclGraph_entries_host_graph, graph.lclGraph_.entries);
7706  for(size_t i=0; output && i<lclGraph_entries_host_this.extent(0); i++)
7707  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i) ? output : false;
7708  }
7709 
7710  output = this->lclGraph_.row_block_offsets.extent(0) == graph.lclGraph_.row_block_offsets.extent(0) ? output : false;
7711  if(output && this->lclGraph_.row_block_offsets.extent(0) > 0)
7712  {
7713  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_block_offsets);
7714  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_block_offsets);
7715  Kokkos::deep_copy(lclGraph_rbo_host_this, this->lclGraph_.row_block_offsets);
7716  Kokkos::deep_copy(lclGraph_rbo_host_graph, graph.lclGraph_.row_block_offsets);
7717  for(size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7718  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i) ? output : false;
7719  }
7720 
7721  // For the Importer and Exporter, we shouldn't need to explicitly check them since
7722  // they will be consistent with the maps.
7723  // Note: importer_ isa Teuchos::RCP<const import_type>
7724  // exporter_ isa Teuchos::RCP<const export_type>
7725 
7726  return output;
7727  }
7728 
7729 
7730 
7731 } // namespace Tpetra
7732 
7733 //
7734 // Explicit instantiation macros
7735 //
7736 // Must be expanded from within the Tpetra namespace!
7737 //
7738 
// Expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, single-Import
// overload (sourceGraph, importer, domainMap, rangeMap, params), returning
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >.  Must be expanded inside namespace
// Tpetra, since CrsGraph, Import and Map are named without qualification.
// NOTE(review): `template<>` is the explicit-specialization form, not the
// explicit-instantiation form (`template` without `<>`) -- confirm intent.
7739 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7740  template<> \
7741  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7742  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7743  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7744  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7745  CrsGraph<LO,GO,NODE>::node_type>& importer, \
7746  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7747  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7748  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7749  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7750  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7751  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7752  const Teuchos::RCP<Teuchos::ParameterList>& params);
7753 
// Expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, two-Import
// overload (sourceGraph, rowImporter, domainImporter, domainMap, rangeMap,
// params), returning Teuchos::RCP<CrsGraph<LO,GO,NODE> >.  Must be expanded
// inside namespace Tpetra, since CrsGraph, Import and Map are named without
// qualification.
// NOTE(review): `template<>` is the explicit-specialization form, not the
// explicit-instantiation form (`template` without `<>`) -- confirm intent.
7754 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7755  template<> \
7756  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7757  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7758  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7759  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7760  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7761  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7762  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7763  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7764  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7765  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7766  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7767  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7768  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7769  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7770  const Teuchos::RCP<Teuchos::ParameterList>& params);
7771 
7772 
// Expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, single-Export
// overload (sourceGraph, exporter, domainMap, rangeMap, params), returning
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >.  Must be expanded inside namespace
// Tpetra, since CrsGraph, Export and Map are named without qualification.
// NOTE(review): `template<>` is the explicit-specialization form, not the
// explicit-instantiation form (`template` without `<>`) -- confirm intent.
7773 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7774  template<> \
7775  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7776  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7777  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7778  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7779  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7780  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7781  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7782  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7783  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7784  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7785  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7786  const Teuchos::RCP<Teuchos::ParameterList>& params);
7787 
// Expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, two-Export
// overload (sourceGraph, rowExporter, domainExporter, domainMap, rangeMap,
// params), returning Teuchos::RCP<CrsGraph<LO,GO,NODE> >.  Must be expanded
// inside namespace Tpetra, since CrsGraph, Export and Map are named without
// qualification.
// NOTE(review): `template<>` is the explicit-specialization form, not the
// explicit-instantiation form (`template` without `<>`) -- confirm intent.
7788 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7789  template<> \
7790  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7791  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7792  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7793  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7794  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7795  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7796  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7797  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7798  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7799  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7800  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7801  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7802  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7803  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7804  const Teuchos::RCP<Teuchos::ParameterList>& params);
7805 
7806 
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the class template CrsGraph<LO, GO, NODE> (`template class ...`) and then
// expands the four *_AND_FILL_COMPLETE_INSTANT* macros for the same
// arguments.  Must be expanded inside namespace Tpetra, since CrsGraph is
// named without qualification.
7807 #define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7808  template class CrsGraph<LO, GO, NODE>; \
7809  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7810  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7811  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7812  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7813 
7814 
7815 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
GO global_ordinal_type
The type of the graph's global indices.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
bool haveGlobalConstants_
Whether all processes have computed global constants.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Kokkos::View< global_ordinal_type *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows.
Declaration of a function that prints strings from each process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
static bool debug()
Whether Tpetra is in debug mode.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
int local_ordinal_type
Default value of Scalar template parameter.
bool haveLocalConstants_
Whether this process has computed local constants.
Implementation details of Tpetra.
size_t global_size_t
Global size_t object.
node_type node_type
This class' Kokkos Node type.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
local_graph_type::entries_type::non_const_type k_lclInds1D_
Local column indices for all rows.
local_graph_type lclGraph_
Local graph; only initialized after first fillComplete() call.
Sets up and executes a communication plan for a Tpetra DistObject.
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
static bool verbose()
Whether Tpetra is in verbose mode.
Kokkos::View< const size_t *, execution_space >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
typename device_type::execution_space execution_space
This class' Kokkos execution space.
Utility functions for packing and unpacking sparse matrix entries.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Abstract base class for objects that can be the source of an Import or Export operation.
LO local_ordinal_type
The type of the graph's local indices.
global_size_t globalNumEntries_
Global number of entries in the graph.
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
typename node_type ::device_type device_type
This class' Kokkos device type.
A distributed dense vector.
Stand-alone utility functions and macros.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.