MueLu  Version of the Day
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
48 
49 #include <Xpetra_Map.hpp>
50 #include <Xpetra_Vector.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
54 
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
58 
60 
61 #include "MueLu_Aggregates.hpp"
62 #include "MueLu_GraphBase.hpp"
63 #include "MueLu_Level.hpp"
64 #include "MueLu_MasterList.hpp"
65 #include "MueLu_Monitor.hpp"
66 #include "MueLu_Types.hpp"
67 #include "MueLu_Utilities.hpp"
68 
69 #include "MueLu_Utilities_kokkos.hpp"
70 
71 namespace MueLu {
72 
73  namespace NotayUtils {
74  template <class LocalOrdinal>
76  return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
77  }
78 
79  template <class LocalOrdinal>
80  void RandomReorder(Teuchos::Array<LocalOrdinal> & list) {
81  typedef LocalOrdinal LO;
82  LO n = Teuchos::as<LO>(list.size());
83  for(LO i = 0; i < n-1; i++)
84  std::swap(list[i], list[RandomOrdinal(i,n-1)]);
85  }
86  }
87 
88  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
90  RCP<ParameterList> validParamList = rcp(new ParameterList());
91 
92 
93 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
94  SET_VALID_ENTRY("aggregation: pairwise: size");
95  SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
96  SET_VALID_ENTRY("aggregation: compute aggregate qualities");
97  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
98  SET_VALID_ENTRY("aggregation: ordering");
99 #undef SET_VALID_ENTRY
100 
101  // general variables needed in AggregationFactory
102  validParamList->set< RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
103  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
104  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
105  validParamList->set< RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
106 
107 
108  return validParamList;
109  }
110 
111  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
113  const ParameterList& pL = GetParameterList();
114 
115  Input(currentLevel, "A");
116  Input(currentLevel, "Graph");
117  Input(currentLevel, "DofsPerNode");
118  if (pL.get<bool>("aggregation: compute aggregate qualities")) {
119  Input(currentLevel, "AggregateQualities");
120  }
121 
122 
123  }
124 
125 
126  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
128  FactoryMonitor m(*this, "Build", currentLevel);
129  using STS = Teuchos::ScalarTraits<Scalar>;
130  using MT = typename STS::magnitudeType;
131 
132  const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
133 
134  RCP<Teuchos::FancyOStream> out;
135  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
136  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
137  out->setShowAllFrontMatter(false).setShowProcRank(true);
138  } else {
139  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
140  }
141 
142  const ParameterList& pL = GetParameterList();
143 
144  const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
145  TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
147  "Pairwise requires kappa > 2"
148  " otherwise all rows are considered as Dirichlet rows.");
149 
150  // Parameters
151  int maxNumIter = 3;
152  if (pL.isParameter("aggregation: pairwise: size"))
153  maxNumIter = pL.get<int>("aggregation: pairwise: size");
154  TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
156  "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
157  " must be a strictly positive integer");
158 
159 
160  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");
161  RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
162 
163  // Setup aggregates & aggStat objects
164  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
165  aggregates->setObjectLabel("PW");
166 
167  const LO numRows = graph->GetNodeNumVertices();
168 
169  // construct aggStat information
170  std::vector<unsigned> aggStat(numRows, READY);
171 
172 
173  const int DofsPerNode = Get<int>(currentLevel,"DofsPerNode");
174  TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError,
175  "Pairwise only supports one dof per node");
176 
177  // This follows the paper:
178  // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
179  // SISC 34(3), pp. A2288-2316.
180 
181  // Handle Ordering
182  std::string orderingStr = pL.get<std::string>("aggregation: ordering");
183  enum {
184  O_NATURAL,
185  O_RANDOM,
186  O_CUTHILL_MCKEE,
187  } ordering;
188 
189  ordering = O_NATURAL;
190  if (orderingStr == "random" ) ordering = O_RANDOM;
191  else if(orderingStr == "natural") {}
192  else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE;
193  else {
194  TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type");
195  }
196 
197  // Get an ordering vector
198  // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
199  // will get ignored in the aggregation phases, so we don't need to worry about
200  // running off the end.
201  Array<LO> orderingVector(numRows);
202  for (LO i = 0; i < numRows; i++)
203  orderingVector[i] = i;
204  if (ordering == O_RANDOM)
205  MueLu::NotayUtils::RandomReorder(orderingVector);
206  else if (ordering == O_CUTHILL_MCKEE) {
207  RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities_kokkos<SC,LO,GO,NO>::CuthillMcKee(*A);
208  auto localVector = rcmVector->getData(0);
209  for (LO i = 0; i < numRows; i++)
210  orderingVector[i] = localVector[i];
211  }
212 
213  // Get the party stated
214  LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
215  BuildInitialAggregates(pL, A, orderingVector(), kappa,
216  *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
217  TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
218  "Initial pairwise aggregation failed to aggregate all nodes");
219  LO numLocalAggregates = aggregates->GetNumAggregates();
220  GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
221  << A->getLocalNumRows() / numLocalAggregates << std::endl;
222 
223  // Temporary data storage for further aggregation steps
224  local_matrix_type intermediateP;
225  local_matrix_type coarseLocalA;
226 
227  // Compute the on rank part of the local matrix
228  // that the square submatrix that only contains
229  // columns corresponding to local rows.
230  LO numLocalDirichletNodes = numDirichletNodes;
231  Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
232  BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
233  for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
234  // Compute the intermediate prolongator
235  BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
236  localVertex2AggId(), intermediateP);
237 
238  // Compute the coarse local matrix and coarse row sum
239  BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
240 
241  // Directly compute rowsum from A, rather than coarseA
242  row_sum_type rowSum("rowSum", numLocalAggregates);
243  {
244  std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
245  auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
246  for(LO i=0; i<(LO)numRows; i++) {
247  if(aggStat[i] != AGGREGATED)
248  continue;
249  LO agg=vertex2AggId[i];
250  agg2vertex[agg].push_back(i);
251  }
252 
253  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
254  for(LO i = 0; i < numRows; i++) {
255  // If not aggregated already, skip this guy
256  if(aggStat[i] != AGGREGATED)
257  continue;
258  int agg = vertex2AggId[i];
259  std::vector<LO> & myagg = agg2vertex[agg];
260 
261  size_t nnz = A->getNumEntriesInLocalRow(i);
262  ArrayView<const LO> indices;
263  ArrayView<const SC> vals;
264  A->getLocalRowView(i, indices, vals);
265 
266  SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
267  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
268  bool found = false;
269  if(indices[colidx] < numRows) {
270  for(LO j=0; j<(LO)myagg.size(); j++)
271  if (vertex2AggId[indices[colidx]] == agg)
272  found=true;
273  }
274  if(!found) {
275  *out << "- ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
276  mysum += vals[colidx];
277  }
278  else {
279  *out << "- NOT ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
280  }
281  }
282 
283  rowSum_h[agg] = mysum;
284  }
285  Kokkos::deep_copy(rowSum, rowSum_h);
286  }
287 
288  // Get local orderingVector
289  Array<LO> localOrderingVector(numRows);
290  for (LO i = 0; i < numRows; i++)
291  localOrderingVector[i] = i;
292  if (ordering == O_RANDOM)
293  MueLu::NotayUtils::RandomReorder(localOrderingVector);
294  else if (ordering == O_CUTHILL_MCKEE) {
295  RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities_kokkos<SC,LO,GO,NO>::CuthillMcKee(*A);
296  auto localVector = rcmVector->getData(0);
297  for (LO i = 0; i < numRows; i++)
298  localOrderingVector[i] = localVector[i];
299  }
300 
301  // Compute new aggregates
302  numLocalAggregates = 0;
303  numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
304  std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
305  localVertex2AggId.resize(numNonAggregatedNodes, -1);
306  BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
307  localAggStat, localVertex2AggId,
308  numLocalAggregates, numNonAggregatedNodes);
309 
310  // After the first initial pairwise aggregation
311  // the Dirichlet nodes have been removed.
312  numLocalDirichletNodes = 0;
313 
314  // Update the aggregates
315  RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
316  ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
317  for(size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
318  LO oldAggIdx = vertex2AggId[vertexIdx];
319  if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
320  vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
321  }
322  }
323 
324  // We could probably print some better statistics at some point
325  GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
326  << A->getLocalNumRows() / numLocalAggregates << std::endl;
327  }
328  aggregates->SetNumAggregates(numLocalAggregates);
329  aggregates->AggregatesCrossProcessors(false);
330  aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
331 
332  // DO stuff
333  Set(currentLevel, "Aggregates", aggregates);
334  GetOStream(Statistics0) << aggregates->description() << std::endl;
335  }
336 
337 
338  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
340  BuildInitialAggregates(const Teuchos::ParameterList& params,
341  const RCP<const Matrix>& A,
342  const Teuchos::ArrayView<const LO> & orderingVector,
343  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
344  Aggregates& aggregates,
345  std::vector<unsigned>& aggStat,
346  LO& numNonAggregatedNodes,
347  LO& numDirichletNodes) const {
348 
349  Monitor m(*this, "BuildInitialAggregates");
350  using STS = Teuchos::ScalarTraits<Scalar>;
351  using MT = typename STS::magnitudeType;
352  using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
353 
354  RCP<Teuchos::FancyOStream> out;
355  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
356  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
357  out->setShowAllFrontMatter(false).setShowProcRank(true);
358  } else {
359  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
360  }
361 
362 
363  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
364  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
365  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
366  const MT MT_TWO = MT_ONE + MT_ONE;
367  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
368  const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
369 
370  const MT kappa_init = kappa / (kappa - MT_TWO);
371  const LO numRows = aggStat.size();
372  const int myRank = A->getMap()->getComm()->getRank();
373 
374  // For finding "ties" where we fall back to the ordering. Making this larger than
375  // hard zero substantially increases code robustness.
376  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
377  double tie_less = 1.0 - tie_criterion;
378  double tie_more = 1.0 + tie_criterion;
379 
380  // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
381  // and so we're not doing again here.
382  // This should probably be fixed at some point.
383 
384  // Extract diagonal, rowsums, etc
385  // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
388  RCP<RealValuedVector> ghostedAbsRowSum = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixOverlappedAbsDeletedRowsum(*A);
389  const ArrayRCP<const SC> D = ghostedDiag->getData(0);
390  const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
391  const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
392 
393  // Aggregates stuff
394  ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
395  ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
396  ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
397  ArrayView<LO> procWinner = procWinner_rcp();
398 
399  // Algorithm 4.2
400 
401  // 0,1 : Initialize: Flag boundary conditions
402  // Modification: We assume symmetry here aij = aji
403  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
404  MT aii = STS::magnitude(D[row]);
405  MT rowsum = AbsRs[row];
406 
407  if(aii >= kappa_init * rowsum) {
408  *out << "Flagging index " << row << " as dirichlet "
409  "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl;
410  aggStat[row] = IGNORED;
411  --numNonAggregatedNodes;
412  ++numDirichletNodes;
413  }
414  }
415 
416 
417  // 2 : Iteration
418  LO aggIndex = LO_ZERO;
419  for(LO i = 0; i < numRows; i++) {
420  LO current_idx = orderingVector[i];
421  // If we're aggregated already, skip this guy
422  if(aggStat[current_idx] != READY)
423  continue;
424 
425  MT best_mu = MT_ZERO;
426  LO best_idx = LO_INVALID;
427 
428  size_t nnz = A->getNumEntriesInLocalRow(current_idx);
429  ArrayView<const LO> indices;
430  ArrayView<const SC> vals;
431  A->getLocalRowView(current_idx, indices, vals);
432 
433  MT aii = STS::real(D[current_idx]);
434  MT si = STS::real(S[current_idx]);
435  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
436  // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
437  LO col = indices[colidx];
438  SC val = vals[colidx];
439  if(current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO)
440  continue;
441 
442  MT aij = STS::real(val);
443  MT ajj = STS::real(D[col]);
444  MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
445  if(aii - si + ajj - sj >= MT_ZERO) {
446  // Modification: We assume symmetry here aij = aji
447  MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
448  MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
449  MT mu = mu_top / mu_bottom;
450 
451  // Modification: Explicitly check the tie criterion here
452  if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
453  (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
454  best_mu = mu;
455  best_idx = col;
456  *out << "[" << current_idx << "] Column UPDATED " << col << ": "
457  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
458  << " = " << aii - si + ajj - sj<< ", aij = "<<aij << ", mu = " << mu << std::endl;
459  }
460  else {
461  *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
462  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
463  << " = " << aii - si + ajj - sj << ", aij = "<<aij<< ", mu = " << mu << std::endl;
464  }
465  }
466  else {
467  *out << "[" << current_idx << "] Column FAILED " << col << ": "
468  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
469  << " = " << aii - si + ajj - sj << std::endl;
470  }
471  }// end column for loop
472 
473  if(best_idx == LO_INVALID) {
474  *out << "No node buddy found for index " << current_idx
475  << " [agg " << aggIndex << "]\n" << std::endl;
476  // We found no potential node-buddy, so let's just make this a singleton
477  // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
478  // the paper
479 
480  aggStat[current_idx] = ONEPT;
481  vertex2AggId[current_idx] = aggIndex;
482  procWinner[current_idx] = myRank;
483  numNonAggregatedNodes--;
484  aggIndex++;
485 
486  } else {
487  // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
488  if(best_mu <= kappa) {
489  *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
490 
491  aggStat[current_idx] = AGGREGATED;
492  aggStat[best_idx] = AGGREGATED;
493  vertex2AggId[current_idx] = aggIndex;
494  vertex2AggId[best_idx] = aggIndex;
495  procWinner[current_idx] = myRank;
496  procWinner[best_idx] = myRank;
497  numNonAggregatedNodes-=2;
498  aggIndex++;
499 
500  } else {
501  *out << "No buddy found for index " << current_idx << ","
502  " but aggregating as singleton [agg " << aggIndex << "]" << std::endl;
503 
504  aggStat[current_idx] = ONEPT;
505  vertex2AggId[current_idx] = aggIndex;
506  procWinner[current_idx] = myRank;
507  numNonAggregatedNodes--;
508  aggIndex++;
509  } // best_mu
510  } // best_idx
511  }// end Algorithm 4.2
512 
513  *out << "vertex2aggid :";
514  for(int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
515  *out << i << "(" << vertex2AggId[i] << ")";
516  }
517  *out << std::endl;
518 
519  // update aggregate object
520  aggregates.SetNumAggregates(aggIndex);
521  } // BuildInitialAggregates
522 
523  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
525  BuildFurtherAggregates(const Teuchos::ParameterList& params,
526  const RCP<const Matrix>& A,
527  const Teuchos::ArrayView<const LO> & orderingVector,
528  const typename Matrix::local_matrix_type& coarseA,
529  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
530  const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
531  Kokkos::LayoutLeft,
532  typename Matrix::local_matrix_type::device_type>& rowSum,
533  std::vector<LocalOrdinal>& localAggStat,
534  Teuchos::Array<LocalOrdinal>& localVertex2AggID,
535  LO& numLocalAggregates,
536  LO& numNonAggregatedNodes) const {
537  Monitor m(*this, "BuildFurtherAggregates");
538 
539  // Set debug outputs based on environment variable
540  RCP<Teuchos::FancyOStream> out;
541  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
542  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
543  out->setShowAllFrontMatter(false).setShowProcRank(true);
544  } else {
545  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
546  }
547 
548  using value_type = typename local_matrix_type::value_type;
549  const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
550  const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
551  const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
552  const magnitude_type MT_two = MT_one + MT_one;
553  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
554 
555  // For finding "ties" where we fall back to the ordering. Making this larger than
556  // hard zero substantially increases code robustness.
557  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
558  double tie_less = 1.0 - tie_criterion;
559  double tie_more = 1.0 + tie_criterion;
560 
561  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
562  Kokkos::deep_copy(rowSum_h, rowSum);
563 
564  // Extracting the diagonal of a KokkosSparse::CrsMatrix
565  // is not currently provided in kokkos-kernels so here
566  // is an ugly way to get that done...
567  const LO numRows = static_cast<LO>(coarseA.numRows());
568  typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows);
569  typename local_matrix_type::row_map_type::HostMirror row_map_h
570  = Kokkos::create_mirror_view(coarseA.graph.row_map);
571  Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
572  typename local_matrix_type::index_type::HostMirror entries_h
573  = Kokkos::create_mirror_view(coarseA.graph.entries);
574  Kokkos::deep_copy(entries_h, coarseA.graph.entries);
575  typename local_matrix_type::values_type::HostMirror values_h
576  = Kokkos::create_mirror_view(coarseA.values);
577  Kokkos::deep_copy(values_h, coarseA.values);
578  for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
579  for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
580  entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
581  ++entryIdx) {
582  if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
583  diagA_h(rowIdx) = values_h(entryIdx);
584  }
585  }
586  }
587 
588  for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
589  if(localAggStat[currentIdx] != READY) {
590  continue;
591  }
592 
593  LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
594  magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
595  const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
596  const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
597  for(auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
598  const LO colIdx = static_cast<LO>(entries_h(entryIdx));
599  if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) {
600  continue;
601  }
602 
603  const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
604  const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
605  const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
606  if(aii - si + ajj - sj >= MT_zero) {
607  const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
608  const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
609  const magnitude_type mu = mu_top / mu_bottom;
610 
611  // Modification: Explicitly check the tie criterion here
612  if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
613  (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
614  best_mu = mu;
615  bestIdx = colIdx;
616  *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
617  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
618  << " = " << aii - si + ajj - sj << ", aij = "<<aij<<" mu = " << mu << std::endl;
619  }
620  else {
621  *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
622  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
623  << " = " << aii - si + ajj - sj << ", aij = "<<aij<<", mu = " << mu << std::endl;
624  }
625  } else {
626  *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
627  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
628  << " = " << aii - si + ajj - sj << std::endl;
629  }
630  } // end loop over row entries
631 
632  if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
633  localAggStat[currentIdx] = ONEPT;
634  localVertex2AggID[currentIdx] = numLocalAggregates;
635  --numNonAggregatedNodes;
636  ++numLocalAggregates;
637  } else {
638  if(best_mu <= kappa) {
639  *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
640 
641  localAggStat[currentIdx] = AGGREGATED;
642  localVertex2AggID[currentIdx] = numLocalAggregates;
643  --numNonAggregatedNodes;
644 
645  localAggStat[bestIdx] = AGGREGATED;
646  localVertex2AggID[bestIdx] = numLocalAggregates;
647  --numNonAggregatedNodes;
648 
649  ++numLocalAggregates;
650  } else {
651  *out << "No buddy found for index " << currentIdx << ","
652  " but aggregating as singleton [agg " << numLocalAggregates << "]" << std::endl;
653 
654  localAggStat[currentIdx] = ONEPT;
655  localVertex2AggID[currentIdx] = numLocalAggregates;
656  --numNonAggregatedNodes;
657  ++numLocalAggregates;
658  }
659  }
660  } // end loop over matrix rows
661 
662  } // BuildFurtherAggregates
663 
664  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
666  BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA,
667  typename Matrix::local_matrix_type& onrankA) const {
668  Monitor m(*this, "BuildOnRankLocalMatrix");
669 
670  // Set debug outputs based on environment variable
671  RCP<Teuchos::FancyOStream> out;
672  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
673  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
674  out->setShowAllFrontMatter(false).setShowProcRank(true);
675  } else {
676  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
677  }
678 
679  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
680  using values_type = typename local_matrix_type::values_type;
681  using size_type = typename local_graph_type::size_type;
682  using col_index_type = typename local_graph_type::data_type;
683  using array_layout = typename local_graph_type::array_layout;
684  using memory_traits = typename local_graph_type::memory_traits;
685  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
686  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
687  // Extract on rank part of A
688  // Simply check that the column index is less than the number of local rows
689  // otherwise remove it.
690 
691  const int numRows = static_cast<int>(localA.numRows());
692  row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
693  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
694  typename local_graph_type::row_map_type::HostMirror origRowPtr_h
695  = Kokkos::create_mirror_view(localA.graph.row_map);
696  typename local_graph_type::entries_type::HostMirror origColind_h
697  = Kokkos::create_mirror_view(localA.graph.entries);
698  typename values_type::HostMirror origValues_h
699  = Kokkos::create_mirror_view(localA.values);
700  Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
701  Kokkos::deep_copy(origColind_h, localA.graph.entries);
702  Kokkos::deep_copy(origValues_h, localA.values);
703 
704  // Compute the number of nnz entries per row
705  rowPtr_h(0) = 0;
706  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
707  for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
708  if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
709  }
710  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
711  }
712  Kokkos::deep_copy(rowPtr, rowPtr_h);
713 
714  const LO nnzOnrankA = rowPtr_h(numRows);
715 
716  // Now use nnz per row to allocate matrix views and store column indices and values
717  col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
718  values_type values("onrankA values", rowPtr_h(numRows));
719  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
720  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
721  int entriesInRow;
722  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
723  entriesInRow = 0;
724  for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
725  if(origColind_h(entryIdx) < numRows) {
726  colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
727  values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
728  ++entriesInRow;
729  }
730  }
731  }
732  Kokkos::deep_copy(colInd, colInd_h);
733  Kokkos::deep_copy(values, values_h);
734 
735  onrankA = local_matrix_type("onrankA", numRows, numRows,
736  nnzOnrankA, values, rowPtr, colInd);
737  }
738 
739  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
742  const LocalOrdinal numDirichletNodes,
743  const LocalOrdinal numLocalAggregates,
744  const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
745  typename Matrix::local_matrix_type& intermediateP) const {
746  Monitor m(*this, "BuildIntermediateProlongator");
747 
748  // Set debug outputs based on environment variable
749  RCP<Teuchos::FancyOStream> out;
750  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
751  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
752  out->setShowAllFrontMatter(false).setShowProcRank(true);
753  } else {
754  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
755  }
756 
757  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
758  using values_type = typename local_matrix_type::values_type;
759  using size_type = typename local_graph_type::size_type;
760  using col_index_type = typename local_graph_type::data_type;
761  using array_layout = typename local_graph_type::array_layout;
762  using memory_traits = typename local_graph_type::memory_traits;
763  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
764  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
765 
766  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
767 
768  const int intermediatePnnz = numRows - numDirichletNodes;
769  row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
770  col_indices_type colInd("intermediateP column indices", intermediatePnnz);
771  values_type values("intermediateP values", intermediatePnnz);
772  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
773  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
774 
775  rowPtr_h(0) = 0;
776  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
777  // Skip Dirichlet nodes
778  if(localVertex2AggID[rowIdx] == LO_INVALID) {
779  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
780  } else {
781  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
782  colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
783  }
784  }
785 
786  Kokkos::deep_copy(rowPtr, rowPtr_h);
787  Kokkos::deep_copy(colInd, colInd_h);
788  Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
789 
790  intermediateP = local_matrix_type("intermediateP",
791  numRows, numLocalAggregates, intermediatePnnz,
792  values, rowPtr, colInd);
793  } // BuildIntermediateProlongator
794 
795  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
797  BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP,
798  typename Matrix::local_matrix_type& coarseA) const {
799  Monitor m(*this, "BuildCoarseLocalMatrix");
800 
801  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
802  using values_type = typename local_matrix_type::values_type;
803  using size_type = typename local_graph_type::size_type;
804  using col_index_type = typename local_graph_type::data_type;
805  using array_layout = typename local_graph_type::array_layout;
806  using memory_traits = typename local_graph_type::memory_traits;
807  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
808  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
809 
811  localSpGEMM(coarseA, intermediateP, "AP", AP);
812 
813  // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
814  // I am not sure but doing it for safety in case it stashes data from the previous
815  // spgemm computation...
816 
817  // Compute Ac = Pt * AP
818  // Two steps needed:
819  // 1. compute Pt
820  // 2. perform multiplication
821 
822  // Step 1 compute Pt
823  // Obviously this requires the same amount of storage as P except for the rowPtr
824  row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
825  intermediateP.numCols() + 1);
826  col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
827  intermediateP.nnz());
828  values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
829  intermediateP.nnz());
830 
831  typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
832  typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
833  Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
834  Kokkos::deep_copy(rowPtrPt_h, 0);
835  for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
836  rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
837  }
838  for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
839  rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
840  }
841  Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
842 
843  typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
844  Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
845  typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
846  Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
847  typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
848  Kokkos::deep_copy(valuesP_h, intermediateP.values);
849  typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
850  typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
851  const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
852  Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
853 
854  col_index_type colIdx = 0;
855  for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
856  for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
857  colIdx = entries_h(entryIdxP);
858  for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
859  if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
860  colIndPt_h(entryIdxPt) = rowIdx;
861  valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
862  break;
863  }
864  } // Loop over entries in row of Pt
865  } // Loop over entries in row of P
866  } // Loop over rows of P
867 
868  Kokkos::deep_copy(colIndPt, colIndPt_h);
869  Kokkos::deep_copy(valuesPt, valuesPt_h);
870 
871 
872  local_matrix_type intermediatePt("intermediatePt",
873  intermediateP.numCols(),
874  intermediateP.numRows(),
875  intermediateP.nnz(),
876  valuesPt, rowPtrPt, colIndPt);
877 
878  // Create views for coarseA matrix
879  localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
880  } // BuildCoarseLocalMatrix
881 
882  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
884  localSpGEMM(const typename Matrix::local_matrix_type& A,
885  const typename Matrix::local_matrix_type& B,
886  const std::string matrixLabel,
887  typename Matrix::local_matrix_type& C) const {
888 
889  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
890  using values_type = typename local_matrix_type::values_type;
891  using size_type = typename local_graph_type::size_type;
892  using col_index_type = typename local_graph_type::data_type;
893  using array_layout = typename local_graph_type::array_layout;
894  using memory_space = typename device_type::memory_space;
895  using memory_traits = typename local_graph_type::memory_traits;
896  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
897  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
898 
899  // Options
900  int team_work_size = 16;
901  std::string myalg("SPGEMM_KK_MEMORY");
902  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
903  KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
904  typename col_indices_type::const_value_type,
905  typename values_type::const_value_type,
907  memory_space,
908  memory_space> kh;
909  kh.create_spgemm_handle(alg_enum);
910  kh.set_team_work_size(team_work_size);
911 
912  // Create views for AP matrix
913  row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
914  A.numRows() + 1);
915  col_indices_type colIndC;
916  values_type valuesC;
917 
918  // Symbolic multiplication
919  KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
920  B.numRows(), B.numCols(),
921  A.graph.row_map, A.graph.entries, false,
922  B.graph.row_map, B.graph.entries, false,
923  rowPtrC);
924 
925  // allocate column indices and values of AP
926  size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
927  if (nnzC) {
928  colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
929  valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
930  }
931 
932  // Numeric multiplication
933  KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
934  B.numRows(), B.numCols(),
935  A.graph.row_map, A.graph.entries, A.values, false,
936  B.graph.row_map, B.graph.entries, B.values, false,
937  rowPtrC, colIndC, valuesC);
938  kh.destroy_spgemm_handle();
939 
940  C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
941 
942  } // localSpGEMM
943 
944 } //namespace MueLu
945 
946 #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels&#39; spgemm that takes in CrsMatrix.
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
Timer to be used in factories. Similar to Monitor but with additional timers.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
Namespace for MueLu classes and methods.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
Print statistics that do not involve significant additional computation.
void Build(Level &currentLevel) const
Build aggregates.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
typename device_type::execution_space execution_space
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
#define SET_VALID_ENTRY(name)
void DeclareInput(Level &currentLevel) const
Input.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
Exception throws to report errors in the internal logical of the program.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)