MueLu  Version of the Day
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
49 #include <Xpetra_CrsGraphFactory.hpp>
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_ExportFactory.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_MultiVector.hpp>
58 #include <Xpetra_StridedMap.hpp>
59 #include <Xpetra_VectorFactory.hpp>
60 #include <Xpetra_Vector.hpp>
61 
62 #include <Xpetra_IO.hpp>
63 
65 
66 #include "MueLu_AmalgamationFactory.hpp"
67 #include "MueLu_AmalgamationInfo.hpp"
68 #include "MueLu_Exceptions.hpp"
69 #include "MueLu_GraphBase.hpp"
70 #include "MueLu_Graph.hpp"
71 #include "MueLu_Level.hpp"
72 #include "MueLu_LWGraph.hpp"
73 #include "MueLu_MasterList.hpp"
74 #include "MueLu_Monitor.hpp"
75 #include "MueLu_PreDropFunctionBaseClass.hpp"
76 #include "MueLu_PreDropFunctionConstVal.hpp"
77 #include "MueLu_Utilities.hpp"
78 
79 #ifdef HAVE_XPETRA_TPETRA
80 #include "Tpetra_CrsGraphTransposer.hpp"
81 #endif
82 
83 #include <algorithm>
84 #include <cstdlib>
85 #include <string>
86 
87 // If defined, read environment variables.
88 // Should be removed once we are confident that this works.
89 //#define DJS_READ_ENV_VARIABLES
90 
91 
92 namespace MueLu {
93 
94  namespace Details {
95  template<class real_type, class LO>
96  struct DropTol {
97 
98  DropTol() = default;
99  DropTol(DropTol const&) = default;
100  DropTol(DropTol &&) = default;
101 
102  DropTol& operator=(DropTol const&) = default;
103  DropTol& operator=(DropTol &&) = default;
104 
105  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
106  : val{val_}, diag{diag_}, col{col_}, drop{drop_}
107  {}
108 
109  real_type val {Teuchos::ScalarTraits<real_type>::zero()};
110  real_type diag {Teuchos::ScalarTraits<real_type>::zero()};
111  LO col {Teuchos::OrdinalTraits<LO>::invalid()};
112  bool drop {true};
113 
114  // CMS: Auxillary information for debugging info
115  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
116  };
117  }
118 
119 
120  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
122  RCP<ParameterList> validParamList = rcp(new ParameterList());
123 
124 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
125  SET_VALID_ENTRY("aggregation: drop tol");
126  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
127  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
128  SET_VALID_ENTRY("aggregation: row sum drop tol");
129  SET_VALID_ENTRY("aggregation: drop scheme");
130  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
131  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
132  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
133 
134  {
135  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
136  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
137  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("signed classical sa","classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme")));
138 
139  }
140  SET_VALID_ENTRY("aggregation: distance laplacian algo");
141  SET_VALID_ENTRY("aggregation: classical algo");
142  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
143 #undef SET_VALID_ENTRY
144  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
145 
146  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
147  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
148  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
149  validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
150 
151  return validParamList;
152  }
153 
154  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
156 
157  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
159  Input(currentLevel, "A");
160  Input(currentLevel, "UnAmalgamationInfo");
161 
162  const ParameterList& pL = GetParameterList();
163  if (pL.get<bool>("lightweight wrap") == true) {
164  std::string algo = pL.get<std::string>("aggregation: drop scheme");
165  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
166  Input(currentLevel, "Coordinates");
167  }
168  if(algo == "signed classical sa")
169  ;
170  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
171  Input(currentLevel, "BlockNumber");
172  }
173  }
174 
175  }
176 
177  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
179 
180  FactoryMonitor m(*this, "Build", currentLevel);
181 
182  typedef Teuchos::ScalarTraits<SC> STS;
183  typedef typename STS::magnitudeType real_type;
184  typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
185  typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
186 
187  if (predrop_ != Teuchos::null)
188  GetOStream(Parameters0) << predrop_->description();
189 
190  RCP<Matrix> realA = Get< RCP<Matrix> >(currentLevel, "A");
191  RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
192  const ParameterList & pL = GetParameterList();
193  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
194 
195  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
196  std::string algo = pL.get<std::string>("aggregation: drop scheme");
197  const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
198 
199  RCP<RealValuedMultiVector> Coords;
200  RCP<Matrix> A;
201 
202  bool use_block_algorithm=false;
203  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
204  bool useSignedClassicalRS = false;
205  bool useSignedClassicalSA = false;
206  bool generateColoringGraph = false;
207 
208  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
209  // in the block diagonalizaiton). So we'll clobber the rowSumTol with -1.0 in this case
210  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
211  RCP<LocalOrdinalVector> ghostedBlockNumber;
212  ArrayRCP<const LO> g_block_id;
213 
214  if(algo == "distance laplacian" ) {
215  // Grab the coordinates for distance laplacian
216  Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
217  A = realA;
218  }
219  else if(algo == "signed classical sa") {
220  useSignedClassicalSA = true;
221  algo = "classical";
222  A = realA;
223  }
224  else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
225  useSignedClassicalRS = true;
226  // if(realA->GetFixedBlockSize() > 1) {
227  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
228  // Ghost the column block numbers if we need to
229  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
230  if(!importer.is_null()) {
231  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
232  ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
233  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
234  }
235  else {
236  ghostedBlockNumber = BlockNumber;
237  }
238  g_block_id = ghostedBlockNumber->getData(0);
239  // }
240  if(algo == "block diagonal colored signed classical")
241  generateColoringGraph=true;
242  algo = "classical";
243  A = realA;
244 
245  }
246  else if(algo == "block diagonal") {
247  // Handle the "block diagonal" filtering and then leave
248  BlockDiagonalize(currentLevel,realA,false);
249  return;
250  }
251  else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
252  // Handle the "block diagonal" filtering, and then continue onward
253  use_block_algorithm = true;
254  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel,realA,true);
255  if(algo == "block diagonal distance laplacian") {
256  // We now need to expand the coordinates by the interleaved blocksize
257  RCP<RealValuedMultiVector> OldCoords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
258  if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
259  LO dim = (LO) OldCoords->getNumVectors();
260  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim);
261  for(LO k=0; k<dim; k++){
262  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
263  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
264  for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) {
265  LO new_base = i*dim;
266  for(LO j=0; j<interleaved_blocksize; j++)
267  new_vec[new_base + j] = old_vec[i];
268  }
269  }
270  }
271  else {
272  Coords = OldCoords;
273  }
274  algo = "distance laplacian";
275  }
276  else if(algo == "block diagonal classical") {
277  algo = "classical";
278  }
279  // All cases
280  A = filteredMatrix;
281  rowSumTol = -1.0;
282  }
283  else {
284  A = realA;
285  }
286 
287  // Distance Laplacian weights
288  Array<double> dlap_weights = pL.get<Array<double> >("aggregation: distance laplacian directional weights");
289  enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS};
290  int use_dlap_weights = NO_WEIGHTS;
291  if(algo == "distance laplacian") {
292  LO dim = (LO) Coords->getNumVectors();
293  // If anything isn't 1.0 we need to turn on the weighting
294  bool non_unity = false;
295  for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) {
296  if(dlap_weights[i] != 1.0) {
297  non_unity = true;
298  }
299  }
300  if(non_unity) {
301  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
302  if((LO)dlap_weights.size() == dim)
303  use_dlap_weights = SINGLE_WEIGHTS;
304  else if((LO)dlap_weights.size() == blocksize * dim)
305  use_dlap_weights = BLOCK_WEIGHTS;
306  else {
307  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
308  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
309  }
310  if (GetVerbLevel() & Statistics1)
311  GetOStream(Statistics1) << "Using distance laplacian weights: "<<dlap_weights<<std::endl;
312  }
313  }
314 
315  // decide wether to use the fast-track code path for standard maps or the somewhat slower
316  // code path for non-standard maps
317  /*bool bNonStandardMaps = false;
318  if (A->IsView("stridedMaps") == true) {
319  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
320  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
321  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
322  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
323  bNonStandardMaps = true;
324  }*/
325 
326  if (doExperimentalWrap) {
327  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
328  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
329 
330  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
331  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
332  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
333  real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
334 
336  // Remove this bit once we are confident that cut-based dropping works.
337 #ifdef HAVE_MUELU_DEBUG
338  int distanceLaplacianCutVerbose = 0;
339 #endif
340 #ifdef DJS_READ_ENV_VARIABLES
341  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
342  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
343  }
344 
345  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
346  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
347  realThreshold = 1e-4*tmp;
348  }
349 
350 # ifdef HAVE_MUELU_DEBUG
351  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
352  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
353  }
354 # endif
355 #endif
356 
358  enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric};
359 
360  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
361  decisionAlgoType classicalAlgo = defaultAlgo;
362  if (algo == "distance laplacian") {
363  if (distanceLaplacianAlgoStr == "default")
364  distanceLaplacianAlgo = defaultAlgo;
365  else if (distanceLaplacianAlgoStr == "unscaled cut")
366  distanceLaplacianAlgo = unscaled_cut;
367  else if (distanceLaplacianAlgoStr == "scaled cut")
368  distanceLaplacianAlgo = scaled_cut;
369  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
370  distanceLaplacianAlgo = scaled_cut_symmetric;
371  else
372  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
373  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize()<< std::endl;
374  } else if (algo == "classical") {
375  if (classicalAlgoStr == "default")
376  classicalAlgo = defaultAlgo;
377  else if (classicalAlgoStr == "unscaled cut")
378  classicalAlgo = unscaled_cut;
379  else if (classicalAlgoStr == "scaled cut")
380  classicalAlgo = scaled_cut;
381  else
382  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
383  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
384 
385  } else
386  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
387  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
388 
389  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
390 
391 
392  // NOTE: We don't support signed classical RS or SA with cut drop at present
393  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
394  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
395 
396  GO numDropped = 0, numTotal = 0;
397  std::string graphType = "unamalgamated"; //for description purposes only
398 
399 
400  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
401  BlockSize is the number of storage blocks that must kept together during the amalgamation process.
402 
403  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
404 
405  numPDEs = BlockSize * storageblocksize.
406 
407  If numPDEs==1
408  Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
409  No other values makes sense.
410 
411  If numPDEs>1
412  If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
413  If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
414  Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
415  */
416  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
417  const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
418 
419 
420  /************************** RS or SA-style Classical Dropping (and variants) **************************/
421  if (algo == "classical") {
422  if (predrop_ == null) {
423  // ap: this is a hack: had to declare predrop_ as mutable
424  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
425  }
426 
427  if (predrop_ != null) {
428  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
429  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
430  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
431  // If a user provided a predrop function, it overwrites the XML threshold parameter
432  SC newt = predropConstVal->GetThreshold();
433  if (newt != threshold) {
434  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
435  threshold = newt;
436  }
437  }
438  // At this points we either have
439  // (predrop_ != null)
440  // Therefore, it is sufficient to check only threshold
441  if ( BlockSize==1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
442  // Case 1: scalar problem, no dropping => just use matrix graph
443  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
444  // Detect and record rows that correspond to Dirichlet boundary conditions
445  ArrayRCP<bool > boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
446  if (rowSumTol > 0.)
447  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
448 
449  graph->SetBoundaryNodeMap(boundaryNodes);
450  numTotal = A->getLocalNumEntries();
451 
452  if (GetVerbLevel() & Statistics1) {
453  GO numLocalBoundaryNodes = 0;
454  GO numGlobalBoundaryNodes = 0;
455  for (LO i = 0; i < boundaryNodes.size(); ++i)
456  if (boundaryNodes[i])
457  numLocalBoundaryNodes++;
458  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
459  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
460  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
461  }
462 
463  Set(currentLevel, "DofsPerNode", 1);
464  Set(currentLevel, "Graph", graph);
465 
466  } else if ( (BlockSize == 1 && threshold != STS::zero()) ||
467  (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
468  (BlockSize == 1 && useSignedClassicalRS) ||
469  (BlockSize == 1 && useSignedClassicalSA) ) {
470  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
471  // graph's map information, e.g., whether index is local
472  // OR a matrix without a CrsGraph
473 
474  // allocate space for the local graph
475  ArrayRCP<LO> rows (A->getLocalNumRows()+1);
476  ArrayRCP<LO> columns(A->getLocalNumEntries());
477 
478  using MT = typename STS::magnitudeType;
479  RCP<Vector> ghostedDiag;
480  ArrayRCP<const SC> ghostedDiagVals;
481  ArrayRCP<const MT> negMaxOffDiagonal;
482  // RS style needs the max negative off-diagonal, SA style needs the diagonal
483  if(useSignedClassicalRS) {
484  if(ghostedBlockNumber.is_null()) {
486  if (GetVerbLevel() & Statistics1)
487  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
488  }
489  else {
490  negMaxOffDiagonal = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber);
491  if (GetVerbLevel() & Statistics1)
492  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
493  }
494  }
495  else {
497  ghostedDiagVals = ghostedDiag->getData(0);
498  }
499  ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
500  if (rowSumTol > 0.) {
501  if(ghostedBlockNumber.is_null()) {
502  if (GetVerbLevel() & Statistics1)
503  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
504  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
505  } else {
506  if (GetVerbLevel() & Statistics1)
507  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
508  Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
509  }
510  }
511 
512  LO realnnz = 0;
513  rows[0] = 0;
514  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
515  size_t nnz = A->getNumEntriesInLocalRow(row);
516  bool rowIsDirichlet = boundaryNodes[row];
517  ArrayView<const LO> indices;
518  ArrayView<const SC> vals;
519  A->getLocalRowView(row, indices, vals);
520 
521  if(classicalAlgo == defaultAlgo) {
522  //FIXME the current predrop function uses the following
523  //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
524  //FIXME but the threshold doesn't take into account the rows' diagonal entries
525  //FIXME For now, hardwiring the dropping in here
526 
527  LO rownnz = 0;
528  if(useSignedClassicalRS) {
529  // Signed classical RS style
530  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
531  LO col = indices[colID];
532  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
533  MT neg_aij = - STS::real(vals[colID]);
534  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
535  g_block_id.is_null() ? -1 : g_block_id[row],
536  g_block_id.is_null() ? -1 : g_block_id[col],
537  neg_aij, max_neg_aik);*/
538  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
539  columns[realnnz++] = col;
540  rownnz++;
541  } else
542  numDropped++;
543  }
544  rows[row+1] = realnnz;
545  }
546  else if(useSignedClassicalSA) {
547  // Signed classical SA style
548  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
549  LO col = indices[colID];
550 
551  bool is_nonpositive = STS::real(vals[colID]) <= 0;
552  MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
553  MT aij = is_nonpositive ? STS::magnitude(vals[colID]*vals[colID]) : (-STS::magnitude(vals[colID]*vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
554  /*
555  if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
556  vals[colID],aij, aiiajj);
557  */
558 
559  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
560  columns[realnnz++] = col;
561  rownnz++;
562  } else
563  numDropped++;
564  }
565  rows[row+1] = realnnz;
566  }
567  else {
568  // Standard abs classical
569  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
570  LO col = indices[colID];
571  MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
572  MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
573 
574  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
575  columns[realnnz++] = col;
576  rownnz++;
577  } else
578  numDropped++;
579  }
580  rows[row+1] = realnnz;
581  }
582  }
583  else {
584  /* Cut Algorithm */
585  //CMS
586  using DropTol = Details::DropTol<real_type,LO>;
587  std::vector<DropTol> drop_vec;
588  drop_vec.reserve(nnz);
589  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
590  const real_type one = Teuchos::ScalarTraits<real_type>::one();
591  LO rownnz = 0;
592  // NOTE: This probably needs to be fixed for rowsum
593 
594  // find magnitudes
595  for (LO colID = 0; colID < (LO)nnz; colID++) {
596  LO col = indices[colID];
597  if (row == col) {
598  drop_vec.emplace_back( zero, one, colID, false);
599  continue;
600  }
601 
602  // Don't aggregate boundaries
603  if(boundaryNodes[colID]) continue;
604  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
605  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
606  drop_vec.emplace_back(aij, aiiajj, colID, false);
607  }
608 
609  const size_t n = drop_vec.size();
610 
611  if (classicalAlgo == unscaled_cut) {
612  std::sort( drop_vec.begin(), drop_vec.end()
613  , [](DropTol const& a, DropTol const& b) {
614  return a.val > b.val;
615  });
616 
617  bool drop = false;
618  for (size_t i=1; i<n; ++i) {
619  if (!drop) {
620  auto const& x = drop_vec[i-1];
621  auto const& y = drop_vec[i];
622  auto a = x.val;
623  auto b = y.val;
624  if (a > realThreshold*b) {
625  drop = true;
626 #ifdef HAVE_MUELU_DEBUG
627  if (distanceLaplacianCutVerbose) {
628  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
629  }
630 #endif
631  }
632  }
633  drop_vec[i].drop = drop;
634  }
635  } else if (classicalAlgo == scaled_cut) {
636  std::sort( drop_vec.begin(), drop_vec.end()
637  , [](DropTol const& a, DropTol const& b) {
638  return a.val/a.diag > b.val/b.diag;
639  });
640  bool drop = false;
641  // printf("[%d] Scaled Cut: ",(int)row);
642  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
643  for (size_t i=1; i<n; ++i) {
644  if (!drop) {
645  auto const& x = drop_vec[i-1];
646  auto const& y = drop_vec[i];
647  auto a = x.val/x.diag;
648  auto b = y.val/y.diag;
649  if (a > realThreshold*b) {
650  drop = true;
651 
652 #ifdef HAVE_MUELU_DEBUG
653  if (distanceLaplacianCutVerbose) {
654  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
655  }
656 #endif
657  }
658  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
659 
660  }
661  drop_vec[i].drop = drop;
662  }
663  // printf("\n");
664  }
665  std::sort( drop_vec.begin(), drop_vec.end()
666  , [](DropTol const& a, DropTol const& b) {
667  return a.col < b.col;
668  }
669  );
670 
671  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
672  LO col = indices[drop_vec[idxID].col];
673  // don't drop diagonal
674  if (row == col) {
675  columns[realnnz++] = col;
676  rownnz++;
677  continue;
678  }
679 
680  if (!drop_vec[idxID].drop) {
681  columns[realnnz++] = col;
682  rownnz++;
683  } else {
684  numDropped++;
685  }
686  }
687  // CMS
688  rows[row+1] = realnnz;
689 
690  }
691  }//end for row
692 
693  columns.resize(realnnz);
694  numTotal = A->getLocalNumEntries();
695 
696  if (aggregationMayCreateDirichlet) {
697  // If the only element remaining after filtering is diagonal, mark node as boundary
698  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
699  if (rows[row+1]- rows[row] <= 1)
700  boundaryNodes[row] = true;
701  }
702  }
703 
704  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
705  graph->SetBoundaryNodeMap(boundaryNodes);
706  if (GetVerbLevel() & Statistics1) {
707  GO numLocalBoundaryNodes = 0;
708  GO numGlobalBoundaryNodes = 0;
709  for (LO i = 0; i < boundaryNodes.size(); ++i)
710  if (boundaryNodes[i])
711  numLocalBoundaryNodes++;
712  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
713  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
714  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
715  }
716  Set(currentLevel, "Graph", graph);
717  Set(currentLevel, "DofsPerNode", 1);
718 
719  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
720  if(generateColoringGraph) {
721  RCP<GraphBase> colorGraph;
722  RCP<const Import> importer = A->getCrsGraph()->getImporter();
723  BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer);
724  Set(currentLevel, "Coloring Graph",colorGraph);
725  // #define CMS_DUMP
726 #ifdef CMS_DUMP
727  {
728  Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
729  Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
730  // int rank = graph->GetDomainMap()->getComm()->getRank();
731  // {
732  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
733  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
734  // colorGraph->print(*fancy,Debug);
735  // }
736  // {
737  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
738  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
739  // graph->print(*fancy,Debug);
740  // }
741 
742  }
743 #endif
744  }//end generateColoringGraph
745  } else if (BlockSize > 1 && threshold == STS::zero()) {
746  // Case 3: Multiple DOF/node problem without dropping
747  const RCP<const Map> rowMap = A->getRowMap();
748  const RCP<const Map> colMap = A->getColMap();
749 
750  graphType = "amalgamated";
751 
752  // build node row map (uniqueMap) and node column map (nonUniqueMap)
753  // the arrays rowTranslation and colTranslation contain the local node id
754  // given a local dof id. The data is calculated by the AmalgamationFactory and
755  // stored in the variable container "UnAmalgamationInfo"
756  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
757  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
758  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
759  Array<LO> colTranslation = *(amalInfo->getColTranslation());
760 
761  // get number of local nodes
762  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
763 
764  // Allocate space for the local graph
765  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
766  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
767 
768  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
769 
770  // Detect and record rows that correspond to Dirichlet boundary conditions
771  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
772  // TODO the array one bigger than the number of local rows, and the last entry can
773  // TODO hold the actual number of boundary nodes. Clever, huh?
774  ArrayRCP<bool > pointBoundaryNodes;
775  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
776  if (rowSumTol > 0.)
777  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
778 
779 
780  // extract striding information
781  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
782  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
783  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
784  if (A->IsView("stridedMaps") == true) {
785  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
786  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
787  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
788  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
789  blkId = strMap->getStridedBlockId();
790  if (blkId > -1)
791  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
792  }
793 
794  // loop over all local nodes
795  LO realnnz = 0;
796  rows[0] = 0;
797  Array<LO> indicesExtra;
798  for (LO row = 0; row < numRows; row++) {
799  ArrayView<const LO> indices;
800  indicesExtra.resize(0);
801 
802  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
803  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
804  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
805  // with local ids.
806  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
807  // node.
808  bool isBoundary = false;
809  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
810  for (LO j = 0; j < blkPartSize; j++) {
811  if (pointBoundaryNodes[row*blkPartSize+j]) {
812  isBoundary = true;
813  break;
814  }
815  }
816  } else {
817  isBoundary = true;
818  for (LO j = 0; j < blkPartSize; j++) {
819  if (!pointBoundaryNodes[row*blkPartSize+j]) {
820  isBoundary = false;
821  break;
822  }
823  }
824  }
825 
826  // Merge rows of A
827  // The array indicesExtra contains local column node ids for the current local node "row"
828  if (!isBoundary)
829  MergeRows(*A, row, indicesExtra, colTranslation);
830  else
831  indicesExtra.push_back(row);
832  indices = indicesExtra;
833  numTotal += indices.size();
834 
835  // add the local column node ids to the full columns array which
836  // contains the local column node ids for all local node rows
837  LO nnz = indices.size(), rownnz = 0;
838  for (LO colID = 0; colID < nnz; colID++) {
839  LO col = indices[colID];
840  columns[realnnz++] = col;
841  rownnz++;
842  }
843 
844  if (rownnz == 1) {
845  // If the only element remaining after filtering is diagonal, mark node as boundary
846  // FIXME: this should really be replaced by the following
847  // if (indices.size() == 1 && indices[0] == row)
848  // boundaryNodes[row] = true;
849  // We do not do it this way now because there is no framework for distinguishing isolated
850  // and boundary nodes in the aggregation algorithms
851  amalgBoundaryNodes[row] = true;
852  }
853  rows[row+1] = realnnz;
854  } //for (LO row = 0; row < numRows; row++)
855  columns.resize(realnnz);
856 
857  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
858  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
859 
860  if (GetVerbLevel() & Statistics1) {
861  GO numLocalBoundaryNodes = 0;
862  GO numGlobalBoundaryNodes = 0;
863 
864  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
865  if (amalgBoundaryNodes[i])
866  numLocalBoundaryNodes++;
867 
868  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
869  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
870  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
871  << " agglomerated Dirichlet nodes" << std::endl;
872  }
873 
874  Set(currentLevel, "Graph", graph);
875  Set(currentLevel, "DofsPerNode", blkSize); // full block size
876 
877  } else if (BlockSize > 1 && threshold != STS::zero()) {
878  // Case 4: Multiple DOF/node problem with dropping
879  const RCP<const Map> rowMap = A->getRowMap();
880  const RCP<const Map> colMap = A->getColMap();
881  graphType = "amalgamated";
882 
883  // build node row map (uniqueMap) and node column map (nonUniqueMap)
884  // the arrays rowTranslation and colTranslation contain the local node id
885  // given a local dof id. The data is calculated by the AmalgamationFactory and
886  // stored in the variable container "UnAmalgamationInfo"
887  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
888  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
889  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
890  Array<LO> colTranslation = *(amalInfo->getColTranslation());
891 
892  // get number of local nodes
893  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
894 
895  // Allocate space for the local graph
896  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
897  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
898 
899  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
900 
901  // Detect and record rows that correspond to Dirichlet boundary conditions
902  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
903  // TODO the array one bigger than the number of local rows, and the last entry can
904  // TODO hold the actual number of boundary nodes. Clever, huh?
905  ArrayRCP<bool > pointBoundaryNodes;
906  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
907  if (rowSumTol > 0.)
908  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
909 
910 
911  // extract striding information
912  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
913  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
914  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
915  if (A->IsView("stridedMaps") == true) {
916  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
917  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
918  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
919  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
920  blkId = strMap->getStridedBlockId();
921  if (blkId > -1)
922  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
923  }
924 
925  // extract diagonal data for dropping strategy
927  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
928 
929  // loop over all local nodes
930  LO realnnz = 0;
931  rows[0] = 0;
932  Array<LO> indicesExtra;
933  for (LO row = 0; row < numRows; row++) {
934  ArrayView<const LO> indices;
935  indicesExtra.resize(0);
936 
937  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
938  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
939  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
940  // with local ids.
941  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
942  // node.
943  bool isBoundary = false;
944  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
945  for (LO j = 0; j < blkPartSize; j++) {
946  if (pointBoundaryNodes[row*blkPartSize+j]) {
947  isBoundary = true;
948  break;
949  }
950  }
951  } else {
952  isBoundary = true;
953  for (LO j = 0; j < blkPartSize; j++) {
954  if (!pointBoundaryNodes[row*blkPartSize+j]) {
955  isBoundary = false;
956  break;
957  }
958  }
959  }
960 
961  // Merge rows of A
962  // The array indicesExtra contains local column node ids for the current local node "row"
963  if (!isBoundary)
964  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
965  else
966  indicesExtra.push_back(row);
967  indices = indicesExtra;
968  numTotal += indices.size();
969 
970  // add the local column node ids to the full columns array which
971  // contains the local column node ids for all local node rows
972  LO nnz = indices.size(), rownnz = 0;
973  for (LO colID = 0; colID < nnz; colID++) {
974  LO col = indices[colID];
975  columns[realnnz++] = col;
976  rownnz++;
977  }
978 
979  if (rownnz == 1) {
980  // If the only element remaining after filtering is diagonal, mark node as boundary
981  // FIXME: this should really be replaced by the following
982  // if (indices.size() == 1 && indices[0] == row)
983  // boundaryNodes[row] = true;
984  // We do not do it this way now because there is no framework for distinguishing isolated
985  // and boundary nodes in the aggregation algorithms
986  amalgBoundaryNodes[row] = true;
987  }
988  rows[row+1] = realnnz;
989  } //for (LO row = 0; row < numRows; row++)
990  columns.resize(realnnz);
991 
992  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
993  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
994 
995  if (GetVerbLevel() & Statistics1) {
996  GO numLocalBoundaryNodes = 0;
997  GO numGlobalBoundaryNodes = 0;
998 
999  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1000  if (amalgBoundaryNodes[i])
1001  numLocalBoundaryNodes++;
1002 
1003  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1004  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1005  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1006  << " agglomerated Dirichlet nodes" << std::endl;
1007  }
1008 
1009  Set(currentLevel, "Graph", graph);
1010  Set(currentLevel, "DofsPerNode", blkSize); // full block size
1011  }
1012 
1013  } else if (algo == "distance laplacian") {
1014  LO blkSize = A->GetFixedBlockSize();
1015  GO indexBase = A->getRowMap()->getIndexBase();
1016  // [*0*] : FIXME
1017  // ap: somehow, if I move this line to [*1*], Belos throws an error
1018  // I'm not sure what's going on. Do we always have to Get data, if we did
1019  // DeclareInput for it?
1020  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1021 
1022  // Detect and record rows that correspond to Dirichlet boundary conditions
1023  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1024  // TODO the array one bigger than the number of local rows, and the last entry can
1025  // TODO hold the actual number of boundary nodes. Clever, huh?
1026  ArrayRCP<bool > pointBoundaryNodes;
1027  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1028  if (rowSumTol > 0.)
1029  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
1030 
1031  if ( (blkSize == 1) && (threshold == STS::zero()) ) {
1032  // Trivial case: scalar problem, no dropping. Can return original graph
1033  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
1034  graph->SetBoundaryNodeMap(pointBoundaryNodes);
1035  graphType="unamalgamated";
1036  numTotal = A->getLocalNumEntries();
1037 
1038  if (GetVerbLevel() & Statistics1) {
1039  GO numLocalBoundaryNodes = 0;
1040  GO numGlobalBoundaryNodes = 0;
1041  for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
1042  if (pointBoundaryNodes[i])
1043  numLocalBoundaryNodes++;
1044  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1045  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1046  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1047  }
1048 
1049  Set(currentLevel, "DofsPerNode", blkSize);
1050  Set(currentLevel, "Graph", graph);
1051 
1052  } else {
1053  // ap: We make quite a few assumptions here; general case may be a lot different,
1054  // but much much harder to implement. We assume that:
1055  // 1) all maps are standard maps, not strided maps
1056  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1057  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1058  //
1059  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1060  // but as I totally don't understand that code, here is my solution
1061 
1062  // [*1*]: see [*0*]
1063 
1064  // Check that the number of local coordinates is consistent with the #rows in A
1065  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1066  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size ("<< blkSize <<").");
1067 
1068  const RCP<const Map> colMap = A->getColMap();
1069  RCP<const Map> uniqueMap, nonUniqueMap;
1070  Array<LO> colTranslation;
1071  if (blkSize == 1) {
1072  uniqueMap = A->getRowMap();
1073  nonUniqueMap = A->getColMap();
1074  graphType="unamalgamated";
1075 
1076  } else {
1077  uniqueMap = Coords->getMap();
1078  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1079  "Different index bases for matrix and coordinates");
1080 
1081  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1082 
1083  graphType = "amalgamated";
1084  }
1085  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1086 
1087  RCP<RealValuedMultiVector> ghostedCoords;
1088  RCP<Vector> ghostedLaplDiag;
1089  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1090  if (threshold != STS::zero()) {
1091  // Get ghost coordinates
1092  RCP<const Import> importer;
1093  {
1094  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1095  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1096  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1097  importer = realA->getCrsGraph()->getImporter();
1098  } else {
1099  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1100  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1101  }
1102  } //subtimer
1103  ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
1104  {
1105  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1106  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1107  } //subtimer
1108 
1109  // Construct Distance Laplacian diagonal
1110  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1111  Array<LO> indicesExtra;
1112  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1113  if (threshold != STS::zero()) {
1114  const size_t numVectors = ghostedCoords->getNumVectors();
1115  coordData.reserve(numVectors);
1116  for (size_t j = 0; j < numVectors; j++) {
1117  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1118  coordData.push_back(tmpData);
1119  }
1120  }
1121  {
1122  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1123  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1124  for (LO row = 0; row < numRows; row++) {
1125  ArrayView<const LO> indices;
1126 
1127  if (blkSize == 1) {
1128  ArrayView<const SC> vals;
1129  A->getLocalRowView(row, indices, vals);
1130 
1131  } else {
1132  // Merge rows of A
1133  indicesExtra.resize(0);
1134  MergeRows(*A, row, indicesExtra, colTranslation);
1135  indices = indicesExtra;
1136  }
1137 
1138  LO nnz = indices.size();
1139  bool haveAddedToDiag = false;
1140  for (LO colID = 0; colID < nnz; colID++) {
1141  const LO col = indices[colID];
1142 
1143  if (row != col) {
1144  if(use_dlap_weights == SINGLE_WEIGHTS) {
1145  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1146  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1147  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1148  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1149  }
1150  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1151  int block_id = row % interleaved_blocksize;
1152  int block_start = block_id * interleaved_blocksize;
1153  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1154  }
1155  else {
1156  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1157  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1158  }
1159  haveAddedToDiag = true;
1160  }
1161  }
1162  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1163  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1164  if (!haveAddedToDiag)
1165  localLaplDiagData[row] = STS::rmax();
1166  }
1167  } //subtimer
1168  {
1169  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1170  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1171  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1172  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1173  } //subtimer
1174 
1175  } else {
1176  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1177  }
1178 
1179  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1180 
1181  // allocate space for the local graph
1182  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
1183  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
1184 
1185 #ifdef HAVE_MUELU_DEBUG
1186  // DEBUGGING
1187  for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666;
1188 #endif
1189 
1190  // Extra array for if we're allowing symmetrization with cutting
1191  ArrayRCP<LO> rows_stop;
1192  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1193  if(use_stop_array)
1194  rows_stop.resize(numRows);
1195 
1196  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
1197 
1198  LO realnnz = 0;
1199  rows[0] = 0;
1200 
1201  Array<LO> indicesExtra;
1202  {
1203  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1204  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1205  if (threshold != STS::zero()) {
1206  const size_t numVectors = ghostedCoords->getNumVectors();
1207  coordData.reserve(numVectors);
1208  for (size_t j = 0; j < numVectors; j++) {
1209  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1210  coordData.push_back(tmpData);
1211  }
1212  }
1213 
1214  ArrayView<const SC> vals;//CMS hackery
1215  for (LO row = 0; row < numRows; row++) {
1216  ArrayView<const LO> indices;
1217  indicesExtra.resize(0);
1218  bool isBoundary = false;
1219 
1220  if (blkSize == 1) {
1221  // ArrayView<const SC> vals;//CMS uncomment
1222  A->getLocalRowView(row, indices, vals);
1223  isBoundary = pointBoundaryNodes[row];
1224  } else {
1225  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1226  for (LO j = 0; j < blkSize; j++) {
1227  if (!pointBoundaryNodes[row*blkSize+j]) {
1228  isBoundary = false;
1229  break;
1230  }
1231  }
1232 
1233  // Merge rows of A
1234  if (!isBoundary)
1235  MergeRows(*A, row, indicesExtra, colTranslation);
1236  else
1237  indicesExtra.push_back(row);
1238  indices = indicesExtra;
1239  }
1240  numTotal += indices.size();
1241 
1242  LO nnz = indices.size(), rownnz = 0;
1243 
1244  if(use_stop_array) {
1245  rows[row+1] = rows[row]+nnz;
1246  realnnz = rows[row];
1247  }
1248 
1249  if (threshold != STS::zero()) {
1250  // default
1251  if (distanceLaplacianAlgo == defaultAlgo) {
1252  /* Standard Distance Laplacian */
1253  for (LO colID = 0; colID < nnz; colID++) {
1254 
1255  LO col = indices[colID];
1256 
1257  if (row == col) {
1258  columns[realnnz++] = col;
1259  rownnz++;
1260  continue;
1261  }
1262 
1263  // We do not want the distance Laplacian aggregating boundary nodes
1264  if(isBoundary) continue;
1265 
1266  SC laplVal;
1267  if(use_dlap_weights == SINGLE_WEIGHTS) {
1268  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1269  }
1270  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1271  int block_id = row % interleaved_blocksize;
1272  int block_start = block_id * interleaved_blocksize;
1273  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1274  }
1275  else {
1276  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1277  }
1278  real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1279  real_type aij = STS::magnitude(laplVal*laplVal);
1280 
1281  if (aij > aiiajj) {
1282  columns[realnnz++] = col;
1283  rownnz++;
1284  } else {
1285  numDropped++;
1286  }
1287  }
1288  } else {
1289  /* Cut Algorithm */
1290  using DropTol = Details::DropTol<real_type,LO>;
1291  std::vector<DropTol> drop_vec;
1292  drop_vec.reserve(nnz);
1293  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1294  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1295 
1296  // find magnitudes
1297  for (LO colID = 0; colID < nnz; colID++) {
1298 
1299  LO col = indices[colID];
1300 
1301  if (row == col) {
1302  drop_vec.emplace_back( zero, one, colID, false);
1303  continue;
1304  }
1305  // We do not want the distance Laplacian aggregating boundary nodes
1306  if(isBoundary) continue;
1307 
1308  SC laplVal;
1309  if(use_dlap_weights == SINGLE_WEIGHTS) {
1310  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1311  }
1312  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1313  int block_id = row % interleaved_blocksize;
1314  int block_start = block_id * interleaved_blocksize;
1315  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1316  }
1317  else {
1318  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1319  }
1320 
1321  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1322  real_type aij = STS::magnitude(laplVal*laplVal);
1323 
1324  drop_vec.emplace_back(aij, aiiajj, colID, false);
1325  }
1326 
1327  const size_t n = drop_vec.size();
1328 
1329  if (distanceLaplacianAlgo == unscaled_cut) {
1330 
1331  std::sort( drop_vec.begin(), drop_vec.end()
1332  , [](DropTol const& a, DropTol const& b) {
1333  return a.val > b.val;
1334  }
1335  );
1336 
1337  bool drop = false;
1338  for (size_t i=1; i<n; ++i) {
1339  if (!drop) {
1340  auto const& x = drop_vec[i-1];
1341  auto const& y = drop_vec[i];
1342  auto a = x.val;
1343  auto b = y.val;
1344  if (a > realThreshold*b) {
1345  drop = true;
1346 #ifdef HAVE_MUELU_DEBUG
1347  if (distanceLaplacianCutVerbose) {
1348  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1349  }
1350 #endif
1351  }
1352  }
1353  drop_vec[i].drop = drop;
1354  }
1355  }
1356  else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1357 
1358  std::sort( drop_vec.begin(), drop_vec.end()
1359  , [](DropTol const& a, DropTol const& b) {
1360  return a.val/a.diag > b.val/b.diag;
1361  }
1362  );
1363 
1364  bool drop = false;
1365  for (size_t i=1; i<n; ++i) {
1366  if (!drop) {
1367  auto const& x = drop_vec[i-1];
1368  auto const& y = drop_vec[i];
1369  auto a = x.val/x.diag;
1370  auto b = y.val/y.diag;
1371  if (a > realThreshold*b) {
1372  drop = true;
1373 #ifdef HAVE_MUELU_DEBUG
1374  if (distanceLaplacianCutVerbose) {
1375  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1376  }
1377 #endif
1378  }
1379  }
1380  drop_vec[i].drop = drop;
1381  }
1382  }
1383 
1384  std::sort( drop_vec.begin(), drop_vec.end()
1385  , [](DropTol const& a, DropTol const& b) {
1386  return a.col < b.col;
1387  }
1388  );
1389 
1390  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1391  LO col = indices[drop_vec[idxID].col];
1392 
1393 
1394  // don't drop diagonal
1395  if (row == col) {
1396  columns[realnnz++] = col;
1397  rownnz++;
1398  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1399  continue;
1400  }
1401 
1402  if (!drop_vec[idxID].drop) {
1403  columns[realnnz++] = col;
1404  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1405  rownnz++;
1406  } else {
1407  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1408  numDropped++;
1409  }
1410  }
1411  }
1412  } else {
1413  // Skip laplace calculation and threshold comparison for zero threshold
1414  for (LO colID = 0; colID < nnz; colID++) {
1415  LO col = indices[colID];
1416  columns[realnnz++] = col;
1417  rownnz++;
1418  }
1419  }
1420 
1421  if ( rownnz == 1) {
1422  // If the only element remaining after filtering is diagonal, mark node as boundary
1423  // FIXME: this should really be replaced by the following
1424  // if (indices.size() == 1 && indices[0] == row)
1425  // boundaryNodes[row] = true;
1426  // We do not do it this way now because there is no framework for distinguishing isolated
1427  // and boundary nodes in the aggregation algorithms
1428  amalgBoundaryNodes[row] = true;
1429  }
1430 
1431  if(use_stop_array)
1432  rows_stop[row] = rownnz + rows[row];
1433  else
1434  rows[row+1] = realnnz;
1435  } //for (LO row = 0; row < numRows; row++)
1436 
1437  } //subtimer
1438 
1439  if (use_stop_array) {
1440  // Do symmetrization of the cut matrix
1441  // NOTE: We assume nested row/column maps here
1442  for (LO row = 0; row < numRows; row++) {
1443  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1444  LO col = columns[colidx];
1445  if(col >= numRows) continue;
1446 
1447  bool found = false;
1448  for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) {
1449  if (columns[t_col] == row)
1450  found = true;
1451  }
1452  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1453  // into a Dirichlet unknown. In that case don't.
1454  if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) {
1455  LO new_idx = rows_stop[col];
1456  // printf("(%d,%d) SYMADD entry\n",col,row);
1457  columns[new_idx] = row;
1458  rows_stop[col]++;
1459  numDropped--;
1460  }
1461  }
1462  }
1463 
1464  // Condense everything down
1465  LO current_start=0;
1466  for (LO row = 0; row < numRows; row++) {
1467  LO old_start = current_start;
1468  for (LO col = rows[row]; col < rows_stop[row]; col++) {
1469  if(current_start != col) {
1470  columns[current_start] = columns[col];
1471  }
1472  current_start++;
1473  }
1474  rows[row] = old_start;
1475  }
1476  rows[numRows] = realnnz = current_start;
1477 
1478  }
1479 
1480  columns.resize(realnnz);
1481 
1482  RCP<GraphBase> graph;
1483  {
1484  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1485  graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1486  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1487  } //subtimer
1488 
1489  if (GetVerbLevel() & Statistics1) {
1490  GO numLocalBoundaryNodes = 0;
1491  GO numGlobalBoundaryNodes = 0;
1492 
1493  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1494  if (amalgBoundaryNodes[i])
1495  numLocalBoundaryNodes++;
1496 
1497  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1498  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1499  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1500  << " using threshold " << dirichletThreshold << std::endl;
1501  }
1502 
1503  Set(currentLevel, "Graph", graph);
1504  Set(currentLevel, "DofsPerNode", blkSize);
1505  }
1506  }
1507 
1508  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1509  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1510  GO numGlobalTotal, numGlobalDropped;
1511  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1512  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1513  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1514  if (numGlobalTotal != 0)
1515  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1516  GetOStream(Statistics1) << std::endl;
1517  }
1518 
1519  } else {
1520  //what Tobias has implemented
1521 
1522  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1523  //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1524  GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1525  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1526 
1527  RCP<const Map> rowMap = A->getRowMap();
1528  RCP<const Map> colMap = A->getColMap();
1529 
1530  LO blockdim = 1; // block dim for fixed size blocks
1531  GO indexBase = rowMap->getIndexBase(); // index base of maps
1532  GO offset = 0;
1533 
1534  // 1) check for blocking/striding information
1535  if(A->IsView("stridedMaps") &&
1536  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1537  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1538  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1539  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1540  blockdim = strMap->getFixedBlockSize();
1541  offset = strMap->getOffset();
1542  oldView = A->SwitchToView(oldView);
1543  GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1544  } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1545 
1546  // 2) get row map for amalgamated matrix (graph of A)
1547  // with same distribution over all procs as row map of A
1548  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1549  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1550 
1551  // 3) create graph of amalgamated matrix
1552  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries()*blockdim);
1553 
1554  LO numRows = A->getRowMap()->getLocalNumElements();
1555  LO numNodes = nodeMap->getLocalNumElements();
1556  const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1557  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1558  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1559 
1560  // 4) do amalgamation. generate graph of amalgamated matrix
1561  // Note, this code is much more inefficient than the leightwight implementation
1562  // Most of the work has already been done in the AmalgamationFactory
1563  for(LO row=0; row<numRows; row++) {
1564  // get global DOF id
1565  GO grid = rowMap->getGlobalElement(row);
1566 
1567  // reinitialize boolean helper variable
1568  bIsDiagonalEntry = false;
1569 
1570  // translate grid to nodeid
1571  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1572 
1573  size_t nnz = A->getNumEntriesInLocalRow(row);
1574  Teuchos::ArrayView<const LO> indices;
1575  Teuchos::ArrayView<const SC> vals;
1576  A->getLocalRowView(row, indices, vals);
1577 
1578  RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1579  LO realnnz = 0;
1580  for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1581  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1582 
1583  if(vals[col]!=STS::zero()) {
1584  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1585  cnodeIds->push_back(cnodeId);
1586  realnnz++; // increment number of nnz in matrix row
1587  if (grid == gcid) bIsDiagonalEntry = true;
1588  }
1589  }
1590 
1591  if(realnnz == 1 && bIsDiagonalEntry == true) {
1592  LO lNodeId = nodeMap->getLocalElement(nodeId);
1593  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1594  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1595  amalgBoundaryNodes[lNodeId] = true;
1596  }
1597 
1598  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1599 
1600  if(arr_cnodeIds.size() > 0 )
1601  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1602  }
1603  // fill matrix graph
1604  crsGraph->fillComplete(nodeMap,nodeMap);
1605 
1606  // 5) create MueLu Graph object
1607  RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1608 
1609  // Detect and record rows that correspond to Dirichlet boundary conditions
1610  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1611 
1612  if (GetVerbLevel() & Statistics1) {
1613  GO numLocalBoundaryNodes = 0;
1614  GO numGlobalBoundaryNodes = 0;
1615  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1616  if (amalgBoundaryNodes[i])
1617  numLocalBoundaryNodes++;
1618  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1619  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1620  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1621  }
1622 
1623  // 6) store results in Level
1624  //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1625  Set(currentLevel, "DofsPerNode", blockdim);
1626  Set(currentLevel, "Graph", graph);
1627 
1628  } //if (doExperimentalWrap) ... else ...
1629 
1630 
1631  } //Build
1632 
1633  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1634  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1635  typedef typename ArrayView<const LO>::size_type size_type;
1636 
1637  // extract striding information
1638  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1639  if (A.IsView("stridedMaps") == true) {
1640  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1641  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1642  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1643  if (strMap->getStridedBlockId() > -1)
1644  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1645  }
1646 
1647  // count nonzero entries in all dof rows associated with node row
1648  size_t nnz = 0, pos = 0;
1649  for (LO j = 0; j < blkSize; j++)
1650  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1651 
1652  if (nnz == 0) {
1653  cols.resize(0);
1654  return;
1655  }
1656 
1657  cols.resize(nnz);
1658 
1659  // loop over all local dof rows associated with local node "row"
1660  ArrayView<const LO> inds;
1661  ArrayView<const SC> vals;
1662  for (LO j = 0; j < blkSize; j++) {
1663  A.getLocalRowView(row*blkSize+j, inds, vals);
1664  size_type numIndices = inds.size();
1665 
1666  if (numIndices == 0) // skip empty dof rows
1667  continue;
1668 
1669  // cols: stores all local node ids for current local node id "row"
1670  cols[pos++] = translation[inds[0]];
1671  for (size_type k = 1; k < numIndices; k++) {
1672  LO nodeID = translation[inds[k]];
1673  // Here we try to speed up the process by reducing the size of an array
1674  // to sort. This works if the column nonzeros belonging to the same
1675  // node are stored consequently.
1676  if (nodeID != cols[pos-1])
1677  cols[pos++] = nodeID;
1678  }
1679  }
1680  cols.resize(pos);
1681  nnz = pos;
1682 
1683  // Sort and remove duplicates
1684  std::sort(cols.begin(), cols.end());
1685  pos = 0;
1686  for (size_t j = 1; j < nnz; j++)
1687  if (cols[j] != cols[pos])
1688  cols[++pos] = cols[j];
1689  cols.resize(pos+1);
1690  }
1691 
1692  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1693  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1694  typedef typename ArrayView<const LO>::size_type size_type;
1695  typedef Teuchos::ScalarTraits<SC> STS;
1696 
1697  // extract striding information
1698  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1699  if (A.IsView("stridedMaps") == true) {
1700  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1701  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1702  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1703  if (strMap->getStridedBlockId() > -1)
1704  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1705  }
1706 
1707  // count nonzero entries in all dof rows associated with node row
1708  size_t nnz = 0, pos = 0;
1709  for (LO j = 0; j < blkSize; j++)
1710  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1711 
1712  if (nnz == 0) {
1713  cols.resize(0);
1714  return;
1715  }
1716 
1717  cols.resize(nnz);
1718 
1719  // loop over all local dof rows associated with local node "row"
1720  ArrayView<const LO> inds;
1721  ArrayView<const SC> vals;
1722  for (LO j = 0; j < blkSize; j++) {
1723  A.getLocalRowView(row*blkSize+j, inds, vals);
1724  size_type numIndices = inds.size();
1725 
1726  if (numIndices == 0) // skip empty dof rows
1727  continue;
1728 
1729  // cols: stores all local node ids for current local node id "row"
1730  LO prevNodeID = -1;
1731  for (size_type k = 0; k < numIndices; k++) {
1732  LO dofID = inds[k];
1733  LO nodeID = translation[inds[k]];
1734 
1735  // we avoid a square root by using squared values
1736  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1737  typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1738 
1739  // check dropping criterion
1740  if (aij > aiiajj || (row*blkSize+j == dofID)) {
1741  // accept entry in graph
1742 
1743  // Here we try to speed up the process by reducing the size of an array
1744  // to sort. This works if the column nonzeros belonging to the same
1745  // node are stored consequently.
1746  if (nodeID != prevNodeID) {
1747  cols[pos++] = nodeID;
1748  prevNodeID = nodeID;
1749  }
1750  }
1751  }
1752  }
1753  cols.resize(pos);
1754  nnz = pos;
1755 
1756  // Sort and remove duplicates
1757  std::sort(cols.begin(), cols.end());
1758  pos = 0;
1759  for (size_t j = 1; j < nnz; j++)
1760  if (cols[j] != cols[pos])
1761  cols[++pos] = cols[j];
1762  cols.resize(pos+1);
1763 
1764  return;
1765  }
1766 
1767 
1768 
1769  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1770  Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level & currentLevel,const RCP<Matrix>& A,bool generate_matrix) const {
1771  typedef Teuchos::ScalarTraits<SC> STS;
1772 
1773  const ParameterList & pL = GetParameterList();
1774  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1775  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1776 
1777  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
1778  RCP<LocalOrdinalVector> ghostedBlockNumber;
1779  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"<<std::endl;
1780 
1781  // Ghost the column block numbers if we need to
1782  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1783  if(!importer.is_null()) {
1784  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1785  ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
1786  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1787  }
1788  else {
1789  ghostedBlockNumber = BlockNumber;
1790  }
1791 
1792  // Accessors for block numbers
1793  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1794  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1795 
1796  // allocate space for the local graph
1797  ArrayRCP<size_t> rows_mat;
1798  ArrayRCP<LO> rows_graph,columns;
1799  ArrayRCP<SC> values;
1800  RCP<CrsMatrixWrap> crs_matrix_wrap;
1801 
1802  if(generate_matrix) {
1803  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1804  crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values);
1805  }
1806  else {
1807  rows_graph.resize(A->getLocalNumRows()+1);
1808  columns.resize(A->getLocalNumEntries());
1809  values.resize(A->getLocalNumEntries());
1810  }
1811 
1812  LO realnnz = 0;
1813  GO numDropped = 0, numTotal = 0;
1814  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1815  LO row_block = row_block_number[row];
1816  size_t nnz = A->getNumEntriesInLocalRow(row);
1817  ArrayView<const LO> indices;
1818  ArrayView<const SC> vals;
1819  A->getLocalRowView(row, indices, vals);
1820 
1821  LO rownnz = 0;
1822  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1823  LO col = indices[colID];
1824  LO col_block = col_block_number[col];
1825 
1826  if(row_block == col_block) {
1827  if(generate_matrix) values[realnnz] = vals[colID];
1828  columns[realnnz++] = col;
1829  rownnz++;
1830  } else
1831  numDropped++;
1832  }
1833  if(generate_matrix) rows_mat[row+1] = realnnz;
1834  else rows_graph[row+1] = realnnz;
1835  }
1836 
1837  ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1838  if (rowSumTol > 0.)
1839  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
1840 
1841 
1842  if(!generate_matrix) {
1843  // We can't resize an Arrayrcp and pass the checks for setAllValues
1844  values.resize(realnnz);
1845  columns.resize(realnnz);
1846  }
1847  numTotal = A->getLocalNumEntries();
1848 
1849  if (GetVerbLevel() & Statistics1) {
1850  GO numLocalBoundaryNodes = 0;
1851  GO numGlobalBoundaryNodes = 0;
1852  for (LO i = 0; i < boundaryNodes.size(); ++i)
1853  if (boundaryNodes[i])
1854  numLocalBoundaryNodes++;
1855  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1856  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1857  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1858 
1859  GO numGlobalTotal, numGlobalDropped;
1860  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1861  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1862  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1863  if (numGlobalTotal != 0)
1864  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1865  GetOStream(Statistics1) << std::endl;
1866  }
1867 
1868  Set(currentLevel, "Filtering", true);
1869 
1870  if(generate_matrix) {
1871  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1872  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1873  // here, which is legit, because we never use them anyway.
1874  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values);
1875  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1876  }
1877  else {
1878  RCP<GraphBase> graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1879  graph->SetBoundaryNodeMap(boundaryNodes);
1880  Set(currentLevel, "Graph", graph);
1881  }
1882 
1883 
1884  Set(currentLevel, "DofsPerNode", 1);
1885  return crs_matrix_wrap;
1886  }
1887 
1888 
1889  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1890  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<GraphBase> & inputGraph, const RCP<LocalOrdinalVector> & ghostedBlockNumber, RCP<GraphBase> & outputGraph, RCP<const Import> & importer) const {
1891 
1892  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1893  const ParameterList & pL = GetParameterList();
1894 
1895  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1896 
1897  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1898  if (localizeColoringGraph)
1899  GetOStream(Statistics1) << ", with localization" <<std::endl;
1900  else
1901  GetOStream(Statistics1) << ", without localization" <<std::endl;
1902 
1903  // Accessors for block numbers
1904  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1905  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1906 
1907  // allocate space for the local graph
1908  ArrayRCP<size_t> rows_mat;
1909  ArrayRCP<LO> rows_graph,columns;
1910 
1911  rows_graph.resize(inputGraph->GetNodeNumVertices()+1);
1912  columns.resize(inputGraph->GetNodeNumEdges());
1913 
1914  LO realnnz = 0;
1915  GO numDropped = 0, numTotal = 0;
1916  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1917  if (localizeColoringGraph) {
1918 
1919  for (LO row = 0; row < numRows; ++row) {
1920  LO row_block = row_block_number[row];
1921  ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1922 
1923  LO rownnz = 0;
1924  for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1925  LO col = indices[colID];
1926  LO col_block = col_block_number[col];
1927 
1928  if((row_block == col_block) && (col < numRows)) {
1929  columns[realnnz++] = col;
1930  rownnz++;
1931  } else
1932  numDropped++;
1933  }
1934  rows_graph[row+1] = realnnz;
1935  }
1936  } else {
1937  // ghosting of boundary node map
1938  Teuchos::ArrayRCP<const bool> boundaryNodes = inputGraph->GetBoundaryNodeMap();
1939  auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetDomainMap());
1940  for (size_t i=0; i<inputGraph->GetNodeNumVertices(); i++)
1941  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1942  // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1943  auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetImportMap());
1944  boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT);
1945  auto boundaryColumn = boundaryColumnVector->getData(0);
1946 
1947  for (LO row = 0; row < numRows; ++row) {
1948  LO row_block = row_block_number[row];
1949  ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1950 
1951  LO rownnz = 0;
1952  for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1953  LO col = indices[colID];
1954  LO col_block = col_block_number[col];
1955 
1956  if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1957  columns[realnnz++] = col;
1958  rownnz++;
1959  } else
1960  numDropped++;
1961  }
1962  rows_graph[row+1] = realnnz;
1963  }
1964  }
1965 
1966  columns.resize(realnnz);
1967  numTotal = inputGraph->GetNodeNumEdges();
1968 
1969  if (GetVerbLevel() & Statistics1) {
1970  RCP<const Teuchos::Comm<int> > comm = inputGraph->GetDomainMap()->getComm();
1971  GO numGlobalTotal, numGlobalDropped;
1972  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1973  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1974  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1975  if (numGlobalTotal != 0)
1976  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1977  GetOStream(Statistics1) << std::endl;
1978  }
1979 
1980  if (localizeColoringGraph) {
1981  outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1982  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1983  } else {
1984  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1985 #ifdef HAVE_XPETRA_TPETRA
1986  auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1987 
1988  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1989  auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal,GlobalOrdinal,Node>(tpGraph));
1990  auto tpGraphSym = sym->symmetrize();
1991 
1992  auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor
1993  Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost());
1994 
1995  auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
1996  ArrayRCP<LO> rows_graphSym;
1997  rows_graphSym.resize(rowsSym.size());
1998  for (size_t row = 0; row < rowsSym.size(); row++)
1999  rows_graphSym[row] = rowsSym[row];
2000  outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2001  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2002 #endif
2003  }
2004 
2005  }
2006 
2007 
2008 
2009 } //namespace MueLu
2010 
2011 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
Exception indicating invalid cast attempted.
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
MueLu representation of a compressed row storage graph.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
DropTol & operator=(DropTol const &)=default
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Exception throws to report incompatible objects (like maps).
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Additional warnings.
void DeclareInput(Level &currentLevel) const
Input.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
void Build(Level &currentLevel) const
Build an object with this factory.
Print class parameters.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
void BlockDiagonalizeGraph(const RCP< GraphBase > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< GraphBase > &outputGraph, RCP< const Import > &importer) const