MueLu  Version of the Day
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
48 
49 #include <sstream>
50 
51 #include "MueLu_ConfigDefs.hpp"
52 
53 #include "Xpetra_Map.hpp"
54 #include "Xpetra_MatrixMatrix.hpp"
55 #include "Xpetra_TripleMatrixMultiply.hpp"
56 #include "Xpetra_CrsMatrixUtils.hpp"
57 #include "Xpetra_MatrixUtils.hpp"
58 
60 
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
66 
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_SaPFactory.hpp"
73 #include "MueLu_AggregationExportFactory.hpp"
74 #include "MueLu_Utilities.hpp"
75 #include "MueLu_Maxwell_Utils.hpp"
76 
77 #include "MueLu_AmalgamationFactory_kokkos.hpp"
78 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
79 #include "MueLu_CoarseMapFactory_kokkos.hpp"
80 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
81 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
82 #include "MueLu_TentativePFactory_kokkos.hpp"
83 #include "MueLu_SaPFactory_kokkos.hpp"
84 #include "MueLu_Utilities_kokkos.hpp"
85 #include <Kokkos_Core.hpp>
86 #include <KokkosSparse_CrsMatrix.hpp>
87 
88 #include "MueLu_ZoltanInterface.hpp"
89 #include "MueLu_Zoltan2Interface.hpp"
90 #include "MueLu_RepartitionHeuristicFactory.hpp"
91 #include "MueLu_RepartitionFactory.hpp"
92 #include "MueLu_RebalanceAcFactory.hpp"
93 #include "MueLu_RebalanceTransferFactory.hpp"
94 
95 #include "MueLu_VerbosityLevel.hpp"
96 
99 
100 #ifdef HAVE_MUELU_CUDA
101 #include "cuda_profiler_api.h"
102 #endif
103 
104 // Stratimikos
105 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
106 #include <Xpetra_ThyraLinearOp.hpp>
107 #endif
108 
109 
110 namespace MueLu {
111 
112  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
113  Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const {
114  return SM_Matrix_->getDomainMap();
115  }
116 
117 
118  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
119  Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const {
120  return SM_Matrix_->getRangeMap();
121  }
122 
123 
124  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
126 
127  if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
128  Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
129  if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
130  newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
131  if(list.isSublist("refmaxwell: 22list"))
132  newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
133  list = newList;
134  }
135 
136  parameterList_ = list;
137  std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
139  std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
140  if (outputFilename != "")
141  VerboseObject::SetMueLuOFileStream(outputFilename);
142  if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
143  VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
144 
145  if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
146  GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
147  disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
148  mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
149  use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
150  dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
151  enable_reuse_ = list.get("refmaxwell: enable reuse", MasterList::getDefault<bool>("refmaxwell: enable reuse"));
152  implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
153  fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
154  skipFirstLevel_ = list.get("refmaxwell: skip first (1,1) level", MasterList::getDefault<bool>("refmaxwell: skip first (1,1) level"));
155  syncTimers_ = list.get("sync timers", false);
156  numItersH_ = list.get("refmaxwell: num iters H", 1);
157  numIters22_ = list.get("refmaxwell: num iters 22", 1);
158  applyBCsToAnodal_ = list.get("refmaxwell: apply BCs to Anodal", false);
159  applyBCsToH_ = list.get("refmaxwell: apply BCs to H", true);
160  applyBCsTo22_ = list.get("refmaxwell: apply BCs to 22", true);
161 
162  if(list.isSublist("refmaxwell: 11list"))
163  precList11_ = list.sublist("refmaxwell: 11list");
164  if(!precList11_.isType<std::string>("Preconditioner Type") &&
165  !precList11_.isType<std::string>("smoother: type") &&
166  !precList11_.isType<std::string>("smoother: pre type") &&
167  !precList11_.isType<std::string>("smoother: post type")) {
168  precList11_.set("smoother: type", "CHEBYSHEV");
169  precList11_.sublist("smoother: params").set("chebyshev: degree",2);
170  precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
171  precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
172  }
173 
174  if(list.isSublist("refmaxwell: 22list"))
175  precList22_ = list.sublist("refmaxwell: 22list");
176  if(!precList22_.isType<std::string>("Preconditioner Type") &&
177  !precList22_.isType<std::string>("smoother: type") &&
178  !precList22_.isType<std::string>("smoother: pre type") &&
179  !precList22_.isType<std::string>("smoother: post type")) {
180  precList22_.set("smoother: type", "CHEBYSHEV");
181  precList22_.sublist("smoother: params").set("chebyshev: degree",2);
182  precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
183  precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
184  }
185 
186  if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
187  list.set("smoother: type", "CHEBYSHEV");
188  list.sublist("smoother: params").set("chebyshev: degree",2);
189  list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
190  list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
191  }
192 
193  if(list.isSublist("smoother: params")) {
194  smootherList_ = list.sublist("smoother: params");
195  }
196 
197  if (enable_reuse_ &&
198  !precList11_.isType<std::string>("Preconditioner Type") &&
199  !precList11_.isParameter("reuse: type"))
200  precList11_.set("reuse: type", "full");
201  if (enable_reuse_ &&
202  !precList22_.isType<std::string>("Preconditioner Type") &&
203  !precList22_.isParameter("reuse: type"))
204  precList22_.set("reuse: type", "full");
205 
206 # ifdef HAVE_MUELU_SERIAL
207  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosSerialWrapperNode).name())
208  useKokkos_ = false;
209 # endif
210 # ifdef HAVE_MUELU_OPENMP
211  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosOpenMPWrapperNode).name())
212  useKokkos_ = true;
213 # endif
214 # ifdef HAVE_MUELU_CUDA
215  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosCudaWrapperNode).name())
216  useKokkos_ = true;
217 # endif
218 # ifdef HAVE_MUELU_HIP
219  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosHIPWrapperNode).name())
220  useKokkos_ = true;
221 # endif
222  useKokkos_ = list.get("use kokkos refactor",useKokkos_);
223  }
224 
225 
226 
227  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
229 
230 #ifdef HAVE_MUELU_CUDA
231  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
232 #endif
233 
234  std::string timerLabel;
235  if (reuse)
236  timerLabel = "MueLu RefMaxwell: compute (reuse)";
237  else
238  timerLabel = "MueLu RefMaxwell: compute";
239  RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
240 
242  // Remove explicit zeros from matrices
243  Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(parameterList_,D0_Matrix_,SM_Matrix_,M1_Matrix_,Ms_Matrix_);
244 
245  if (IsPrint(Statistics2)) {
246  RCP<ParameterList> params = rcp(new ParameterList());;
247  params->set("printLoadBalancingInfo", true);
248  params->set("printCommInfo", true);
249  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
250  }
251 
253  // Detect Dirichlet boundary conditions
254  if (!reuse) {
255  magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
256  Maxwell_Utils<SC,LO,GO,NO>::detectBoundaryConditionsSM(SM_Matrix_,D0_Matrix_,rowSumTol,
257  useKokkos_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_,
258  BCedges_,BCnodes_,BCrows_,BCcols_,BCdomain_,
259  allEdgesBoundary_,allNodesBoundary_);
260  if (IsPrint(Statistics2)) {
261  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
262  }
263  }
264 
265  if (allEdgesBoundary_) {
266  // All edges have been detected as boundary edges.
267  // Do not attempt to construct sub-hierarchies, but just set up a single level preconditioner.
268  GetOStream(Warnings0) << "All edges are detected as boundary edges!" << std::endl;
269  mode_ = "none";
270  setFineLevelSmoother();
271  return;
272  }
273 
275  // build nullspace if necessary
276 
277  if(Nullspace_ != null) {
278  // no need to do anything - nullspace is built
279  TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
280  }
281  else if(Nullspace_ == null && Coords_ != null) {
282  RCP<MultiVector> CoordsSC;
283  if (useKokkos_)
285  else
286  CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
287  Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
288  D0_Matrix_->apply(*CoordsSC,*Nullspace_);
289 
290  bool normalize = parameterList_.get<bool>("refmaxwell: normalize nullspace", MasterList::getDefault<bool>("refmaxwell: normalize nullspace"));
291 
292  coordinateType minLen, maxLen, meanLen;
293  if (IsPrint(Statistics2) || normalize){
294  // compute edge lengths
295  ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
296  for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
297  localNullspace[i] = Nullspace_->getData(i);
298  coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
299  coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
300  coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
301  for (size_t j=0; j < Nullspace_->getMap()->getLocalNumElements(); j++) {
302  Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
303  for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
304  lenSC += localNullspace[i][j]*localNullspace[i][j];
305  coordinateType len = Teuchos::as<coordinateType>(Teuchos::ScalarTraits<Scalar>::real(Teuchos::ScalarTraits<Scalar>::squareroot(lenSC)));
306  localMinLen = std::min(localMinLen, len);
307  localMaxLen = std::max(localMaxLen, len);
308  localMeanLen += len;
309  }
310 
311  RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
312  MueLu_minAll(comm, localMinLen, minLen);
313  MueLu_sumAll(comm, localMeanLen, meanLen);
314  MueLu_maxAll(comm, localMaxLen, maxLen);
315  meanLen /= Nullspace_->getMap()->getGlobalNumElements();
316  }
317 
318  if (IsPrint(Statistics2)) {
319  GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
320  }
321 
322  if (normalize) {
323  // normalize the nullspace
324  GetOStream(Runtime0) << "RefMaxwell::compute(): normalizing nullspace" << std::endl;
325 
326  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
327 
328  Array<Scalar> normsSC(Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
329  Nullspace_->scale(normsSC());
330  }
331  }
332  else {
333  GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
334  }
335 
336  if (!reuse && skipFirstLevel_) {
337  // Nuke the BC edges in nullspace
338  if (useKokkos_)
339  Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
340  else
341  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
342  dump(*Nullspace_, "nullspace.m");
343  }
344 
346  // build special prolongator for (1,1)-block
347 
348  if(P11_.is_null()) {
349  if (skipFirstLevel_) {
350  // Form A_nodal = D0* Ms D0 (aka TMT_agg)
351  std::string label("D0*Ms*D0");
352  A_nodal_Matrix_ = Maxwell_Utils<SC,LO,GO,NO>::PtAPWrapper(Ms_Matrix_,D0_Matrix_,parameterList_,label);
353 
354  if (applyBCsToAnodal_) {
355  // Apply boundary conditions to A_nodal
356  if (useKokkos_)
357  Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
358  else
359  Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_);
360  }
361  dump(*A_nodal_Matrix_, "A_nodal.m");
362  }
363 
364  // build special prolongator
365  GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
366  buildProlongator();
367  }
368 
369 #ifdef HAVE_MPI
370  bool doRebalancing = false;
371  doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
372  int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
373  int numProcsAH, numProcsA22;
374  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
375  if (numProcs == 1)
376  doRebalancing = false;
377 #endif
378  {
379  // build coarse grid operator for (1,1)-block
380  formCoarseMatrix();
381 
382  if (!reuse) {
383 #ifdef HAVE_MPI
384  if (doRebalancing) {
385 
386  {
387  // decide on number of ranks for coarse (1, 1) problem
388 
389  Level level;
390  level.SetFactoryManager(null);
391  level.SetLevelID(0);
392  level.Set("A",AH_);
393 
394  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
395  ParameterList repartheurParams;
396  repartheurParams.set("repartition: start level", 0);
397  // Setting min == target on purpose.
398  int defaultTargetRows = 10000;
399  repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
400  repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
401  repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
402  repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
403  repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
404  repartheurFactory->SetParameterList(repartheurParams);
405 
406  level.Request("number of partitions", repartheurFactory.get());
407  repartheurFactory->Build(level);
408  numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
409  numProcsAH = std::min(numProcsAH,numProcs);
410  }
411 
412  {
413  // decide on number of ranks for (2, 2) problem
414 
415  Level level;
416  level.SetFactoryManager(null);
417  level.SetLevelID(0);
418 
419  level.Set("Map",D0_Matrix_->getDomainMap());
420 
421  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
422  ParameterList repartheurParams;
423  repartheurParams.set("repartition: start level", 0);
424  repartheurParams.set("repartition: use map", true);
425  // Setting min == target on purpose.
426  int defaultTargetRows = 10000;
427  repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
428  repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
429  repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
430  repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
431  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
432  repartheurFactory->SetParameterList(repartheurParams);
433 
434  level.Request("number of partitions", repartheurFactory.get());
435  repartheurFactory->Build(level);
436  numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
437  numProcsA22 = std::min(numProcsA22,numProcs);
438  }
439 
440  if (rebalanceStriding >= 1) {
441  TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
442  TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
443  if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
444  GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
445  << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
446  rebalanceStriding = -1;
447  }
448  }
449 
450  // }
451 
452  if ((numProcsAH < 0) || (numProcsA22 < 0) || (numProcsAH + numProcsA22 > numProcs)) {
453  GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling rebalancing of subsolves, since partition heuristic resulted "
454  << "in undesirable number of partitions: " << numProcsAH << ", " << numProcsA22 << std::endl;
455  doRebalancing = false;
456  }
457 
458  // check again, as we could have changed the value above
459  if (doRebalancing) {
460  // rebalance AH
461  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
462 
463  Level fineLevel, coarseLevel;
464  fineLevel.SetFactoryManager(null);
465  coarseLevel.SetFactoryManager(null);
466  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
467  fineLevel.SetLevelID(0);
468  coarseLevel.SetLevelID(1);
469  coarseLevel.Set("A",AH_);
470  coarseLevel.Set("P",P11_);
471  coarseLevel.Set("Coordinates",CoordsH_);
472  if (!NullspaceH_.is_null())
473  coarseLevel.Set("Nullspace",NullspaceH_);
474  coarseLevel.Set("number of partitions", numProcsAH);
475  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
476 
477  coarseLevel.setlib(AH_->getDomainMap()->lib());
478  fineLevel.setlib(AH_->getDomainMap()->lib());
479  coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
480  fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
481 
482  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
483  RCP<Factory> partitioner;
484  if (partName == "zoltan") {
485 #ifdef HAVE_MUELU_ZOLTAN
486  partitioner = rcp(new ZoltanInterface());
487  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
488  // partitioner->SetFactory("number of partitions", repartheurFactory);
489 #else
490  throw Exceptions::RuntimeError("Zoltan interface is not available");
491 #endif
492  } else if (partName == "zoltan2") {
493 #ifdef HAVE_MUELU_ZOLTAN2
494  partitioner = rcp(new Zoltan2Interface());
495  ParameterList partParams;
496  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
497  partParams.set("ParameterList", partpartParams);
498  partitioner->SetParameterList(partParams);
499  // partitioner->SetFactory("number of partitions", repartheurFactory);
500 #else
501  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
502 #endif
503  }
504 
505  auto repartFactory = rcp(new RepartitionFactory());
506  ParameterList repartParams;
507  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
508  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
509  if (rebalanceStriding >= 1) {
510  bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
511  if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
512  acceptPart = false;
513  repartParams.set("repartition: remap accept partition", acceptPart);
514  }
515  repartFactory->SetParameterList(repartParams);
516  // repartFactory->SetFactory("number of partitions", repartheurFactory);
517  repartFactory->SetFactory("Partition", partitioner);
518 
519  auto newP = rcp(new RebalanceTransferFactory());
520  ParameterList newPparams;
521  newPparams.set("type", "Interpolation");
522  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
523  newPparams.set("repartition: use subcommunicators", true);
524  newPparams.set("repartition: rebalance Nullspace", !NullspaceH_.is_null());
525  newP->SetFactory("Coordinates", NoFactory::getRCP());
526  if (!NullspaceH_.is_null())
527  newP->SetFactory("Nullspace", NoFactory::getRCP());
528  newP->SetParameterList(newPparams);
529  newP->SetFactory("Importer", repartFactory);
530 
531  auto newA = rcp(new RebalanceAcFactory());
532  ParameterList rebAcParams;
533  rebAcParams.set("repartition: use subcommunicators", true);
534  newA->SetParameterList(rebAcParams);
535  newA->SetFactory("Importer", repartFactory);
536 
537  coarseLevel.Request("P", newP.get());
538  coarseLevel.Request("Importer", repartFactory.get());
539  coarseLevel.Request("A", newA.get());
540  coarseLevel.Request("Coordinates", newP.get());
541  if (!NullspaceH_.is_null())
542  coarseLevel.Request("Nullspace", newP.get());
543  repartFactory->Build(coarseLevel);
544 
545  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
546  ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
547  P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
548  AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
549  CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
550  if (!NullspaceH_.is_null())
551  NullspaceH_ = coarseLevel.Get< RCP<MultiVector> >("Nullspace", newP.get());
552 
553  AH_AP_reuse_data_ = Teuchos::null;
554  AH_RAP_reuse_data_ = Teuchos::null;
555 
556  if (!disable_addon_ && enable_reuse_) {
557  // Rebalance the addon for next setup
558  RCP<const Import> ImporterH = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
559  RCP<const Map> targetMap = ImporterH->getTargetMap();
560  ParameterList XpetraList;
561  XpetraList.set("Restrict Communicator",true);
562  Addon_Matrix_ = MatrixFactory::Build(Addon_Matrix_, *ImporterH, *ImporterH, targetMap, targetMap, rcp(&XpetraList,false));
563  }
564  }
565  }
566 #endif // HAVE_MPI
567 
568  // This should be taken out again as soon as
569  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
570  // drop tol != 0.0
571  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
572  GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
573  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
574  precList11_.set("aggregation: drop tol", 0.0);
575  }
576  dump(*P11_, "P11.m");
577 
578  if (!implicitTranspose_) {
579  if (useKokkos_)
580  R11_ = Utilities_kokkos::Transpose(*P11_);
581  else
582  R11_ = Utilities::Transpose(*P11_);
583  dump(*R11_, "R11.m");
584  }
585  }
586 
588  if (!AH_.is_null()) {
589  // set fixed block size for vector nodal matrix
590  size_t dim = Nullspace_->getNumVectors();
591  AH_->SetFixedBlockSize(dim);
592  AH_->setObjectLabel("RefMaxwell coarse (1,1)");
593  dump(*AH_, "AH.m");
594  int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
595  if (IsPrint(Statistics2)) {
596  RCP<ParameterList> params = rcp(new ParameterList());;
597  params->set("printLoadBalancingInfo", true);
598  params->set("printCommInfo", true);
599  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*AH_, "AH", params);
600  }
601 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
602  if (precList11_.isType<std::string>("Preconditioner Type")) {
603  // build a Stratimikos preconditioner
604  if (precList11_.get<std::string>("Preconditioner Type") == "MueLu") {
605  ParameterList& userParamList = precList11_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
606  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
607  }
608  thyraPrecOpH_ = rcp(new XpetraThyraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>(AH_, rcp(&precList11_, false)));
609  } else
610 #endif
611  {
612  // build a MueLu hierarchy
613 
614  if (!reuse) {
615  dumpCoords(*CoordsH_, "coordsH.m");
616  if (!NullspaceH_.is_null())
617  dump(*NullspaceH_, "NullspaceH.m");
618  ParameterList& userParamList = precList11_.sublist("user data");
619  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
620  if (!NullspaceH_.is_null())
621  userParamList.set<RCP<MultiVector> >("Nullspace", NullspaceH_);
622  HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
623  } else {
624  RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
625  level0->Set("A", AH_);
626  HierarchyH_->SetupRe();
627  }
628  }
629  SetProcRankVerbose(oldRank);
630  }
631  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
632 
633  }
634 
635  if(!reuse && applyBCsTo22_) {
636  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
637 
638  D0_Matrix_->resumeFill();
639  Scalar replaceWith;
640  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
641  replaceWith= Teuchos::ScalarTraits<SC>::eps();
642  else
643  replaceWith = Teuchos::ScalarTraits<SC>::zero();
644  if (useKokkos_) {
645  Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
646  } else {
647  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
648  }
649  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
650  }
651 
653  // Build A22 = D0* SM D0 and hierarchy for A22
654  if (!allNodesBoundary_) {
655  GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
656 
657  if (!reuse) { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
658  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
659 
660  Level fineLevel, coarseLevel;
661  fineLevel.SetFactoryManager(null);
662  coarseLevel.SetFactoryManager(null);
663  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
664  fineLevel.SetLevelID(0);
665  coarseLevel.SetLevelID(1);
666  fineLevel.Set("A",SM_Matrix_);
667  coarseLevel.Set("P",D0_Matrix_);
668  coarseLevel.Set("Coordinates",Coords_);
669 
670  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
671  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
672  coarseLevel.setObjectLabel("RefMaxwell (2,2)");
673  fineLevel.setObjectLabel("RefMaxwell (2,2)");
674 
675  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
676  ParameterList rapList = *(rapFact->GetValidParameterList());
677  rapList.set("transpose: use implicit", true);
678  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
679  rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
680  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
681  rapFact->SetParameterList(rapList);
682 
683  if (!A22_AP_reuse_data_.is_null()) {
684  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
685  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
686  }
687  if (!A22_RAP_reuse_data_.is_null()) {
688  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
689  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
690  }
691 
692 #ifdef HAVE_MPI
693  if (doRebalancing) {
694 
695  coarseLevel.Set("number of partitions", numProcsA22);
696  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
697 
698  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
699  RCP<Factory> partitioner;
700  if (partName == "zoltan") {
701 #ifdef HAVE_MUELU_ZOLTAN
702  partitioner = rcp(new ZoltanInterface());
703  partitioner->SetFactory("A", rapFact);
704  // partitioner->SetFactory("number of partitions", repartheurFactory);
705  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
706 #else
707  throw Exceptions::RuntimeError("Zoltan interface is not available");
708 #endif
709  } else if (partName == "zoltan2") {
710 #ifdef HAVE_MUELU_ZOLTAN2
711  partitioner = rcp(new Zoltan2Interface());
712  ParameterList partParams;
713  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
714  partParams.set("ParameterList", partpartParams);
715  partitioner->SetParameterList(partParams);
716  partitioner->SetFactory("A", rapFact);
717  // partitioner->SetFactory("number of partitions", repartheurFactory);
718 #else
719  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
720 #endif
721  }
722 
723  auto repartFactory = rcp(new RepartitionFactory());
724  ParameterList repartParams;
725  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
726  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
727  if (rebalanceStriding >= 1) {
728  bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
729  if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
730  acceptPart = false;
731  if (acceptPart)
732  TEUCHOS_ASSERT(AH_.is_null());
733  repartParams.set("repartition: remap accept partition", acceptPart);
734  } else
735  repartParams.set("repartition: remap accept partition", AH_.is_null());
736  repartFactory->SetParameterList(repartParams);
737  repartFactory->SetFactory("A", rapFact);
738  // repartFactory->SetFactory("number of partitions", repartheurFactory);
739  repartFactory->SetFactory("Partition", partitioner);
740 
741  auto newP = rcp(new RebalanceTransferFactory());
742  ParameterList newPparams;
743  newPparams.set("type", "Interpolation");
744  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
745  newPparams.set("repartition: use subcommunicators", true);
746  newPparams.set("repartition: rebalance Nullspace", false);
747  newP->SetFactory("Coordinates", NoFactory::getRCP());
748  newP->SetParameterList(newPparams);
749  newP->SetFactory("Importer", repartFactory);
750 
751  auto newA = rcp(new RebalanceAcFactory());
752  ParameterList rebAcParams;
753  rebAcParams.set("repartition: use subcommunicators", true);
754  newA->SetParameterList(rebAcParams);
755  newA->SetFactory("A", rapFact);
756  newA->SetFactory("Importer", repartFactory);
757 
758  coarseLevel.Request("P", newP.get());
759  coarseLevel.Request("Importer", repartFactory.get());
760  coarseLevel.Request("A", newA.get());
761  coarseLevel.Request("Coordinates", newP.get());
762  rapFact->Build(fineLevel,coarseLevel);
763  repartFactory->Build(coarseLevel);
764 
765  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
766  Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
767  D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
768  A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
769  Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
770 
771  } else
772 #endif // HAVE_MPI
773  {
774  coarseLevel.Request("A", rapFact.get());
775  if (enable_reuse_) {
776  coarseLevel.Request("AP reuse data", rapFact.get());
777  coarseLevel.Request("RAP reuse data", rapFact.get());
778  }
779 
780  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
781 
782  if (enable_reuse_) {
783  if (coarseLevel.IsAvailable("AP reuse data", rapFact.get()))
784  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
785  if (coarseLevel.IsAvailable("RAP reuse data", rapFact.get()))
786  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
787  }
788  }
789  } else {
790  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
791  if (Importer22_.is_null()) {
792  RCP<Matrix> temp;
793  temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
794  if (!implicitTranspose_)
795  A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,A22_,GetOStream(Runtime0),true,true);
796  else
797  A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,A22_,GetOStream(Runtime0),true,true);
798  } else {
799  // we replaced domain map and importer on D0, reverse that
800  RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
801  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(D0origDomainMap_, D0origImporter_);
802 
803  RCP<Matrix> temp, temp2;
804  temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
805  if (!implicitTranspose_)
806  temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,temp2,GetOStream(Runtime0),true,true);
807  else
808  temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
809 
810  // and back again
811  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), D0importer);
812 
813  ParameterList XpetraList;
814  XpetraList.set("Restrict Communicator",true);
815  XpetraList.set("Timer Label","MueLu::RebalanceA22");
816  RCP<const Map> targetMap = Importer22_->getTargetMap();
817  A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
818  }
819  }
820 
821  if (!implicitTranspose_ && !reuse) {
822  if (useKokkos_)
823  D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
824  else
825  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
826  }
827 
829  if (!A22_.is_null()) {
830  dump(*A22_, "A22.m");
831  A22_->setObjectLabel("RefMaxwell (2,2)");
832  int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
833  if (IsPrint(Statistics2)) {
834  RCP<ParameterList> params = rcp(new ParameterList());;
835  params->set("printLoadBalancingInfo", true);
836  params->set("printCommInfo", true);
837  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A22_, "A22", params);
838  }
839 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
840  if (precList22_.isType<std::string>("Preconditioner Type")) {
841  // build a Stratimikos preconditioner
842  if (precList22_.get<std::string>("Preconditioner Type") == "MueLu") {
843  ParameterList& userParamList = precList22_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
844  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
845  }
846  thyraPrecOp22_ = rcp(new XpetraThyraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A22_, rcp(&precList22_, false)));
847  } else
848 #endif
849  {
850  // build a MueLu hierarchy
851  if (!reuse) {
852  ParameterList& userParamList = precList22_.sublist("user data");
853  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
854  // If we detected no boundary conditions, the (2,2) problem is singular.
855  // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
856  std::string coarseType = "";
857  if (precList22_.isParameter("coarse: type")) {
858  coarseType = precList22_.get<std::string>("coarse: type");
859  // Transform string to "Abcde" notation
860  std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
861  std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
862  }
863  if (BCedges_ == 0 &&
864  (coarseType == "" ||
865  coarseType == "Klu" ||
866  coarseType == "Klu2") &&
867  (!precList22_.isSublist("coarse: params") ||
868  !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
869  precList22_.sublist("coarse: params").set("fix nullspace",true);
870  Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
871  } else {
872  RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
873  level0->Set("A", A22_);
874  Hierarchy22_->SetupRe();
875  }
876  }
877  SetProcRankVerbose(oldRank);
878  }
879  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
880 
881  }
882 
883  if(!reuse && !allNodesBoundary_ && applyBCsTo22_) {
884  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
885 
886  D0_Matrix_->resumeFill();
887  Scalar replaceWith;
888  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
889  replaceWith= Teuchos::ScalarTraits<SC>::eps();
890  else
891  replaceWith = Teuchos::ScalarTraits<SC>::zero();
892  if (useKokkos_) {
893  Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
894  } else {
895  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
896  }
897  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
898  dump(*D0_Matrix_, "D0_nuked.m");
899  }
900 
901  setFineLevelSmoother();
902 
903  if (!reuse) {
904  if (!ImporterH_.is_null()) {
905  RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
906  rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
907  }
908 
909  if (!Importer22_.is_null()) {
910  if (enable_reuse_) {
911  D0origDomainMap_ = D0_Matrix_->getDomainMap();
912  D0origImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
913  }
914  RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
915  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
916  }
917 
918 #ifdef HAVE_MUELU_TPETRA
919  if ((!D0_T_Matrix_.is_null()) &&
920  (!R11_.is_null()) &&
921  (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
922  (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
923  (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
924  (R11_->getColMap()->lib() == Xpetra::UseTpetra))
925  D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
926  else
927 #endif
928  D0_T_R11_colMapsMatch_ = false;
929  if (D0_T_R11_colMapsMatch_)
930  GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
931 
932  // Allocate temporary MultiVectors for solve
933  allocateMemory(1);
934 
935  if (parameterList_.isSublist("matvec params"))
936  {
937  RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
938  Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_Matrix_, matvecParams);
939  Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*P11_, matvecParams);
940  if (!D0_T_Matrix_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_T_Matrix_, matvecParams);
941  if (!R11_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*R11_, matvecParams);
942  if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
943  if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
944  }
945  if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
946  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
947  ImporterH_->setDistributorParameters(importerParams);
948  }
949  if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
950  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
951  Importer22_->setDistributorParameters(importerParams);
952  }
953  }
954 
955  describe(GetOStream(Runtime0));
956 
957 #ifdef HAVE_MUELU_CUDA
958  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
959 #endif
960  }
961 
962 
963  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
965 
966  Level level;
967  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
968  level.SetFactoryManager(factoryHandler);
969  level.SetLevelID(0);
970  level.setObjectLabel("RefMaxwell (1,1)");
971  level.Set("A",SM_Matrix_);
972  level.setlib(SM_Matrix_->getDomainMap()->lib());
973  // For Hiptmair
974  level.Set("NodeMatrix", A22_);
975  level.Set("D0", D0_Matrix_);
976 
977  if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
978  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
979  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
980 
981  ParameterList preSmootherList, postSmootherList;
982  if (parameterList_.isSublist("smoother: pre params"))
983  preSmootherList = parameterList_.sublist("smoother: pre params");
984  if (parameterList_.isSublist("smoother: post params"))
985  postSmootherList = parameterList_.sublist("smoother: post params");
986 
987  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
988  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
989  RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
990 
991  level.Request("PreSmoother",smootherFact.get());
992  level.Request("PostSmoother",smootherFact.get());
993  if (enable_reuse_) {
994  ParameterList smootherFactoryParams;
995  smootherFactoryParams.set("keep smoother data", true);
996  smootherFact->SetParameterList(smootherFactoryParams);
997  level.Request("PreSmoother data", smootherFact.get());
998  level.Request("PostSmoother data", smootherFact.get());
999  if (!PreSmootherData_.is_null())
1000  level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1001  if (!PostSmootherData_.is_null())
1002  level.Set("PostSmoother data", PostSmootherData_, smootherFact.get());
1003  }
1004  smootherFact->Build(level);
1005  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1006  PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",smootherFact.get());
1007  if (enable_reuse_) {
1008  PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1009  PostSmootherData_ = level.Get<RCP<SmootherPrototype> >("PostSmoother data",smootherFact.get());
1010  }
1011  } else {
1012  std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
1013 
1014  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
1015  RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(smootherPrototype));
1016  level.Request("PreSmoother",smootherFact.get());
1017  if (enable_reuse_) {
1018  ParameterList smootherFactoryParams;
1019  smootherFactoryParams.set("keep smoother data", true);
1020  smootherFact->SetParameterList(smootherFactoryParams);
1021  level.Request("PreSmoother data", smootherFact.get());
1022  if (!PreSmootherData_.is_null())
1023  level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1024  }
1025  smootherFact->Build(level);
1026  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1027  PostSmoother_ = PreSmoother_;
1028  if (enable_reuse_)
1029  PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1030  }
1031 
1032  }
1033 
1034 
1035  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1037 
1038  RCP<Teuchos::TimeMonitor> tmAlloc = getTimer("MueLu RefMaxwell: Allocate MVs");
1039 
1040  if (!R11_.is_null())
1041  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1042  else
1043  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1044  P11res_->setObjectLabel("P11res");
1045  if (D0_T_R11_colMapsMatch_) {
1046  D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1047  D0TR11Tmp_->setObjectLabel("D0TR11Tmp");
1048  }
1049  if (!ImporterH_.is_null()) {
1050  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1051  P11resTmp_->setObjectLabel("P11resTmp");
1052  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1053  } else
1054  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1055  P11x_->setObjectLabel("P11x");
1056  if (!D0_T_Matrix_.is_null())
1057  D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1058  else
1059  D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1060  D0res_->setObjectLabel("D0res");
1061  if (!Importer22_.is_null()) {
1062  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1063  D0resTmp_->setObjectLabel("D0resTmp");
1064  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1065  } else
1066  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1067  D0x_->setObjectLabel("D0x");
1068  if (!AH_.is_null()) {
1069  if (!ImporterH_.is_null() && !implicitTranspose_)
1070  P11resSubComm_ = MultiVectorFactory::Build(P11resTmp_, Teuchos::View);
1071  else
1072  P11resSubComm_ = MultiVectorFactory::Build(P11res_, Teuchos::View);
1073  P11resSubComm_->replaceMap(AH_->getRangeMap());
1074  P11resSubComm_->setObjectLabel("P11resSubComm");
1075 
1076  P11xSubComm_ = MultiVectorFactory::Build(P11x_, Teuchos::View);
1077  P11xSubComm_->replaceMap(AH_->getDomainMap());
1078  P11xSubComm_->setObjectLabel("P11xSubComm");
1079  }
1080  if (!A22_.is_null()) {
1081  if (!Importer22_.is_null() && !implicitTranspose_)
1082  D0resSubComm_ = MultiVectorFactory::Build(D0resTmp_, Teuchos::View);
1083  else
1084  D0resSubComm_ = MultiVectorFactory::Build(D0res_, Teuchos::View);
1085  D0resSubComm_->replaceMap(A22_->getRangeMap());
1086  D0resSubComm_->setObjectLabel("D0resSubComm");
1087 
1088  D0xSubComm_ = MultiVectorFactory::Build(D0x_, Teuchos::View);
1089  D0xSubComm_->replaceMap(A22_->getDomainMap());
1090  D0xSubComm_->setObjectLabel("D0xSubComm");
1091  }
1092  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1093  residual_->setObjectLabel("residual");
1094  }
1095 
1096 
1097  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1098  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1099  if (dump_matrices_) {
1100  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1101  Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1102  }
1103  }
1104 
1105 
1106  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1107  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1108  if (dump_matrices_) {
1109  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1110  Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1111  }
1112  }
1113 
1114 
1115  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1117  if (dump_matrices_) {
1118  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1119  Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1120  }
1121  }
1122 
1123 
1124  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1125  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Teuchos::ArrayRCP<bool>& v, std::string name) const {
1126  if (dump_matrices_) {
1127  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1128  std::ofstream out(name);
1129  for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1130  out << v[i] << "\n";
1131  }
1132  }
1133 
1134  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1135  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Kokkos::View<bool*, typename Node::device_type>& v, std::string name) const {
1136  if (dump_matrices_) {
1137  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1138  std::ofstream out(name);
1139  auto vH = Kokkos::create_mirror_view (v);
1140  Kokkos::deep_copy(vH , v);
1141  for (size_t i = 0; i < vH.size(); i++)
1142  out << vH[i] << "\n";
1143  }
1144  }
1145 
1146  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1147  Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getTimer(std::string name, RCP<const Teuchos::Comm<int> > comm) const {
1148  if (IsPrint(Timings)) {
1149  if (!syncTimers_)
1150  return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name)));
1151  else {
1152  if (comm.is_null())
1153  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1154  else
1155  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), comm.ptr()));
1156  }
1157  } else
1158  return Teuchos::null;
1159  }
1160 
1161 
1162  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1164  // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1165  //
1166  // The old implementation used
1167  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1168  // yet the paper gives
1169  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1170  // where phi_k is the k-th nullspace vector.
1171  //
1172  // The graph of D0 contains the incidence from nodes to edges.
1173  // The nodal prolongator P maps aggregates to nodes.
1174 
1175  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1176  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1177  const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1178  size_t dim = Nullspace_->getNumVectors();
1179  size_t numLocalRows = SM_Matrix_->getLocalNumRows();
1180 
1181  RCP<Matrix> P_nodal;
1182  RCP<CrsMatrix> P_nodal_imported;
1183  RCP<MultiVector> Nullspace_nodal;
1184  if (skipFirstLevel_) {
1185  // build prolongator: algorithm 1 in the reference paper
1186  // First, build nodal unsmoothed prolongator using the matrix A_nodal
1187  bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1188  if (read_P_from_file) {
1189  // This permits to read in an ML prolongator, so that we get the same hierarchy.
1190  // (ML and MueLu typically produce different aggregates.)
1191  std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1192  std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1193  std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1194  std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1195  RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1196  RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1197  P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1198  CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1199  } else {
1200  Level fineLevel, coarseLevel;
1201  fineLevel.SetFactoryManager(null);
1202  coarseLevel.SetFactoryManager(null);
1203  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1204  fineLevel.SetLevelID(0);
1205  coarseLevel.SetLevelID(1);
1206  fineLevel.Set("A",A_nodal_Matrix_);
1207  fineLevel.Set("Coordinates",Coords_);
1208  fineLevel.Set("DofsPerNode",1);
1209  coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1210  fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1211  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1212  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1213 
1214  LocalOrdinal NSdim = 1;
1215  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1216  nullSpace->putScalar(SC_ONE);
1217  fineLevel.Set("Nullspace",nullSpace);
1218 
1219  RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1220  if (useKokkos_) {
1221  amalgFact = rcp(new AmalgamationFactory_kokkos());
1222  dropFact = rcp(new CoalesceDropFactory_kokkos());
1223  UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1224  coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1225  TentativePFact = rcp(new TentativePFactory_kokkos());
1226  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1227  SaPFact = rcp(new SaPFactory_kokkos());
1228  Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1229  } else
1230  {
1231  amalgFact = rcp(new AmalgamationFactory());
1232  dropFact = rcp(new CoalesceDropFactory());
1233  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1234  coarseMapFact = rcp(new CoarseMapFactory());
1235  TentativePFact = rcp(new TentativePFactory());
1236  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1237  SaPFact = rcp(new SaPFactory());
1238  Tfact = rcp(new CoordinatesTransferFactory());
1239  }
1240  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1241  double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1242  std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1243  std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1244  dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1245  dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1246  if (!useKokkos_)
1247  dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1248 
1249  UncoupledAggFact->SetFactory("Graph", dropFact);
1250  int minAggSize = parameterList_.get("aggregation: min agg size",2);
1251  UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1252  int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1253  UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1254 
1255  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1256 
1257  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1258  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1259  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1260 
1261  Tfact->SetFactory("Aggregates", UncoupledAggFact);
1262  Tfact->SetFactory("CoarseMap", coarseMapFact);
1263 
1264  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1265  SaPFact->SetFactory("P", TentativePFact);
1266  coarseLevel.Request("P", SaPFact.get());
1267  } else
1268  coarseLevel.Request("P",TentativePFact.get());
1269  coarseLevel.Request("Nullspace",TentativePFact.get());
1270  coarseLevel.Request("Coordinates",Tfact.get());
1271 
1272  RCP<AggregationExportFactory> aggExport;
1273  if (parameterList_.get("aggregation: export visualization data",false)) {
1274  aggExport = rcp(new AggregationExportFactory());
1275  ParameterList aggExportParams;
1276  aggExportParams.set("aggregation: output filename", "aggs.vtk");
1277  aggExportParams.set("aggregation: output file: agg style", "Jacks");
1278  aggExport->SetParameterList(aggExportParams);
1279 
1280  aggExport->SetFactory("Aggregates", UncoupledAggFact);
1281  aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1282  fineLevel.Request("Aggregates",UncoupledAggFact.get());
1283  fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1284  }
1285 
1286  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1287  coarseLevel.Get("P",P_nodal,SaPFact.get());
1288  else
1289  coarseLevel.Get("P",P_nodal,TentativePFact.get());
1290  coarseLevel.Get("Nullspace",Nullspace_nodal,TentativePFact.get());
1291  coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1292 
1293 
1294  if (parameterList_.get("aggregation: export visualization data",false))
1295  aggExport->Build(fineLevel, coarseLevel);
1296  }
1297  dump(*P_nodal, "P_nodal.m");
1298  dump(*Nullspace_nodal, "Nullspace_nodal.m");
1299 
1300 
1301  RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1302 
1303  // Import off-rank rows of P_nodal into P_nodal_imported
1304  int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1305  if (numProcs > 1) {
1306  RCP<CrsMatrixWrap> P_nodal_temp;
1307  RCP<const Map> targetMap = D0Crs->getColMap();
1308  P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1309  RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1310  P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1311  P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1312  rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1313  P_nodal_imported = P_nodal_temp->getCrsMatrix();
1314  dump(*P_nodal_temp, "P_nodal_imported.m");
1315  } else
1316  P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1317 
1318  }
1319 
1320  if (useKokkos_) {
1321 
1322  using ATS = Kokkos::ArithTraits<SC>;
1323  using impl_Scalar = typename ATS::val_type;
1324  using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1325  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1326 
1327  typedef typename Matrix::local_matrix_type KCRS;
1328  typedef typename KCRS::StaticCrsGraphType graph_t;
1329  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1330  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1331  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1332 
1333  const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
1334  const impl_Scalar impl_SC_ONE = impl_ATS::one();
1335  const impl_Scalar impl_half = impl_SC_ONE / (impl_SC_ONE + impl_SC_ONE);
1336 
1337 
1338  // Which algorithm should we use for the construction of the special prolongator?
1339  // Option "mat-mat":
1340  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1341  std::string defaultAlgo = "mat-mat";
1342  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1343 
1344  if (skipFirstLevel_) {
1345  // Get data out of P_nodal_imported and D0.
1346 
1347  if (algo == "mat-mat") {
1348  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1349  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1350 
1351 #ifdef HAVE_MUELU_DEBUG
1352  TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1353 #endif
1354 
1355  // Get data out of D0*P.
1356  auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1357 
1358  // Create the matrix object
1359  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1360  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1361 
1362  size_t nnzEstimate = dim*localD0P.graph.entries.size();
1363  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1364  lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1365  scalar_view_t P11vals("P11_vals",nnzEstimate);
1366 
1367  // adjust rowpointer
1368  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1369  KOKKOS_LAMBDA(const size_t i) {
1370  P11rowptr(i) = dim*localD0P.graph.row_map(i);
1371  });
1372 
1373  // adjust column indices
1374  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1375  KOKKOS_LAMBDA(const size_t jj) {
1376  for (size_t k = 0; k < dim; k++) {
1377  P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1378  P11vals(dim*jj+k) = impl_SC_ZERO;
1379  }
1380  });
1381 
1382  auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1383 
1384  // enter values
1385  if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1386  // The matrix D0 has too many entries per row.
1387  // Therefore we need to check whether its entries are actually non-zero.
1388  // This is the case for the matrices built by MiniEM.
1389  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1390 
1391  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1392 
1393  auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1394  auto localP = P_nodal_imported->getLocalMatrixDevice();
1395  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1396  KOKKOS_LAMBDA(const size_t i) {
1397  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1398  LO l = localD0.graph.entries(ll);
1399  impl_Scalar p = localD0.values(ll);
1400  if (impl_ATS::magnitude(p) < tol)
1401  continue;
1402  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1403  LO j = localP.graph.entries(jj);
1404  impl_Scalar v = localP.values(jj);
1405  for (size_t k = 0; k < dim; k++) {
1406  LO jNew = dim*j+k;
1407  impl_Scalar n = localNullspace(i,k);
1408  size_t m;
1409  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1410  if (P11colind(m) == jNew)
1411  break;
1412 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1413  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1414 #endif
1415  P11vals(m) += impl_half * v * n;
1416  }
1417  }
1418  }
1419  });
1420 
1421  } else {
1422  auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1423  auto localP = P_nodal_imported->getLocalMatrixDevice();
1424  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1425  KOKKOS_LAMBDA(const size_t i) {
1426  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1427  LO l = localD0.graph.entries(ll);
1428  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1429  LO j = localP.graph.entries(jj);
1430  impl_Scalar v = localP.values(jj);
1431  for (size_t k = 0; k < dim; k++) {
1432  LO jNew = dim*j+k;
1433  impl_Scalar n = localNullspace(i,k);
1434  size_t m;
1435  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1436  if (P11colind(m) == jNew)
1437  break;
1438 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1439  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1440 #endif
1441  P11vals(m) += impl_half * v * n;
1442  }
1443  }
1444  }
1445  });
1446  }
1447 
1448  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1449  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1450  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1451  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1452 
1453  } else
1454  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1455 
1456  NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1457 
1458  auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1459  auto localNullspaceH = NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1460  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1461  KOKKOS_LAMBDA(const size_t i) {
1462  impl_Scalar val = localNullspace_nodal(i,0);
1463  for (size_t j = 0; j < dim; j++)
1464  localNullspaceH(dim*i+j, j) = val;
1465  });
1466 
1467  } else { // !skipFirstLevel_
1468  // Get data out of P_nodal_imported and D0.
1469  auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1470 
1471  CoordsH_ = Coords_;
1472 
1473  if (algo == "mat-mat") {
1474 
1475  // Create the matrix object
1476  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1477  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1478 
1479  size_t nnzEstimate = dim*localD0.graph.entries.size();
1480  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1481  lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1482  scalar_view_t P11vals("P11_vals",nnzEstimate);
1483 
1484  // adjust rowpointer
1485  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1486  KOKKOS_LAMBDA(const size_t i) {
1487  P11rowptr(i) = dim*localD0.graph.row_map(i);
1488  });
1489 
1490  // adjust column indices
1491  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1492  KOKKOS_LAMBDA(const size_t jj) {
1493  for (size_t k = 0; k < dim; k++) {
1494  P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1495  P11vals(dim*jj+k) = impl_SC_ZERO;
1496  }
1497  });
1498 
1499  auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1500 
1501  // enter values
1502  if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1503  // The matrix D0 has too many entries per row.
1504  // Therefore we need to check whether its entries are actually non-zero.
1505  // This is the case for the matrices built by MiniEM.
1506  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1507 
1508  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1509 
1510  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1511  KOKKOS_LAMBDA(const size_t i) {
1512  for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1513  LO j = localD0.graph.entries(jj);
1514  impl_Scalar p = localD0.values(jj);
1515  if (impl_ATS::magnitude(p) < tol)
1516  continue;
1517  for (size_t k = 0; k < dim; k++) {
1518  LO jNew = dim*j+k;
1519  impl_Scalar n = localNullspace(i,k);
1520  size_t m;
1521  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1522  if (P11colind(m) == jNew)
1523  break;
1524 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1525  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1526 #endif
1527  P11vals(m) += impl_half * n;
1528  }
1529  }
1530  });
1531 
1532  } else {
1533  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1534  KOKKOS_LAMBDA(const size_t i) {
1535  for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1536  LO j = localD0.graph.entries(jj);
1537  for (size_t k = 0; k < dim; k++) {
1538  LO jNew = dim*j+k;
1539  impl_Scalar n = localNullspace(i,k);
1540  size_t m;
1541  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1542  if (P11colind(m) == jNew)
1543  break;
1544 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1545  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1546 #endif
1547  P11vals(m) += impl_half * n;
1548  }
1549  }
1550  });
1551  }
1552 
1553  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1554  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1555  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1556  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1557  } else
1558  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1559 
1560  }
1561  } else
1562  {
1563  // get nullspace vectors
1564  ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1565  ArrayRCP<ArrayView<const SC> > nullspace(dim);
1566  for(size_t i=0; i<dim; i++) {
1567  nullspaceRCP[i] = Nullspace_->getData(i);
1568  nullspace[i] = nullspaceRCP[i]();
1569  }
1570 
1571  // Get data out of P_nodal_imported and D0.
1572  ArrayRCP<size_t> P11rowptr_RCP;
1573  ArrayRCP<LO> P11colind_RCP;
1574  ArrayRCP<SC> P11vals_RCP;
1575 
1576 
1577  // Which algorithm should we use for the construction of the special prolongator?
1578  // Option "mat-mat":
1579  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1580  // Option "gustavson":
1581  // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1582  // More efficient, but only available for serial node.
1583  std::string defaultAlgo = "mat-mat";
1584  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1585 
1586  if (skipFirstLevel_) {
1587 
1588  if (algo == "mat-mat") {
1589  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1590  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1591 
1592 
1593  ArrayRCP<const size_t> D0rowptr_RCP;
1594  ArrayRCP<const LO> D0colind_RCP;
1595  ArrayRCP<const SC> D0vals_RCP;
1596  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1597  // For efficiency
1598  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1599  // slower than Teuchos::ArrayView::operator[].
1600  ArrayView<const size_t> D0rowptr;
1601  ArrayView<const LO> D0colind;
1602  ArrayView<const SC> D0vals;
1603  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1604 
1605  // Get data out of P_nodal_imported and D0.
1606  ArrayRCP<const size_t> Prowptr_RCP;
1607  ArrayRCP<const LO> Pcolind_RCP;
1608  ArrayRCP<const SC> Pvals_RCP;
1609  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1610  ArrayView<const size_t> Prowptr;
1611  ArrayView<const LO> Pcolind;
1612  ArrayView<const SC> Pvals;
1613  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1614 
1615  // Get data out of D0*P.
1616  ArrayRCP<const size_t> D0Prowptr_RCP;
1617  ArrayRCP<const LO> D0Pcolind_RCP;
1618  ArrayRCP<const SC> D0Pvals_RCP;
1619  rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1620 
1621  // For efficiency
1622  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1623  // slower than Teuchos::ArrayView::operator[].
1624  ArrayView<const size_t> D0Prowptr;
1625  ArrayView<const LO> D0Pcolind;
1626  D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1627 
1628  // Create the matrix object
1629  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1630  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1631  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1632  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1633  size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1634  P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1635 
1636  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1637  ArrayView<LO> P11colind = P11colind_RCP();
1638  ArrayView<SC> P11vals = P11vals_RCP();
1639 
1640  // adjust rowpointer
1641  for (size_t i = 0; i < numLocalRows+1; i++) {
1642  P11rowptr[i] = dim*D0Prowptr[i];
1643  }
1644 
1645  // adjust column indices
1646  for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1647  for (size_t k = 0; k < dim; k++) {
1648  P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1649  P11vals[dim*jj+k] = SC_ZERO;
1650  }
1651 
1652  RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1653  RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1654  // enter values
1655  if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1656  // The matrix D0 has too many entries per row.
1657  // Therefore we need to check whether its entries are actually non-zero.
1658  // This is the case for the matrices built by MiniEM.
1659  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1660 
1661  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1662  for (size_t i = 0; i < numLocalRows; i++) {
1663  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1664  LO l = D0colind[ll];
1665  SC p = D0vals[ll];
1666  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1667  continue;
1668  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1669  LO j = Pcolind[jj];
1670  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1671  SC v = Pvals[jj];
1672  for (size_t k = 0; k < dim; k++) {
1673  LO jNew = dim*j+k;
1674  SC n = nullspace[k][i];
1675  size_t m;
1676  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1677  if (P11colind[m] == jNew)
1678  break;
1679 #ifdef HAVE_MUELU_DEBUG
1680  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1681 #endif
1682  P11vals[m] += half * v * n;
1683  }
1684  }
1685  }
1686  }
1687  } else {
1688  // enter values
1689  for (size_t i = 0; i < numLocalRows; i++) {
1690  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1691  LO l = D0colind[ll];
1692  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1693  LO j = Pcolind[jj];
1694  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1695  SC v = Pvals[jj];
1696  for (size_t k = 0; k < dim; k++) {
1697  LO jNew = dim*j+k;
1698  SC n = nullspace[k][i];
1699  size_t m;
1700  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1701  if (P11colind[m] == jNew)
1702  break;
1703 #ifdef HAVE_MUELU_DEBUG
1704  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1705 #endif
1706  P11vals[m] += half * v * n;
1707  }
1708  }
1709  }
1710  }
1711  }
1712 
1713  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1714  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1715 
1716  } else if (algo == "gustavson") {
1717  ArrayRCP<const size_t> D0rowptr_RCP;
1718  ArrayRCP<const LO> D0colind_RCP;
1719  ArrayRCP<const SC> D0vals_RCP;
1720  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1721  // For efficiency
1722  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1723  // slower than Teuchos::ArrayView::operator[].
1724  ArrayView<const size_t> D0rowptr;
1725  ArrayView<const LO> D0colind;
1726  ArrayView<const SC> D0vals;
1727  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1728 
1729  // Get data out of P_nodal_imported and D0.
1730  ArrayRCP<const size_t> Prowptr_RCP;
1731  ArrayRCP<const LO> Pcolind_RCP;
1732  ArrayRCP<const SC> Pvals_RCP;
1733  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1734  ArrayView<const size_t> Prowptr;
1735  ArrayView<const LO> Pcolind;
1736  ArrayView<const SC> Pvals;
1737  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1738 
1739  LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1740  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1741  Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1742  // This is ad-hoc and should maybe be replaced with some better heuristics.
1743  size_t nnz_alloc = dim*D0vals_RCP.size();
1744 
1745  // Create the matrix object
1746  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1747  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1748  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1749  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1750  P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1751 
1752  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1753  ArrayView<LO> P11colind = P11colind_RCP();
1754  ArrayView<SC> P11vals = P11vals_RCP();
1755 
1756  size_t nnz;
1757  if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1758  // The matrix D0 has too many entries per row.
1759  // Therefore we need to check whether its entries are actually non-zero.
1760  // This is the case for the matrices built by MiniEM.
1761  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1762 
1763  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1764  nnz = 0;
1765  size_t nnz_old = 0;
1766  for (size_t i = 0; i < numLocalRows; i++) {
1767  P11rowptr[i] = nnz;
1768  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1769  LO l = D0colind[ll];
1770  SC p = D0vals[ll];
1771  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1772  continue;
1773  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1774  LO j = Pcolind[jj];
1775  SC v = Pvals[jj];
1776  for (size_t k = 0; k < dim; k++) {
1777  LO jNew = dim*j+k;
1778  SC n = nullspace[k][i];
1779  // do we already have an entry for (i, jNew)?
1780  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1781  P11_status[jNew] = nnz;
1782  P11colind[nnz] = jNew;
1783  P11vals[nnz] = half * v * n;
1784  // or should it be
1785  // P11vals[nnz] = half * n;
1786  nnz++;
1787  } else {
1788  P11vals[P11_status[jNew]] += half * v * n;
1789  // or should it be
1790  // P11vals[P11_status[jNew]] += half * n;
1791  }
1792  }
1793  }
1794  }
1795  nnz_old = nnz;
1796  }
1797  P11rowptr[numLocalRows] = nnz;
1798  } else {
1799  nnz = 0;
1800  size_t nnz_old = 0;
1801  for (size_t i = 0; i < numLocalRows; i++) {
1802  P11rowptr[i] = nnz;
1803  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1804  LO l = D0colind[ll];
1805  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1806  LO j = Pcolind[jj];
1807  SC v = Pvals[jj];
1808  for (size_t k = 0; k < dim; k++) {
1809  LO jNew = dim*j+k;
1810  SC n = nullspace[k][i];
1811  // do we already have an entry for (i, jNew)?
1812  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1813  P11_status[jNew] = nnz;
1814  P11colind[nnz] = jNew;
1815  P11vals[nnz] = half * v * n;
1816  // or should it be
1817  // P11vals[nnz] = half * n;
1818  nnz++;
1819  } else {
1820  P11vals[P11_status[jNew]] += half * v * n;
1821  // or should it be
1822  // P11vals[P11_status[jNew]] += half * n;
1823  }
1824  }
1825  }
1826  }
1827  nnz_old = nnz;
1828  }
1829  P11rowptr[numLocalRows] = nnz;
1830  }
1831 
1832  if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1833  // Downward resize
1834  // - Cannot resize for Epetra, as it checks for same pointers
1835  // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1836  P11vals_RCP.resize(nnz);
1837  P11colind_RCP.resize(nnz);
1838  }
1839 
1840  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1841  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1842  } else
1843  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1844 
1845  NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1846 
1847  ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1848  ArrayView<const Scalar> ns_view = ns_rcp();
1849  for (size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1850  Scalar val = ns_view[i];
1851  for (size_t j = 0; j < dim; j++)
1852  NullspaceH_->replaceLocalValue(dim*i+j, j, val);
1853  }
1854 
1855 
1856  } else { // !skipFirstLevel_
1857  ArrayRCP<const size_t> D0rowptr_RCP;
1858  ArrayRCP<const LO> D0colind_RCP;
1859  ArrayRCP<const SC> D0vals_RCP;
1860  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1861  // For efficiency
1862  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1863  // slower than Teuchos::ArrayView::operator[].
1864  ArrayView<const size_t> D0rowptr;
1865  ArrayView<const LO> D0colind;
1866  ArrayView<const SC> D0vals;
1867  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1868 
1869  CoordsH_ = Coords_;
1870  if (algo == "mat-mat") {
1871 
1872  // Create the matrix object
1873  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1874  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1875  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1876  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1877  size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1878  P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1879 
1880  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1881  ArrayView<LO> P11colind = P11colind_RCP();
1882  ArrayView<SC> P11vals = P11vals_RCP();
1883 
1884  // adjust rowpointer
1885  for (size_t i = 0; i < numLocalRows+1; i++) {
1886  P11rowptr[i] = dim*D0rowptr[i];
1887  }
1888 
1889  // adjust column indices
1890  for (size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
1891  for (size_t k = 0; k < dim; k++) {
1892  P11colind[dim*jj+k] = dim*D0colind[jj]+k;
1893  P11vals[dim*jj+k] = SC_ZERO;
1894  }
1895 
1896  // enter values
1897  if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1898  // The matrix D0 has too many entries per row.
1899  // Therefore we need to check whether its entries are actually non-zero.
1900  // This is the case for the matrices built by MiniEM.
1901  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1902 
1903  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1904  for (size_t i = 0; i < numLocalRows; i++) {
1905  for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1906  LO j = D0colind[jj];
1907  SC p = D0vals[jj];
1908  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1909  continue;
1910  for (size_t k = 0; k < dim; k++) {
1911  LO jNew = dim*j+k;
1912  SC n = nullspace[k][i];
1913  size_t m;
1914  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1915  if (P11colind[m] == jNew)
1916  break;
1917 #ifdef HAVE_MUELU_DEBUG
1918  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1919 #endif
1920  P11vals[m] += half * n;
1921  }
1922  }
1923  }
1924  } else {
1925  // enter values
1926  for (size_t i = 0; i < numLocalRows; i++) {
1927  for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1928  LO j = D0colind[jj];
1929 
1930  for (size_t k = 0; k < dim; k++) {
1931  LO jNew = dim*j+k;
1932  SC n = nullspace[k][i];
1933  size_t m;
1934  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1935  if (P11colind[m] == jNew)
1936  break;
1937 #ifdef HAVE_MUELU_DEBUG
1938  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1939 #endif
1940  P11vals[m] += half * n;
1941  }
1942  }
1943  }
1944  }
1945 
1946  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1947  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1948 
1949  } else
1950  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1951  }
1952  }
1953  }
1954 
1955  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1957  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
1958 
1959  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1960 
1961  // coarse matrix for P11* (M1 + D1* M2 D1) P11
1962  RCP<Matrix> temp;
1963  temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*P11_,false,temp,GetOStream(Runtime0),true,true);
1964  if (ImporterH_.is_null())
1965  AH_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,AH_,GetOStream(Runtime0),true,true);
1966  else {
1967 
1968  RCP<Matrix> temp2;
1969  temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
1970 
1971  RCP<const Map> map = ImporterH_->getTargetMap()->removeEmptyProcesses();
1972  temp2->removeEmptyProcessesInPlace(map);
1973  if (!temp2.is_null() && temp2->getRowMap().is_null())
1974  temp2 = Teuchos::null;
1975  AH_ = temp2;
1976  }
1977 
1978  if (!disable_addon_) {
1979 
1980  RCP<Matrix> addon;
1981 
1982  if (!AH_.is_null() && Addon_Matrix_.is_null()) {
1983  // construct addon
1984  RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
1985  // catch a failure
1986  TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1987  "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
1988  "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
1989 
1990  // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
1991  RCP<Matrix> Zaux, Z;
1992 
1993  // construct Zaux = M1 P11
1994  Zaux = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,Zaux,GetOStream(Runtime0),true,true);
1995  // construct Z = D0* M1 P11 = D0* Zaux
1996  Z = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,Z,GetOStream(Runtime0),true,true);
1997 
1998  // construct Z* M0inv Z
1999  if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
2000  // We assume that if M0inv has at most one entry per row then
2001  // these are all diagonal entries.
2002  RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
2003  M0inv_Matrix_->getLocalDiagCopy(*diag);
2004  {
2005  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
2006  for (size_t j=0; j < diag->getMap()->getLocalNumElements(); j++) {
2007  diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
2008  }
2009  }
2010  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
2011  Z->leftScale(*diag);
2012  else {
2013  RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2014  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2015  diag2->doImport(*diag,*importer,Xpetra::INSERT);
2016  Z->leftScale(*diag2);
2017  }
2018  addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,addon,GetOStream(Runtime0),true,true);
2019  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2020  RCP<Matrix> C2;
2021  // construct C2 = M0inv Z
2022  C2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,C2,GetOStream(Runtime0),true,true);
2023  // construct Matrix2 = Z* M0inv Z = Z* C2
2024  addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,addon,GetOStream(Runtime0),true,true);
2025  } else {
2026  addon = MatrixFactory::Build(Z->getDomainMap());
2027  // construct Matrix2 = Z* M0inv Z
2028  Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2029  MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *addon, true, true);
2030  }
2031  // Should we keep the addon for next setup?
2032  if (enable_reuse_)
2033  Addon_Matrix_ = addon;
2034  } else
2035  addon = Addon_Matrix_;
2036 
2037  if (!AH_.is_null()) {
2038  // add matrices together
2039  RCP<Matrix> newAH;
2040  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*AH_,false,one,*addon,false,one,newAH,GetOStream(Runtime0));
2041  newAH->fillComplete();
2042  AH_ = newAH;
2043  }
2044  }
2045 
2046  if (!AH_.is_null() && !skipFirstLevel_) {
2047  ArrayRCP<bool> AHBCrows;
2048  AHBCrows.resize(AH_->getRowMap()->getLocalNumElements());
2049  size_t dim = Nullspace_->getNumVectors();
2050  if (useKokkos_)
2051  for (size_t i = 0; i < BCdomainKokkos_.size(); i++)
2052  for (size_t k = 0; k < dim; k++)
2053  AHBCrows[i*dim+k] = BCdomainKokkos_(i);
2054  else
2055  for (size_t i = 0; i < static_cast<size_t>(BCdomain_.size()); i++)
2056  for (size_t k = 0; k < dim; k++)
2057  AHBCrows[i*dim+k] = BCdomain_[i];
2058  magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
2059  if (rowSumTol > 0.)
2060  Utilities::ApplyRowSumCriterion(*AH_, rowSumTol, AHBCrows);
2061  if (applyBCsToH_)
2062  Utilities::ApplyOAZToMatrixRows(AH_, AHBCrows);
2063  }
2064 
2065  if (!AH_.is_null()) {
2066  // If we already applied BCs to A_nodal, we likely do not need
2067  // to fix up AH.
2068  // If we did not apply BCs to A_nodal, we now need to correct
2069  // the zero diagonals of AH, since we did nuke the nullspace.
2070 
2071  bool fixZeroDiagonal = !applyBCsToAnodal_;
2072  if (precList11_.isParameter("rap: fix zero diagonals"))
2073  fixZeroDiagonal = precList11_.get<bool>("rap: fix zero diagonals");
2074 
2075  if (fixZeroDiagonal) {
2076  magnitudeType threshold = 1e-16;
2077  Scalar replacement = 1.0;
2078  if (precList11_.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2079  threshold = precList11_.get<magnitudeType>("rap: fix zero diagonals threshold");
2080  else if (precList11_.isType<double>("rap: fix zero diagonals threshold"))
2081  threshold = Teuchos::as<magnitudeType>(precList11_.get<double>("rap: fix zero diagonals threshold"));
2082  if (precList11_.isType<double>("rap: fix zero diagonals replacement"))
2083  replacement = Teuchos::as<Scalar>(precList11_.get<double>("rap: fix zero diagonals replacement"));
2084  Xpetra::MatrixUtils<SC,LO,GO,NO>::CheckRepairMainDiagonal(AH_, true, GetOStream(Warnings1), threshold, replacement);
2085  }
2086 
2087  // Set block size
2088  size_t dim = Nullspace_->getNumVectors();
2089  AH_->SetFixedBlockSize(dim);
2090  }
2091  }
2092 
2093 
2094  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2095  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::resetMatrix(RCP<Matrix> SM_Matrix_new, bool ComputePrec) {
2096  bool reuse = !SM_Matrix_.is_null();
2097  SM_Matrix_ = SM_Matrix_new;
2098  dump(*SM_Matrix_, "SM.m");
2099  if (ComputePrec)
2100  compute(reuse);
2101  }
2102 
2103 
2104  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2105  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2106 
2107  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2108 
2109  { // compute residual
2110 
2111  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2112  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2113  }
2114 
2115  { // restrict residual to sub-hierarchies
2116 
2117  if (implicitTranspose_) {
2118  {
2119  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2120  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2121  }
2122  if (!allNodesBoundary_) {
2123  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2124  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2125  }
2126  } else {
2127 #ifdef MUELU_HAVE_TPETRA
2128  if (D0_T_R11_colMapsMatch_) {
2129  // Column maps of D0_T and R11 match, and we're running Tpetra
2130  {
2131  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2132  D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2133  }
2134  if (!allNodesBoundary_) {
2135  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2136  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2137  }
2138  {
2139  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2140  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2141  }
2142  } else
2143 #endif
2144  {
2145  {
2146  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2147  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2148  }
2149  if (!allNodesBoundary_) {
2150  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2151  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2152  }
2153  }
2154  }
2155  }
2156 
2157  {
2158  RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2159 
2160  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2161 
2162  if (!ImporterH_.is_null() && !implicitTranspose_) {
2163  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2164  P11resTmp_->beginImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2165  }
2166  if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_) {
2167  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2168  D0resTmp_->beginImport(*D0res_, *Importer22_, Xpetra::INSERT);
2169  }
2170 
2171  // iterate on coarse (1, 1) block
2172  if (!AH_.is_null()) {
2173  if (!ImporterH_.is_null() && !implicitTranspose_)
2174  P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2175 
2176  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2177 
2178 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2179  if (!thyraPrecOpH_.is_null()) {
2180  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2181  thyraPrecOpH_->apply(*P11resSubComm_, *P11xSubComm_, Teuchos::NO_TRANS, one, zero);
2182  }
2183  else
2184 #endif
2185  HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2186  }
2187 
2188  // iterate on (2, 2) block
2189  if (!A22_.is_null()) {
2190  if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2191  D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2192 
2193  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2194 
2195 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2196  if (!thyraPrecOp22_.is_null()) {
2197  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2198  thyraPrecOp22_->apply(*D0resSubComm_, *D0xSubComm_, Teuchos::NO_TRANS, one, zero);
2199  }
2200  else
2201 #endif
2202  Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2203  }
2204 
2205  if (AH_.is_null() && !ImporterH_.is_null() && !implicitTranspose_)
2206  P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2207  if (A22_.is_null() && !allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2208  D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2209  }
2210 
2211  if (fuseProlongationAndUpdate_) {
2212  { // prolongate (1,1) block
2213  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2214  P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2215  }
2216 
2217  if (!allNodesBoundary_) { // prolongate (2,2) block
2218  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2219  D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2220  }
2221  } else {
2222  { // prolongate (1,1) block
2223  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2224  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2225  }
2226 
2227  if (!allNodesBoundary_) { // prolongate (2,2) block
2228  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2229  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2230  }
2231 
2232  { // update current solution
2233  RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2234  X.update(one, *residual_, one);
2235  }
2236  }
2237 
2238  }
2239 
2240 
2241  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2242  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2243 
2244  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2245 
2246  { // compute residual
2247  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2248  Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
2249  if (implicitTranspose_)
2250  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2251  else
2252  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2253  }
2254 
2255  { // solve coarse (1,1) block
2256  if (!ImporterH_.is_null() && !implicitTranspose_) {
2257  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2258  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2259  }
2260  if (!AH_.is_null()) {
2261  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2262  HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2263  }
2264  }
2265 
2266  { // update current solution
2267  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2268  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2269  X.update(one, *residual_, one);
2270  }
2271 
2272  }
2273 
2274 
2275  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2276  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2277 
2278  if (allNodesBoundary_)
2279  return;
2280 
2281  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2282 
2283  { // compute residual
2284  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2285  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2286  if (implicitTranspose_)
2287  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2288  else
2289  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2290  }
2291 
2292  { // solve (2,2) block
2293  if (!Importer22_.is_null() && !implicitTranspose_) {
2294  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2295  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2296  }
2297  if (!A22_.is_null()) {
2298  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2299  Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2300  }
2301  }
2302 
2303  { //update current solution
2304  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2305  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2306  X.update(one, *residual_, one);
2307  }
2308 
2309  }
2310 
2311 
2312  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2313  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2314  Teuchos::ETransp /* mode */,
2315  Scalar /* alpha */,
2316  Scalar /* beta */) const {
2317 
2318  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2319 
2320  // make sure that we have enough temporary memory
2321  if (!allEdgesBoundary_ && X.getNumVectors() != P11res_->getNumVectors())
2322  allocateMemory(X.getNumVectors());
2323 
2324  { // apply pre-smoothing
2325 
2326  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2327 
2328  PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2329  }
2330 
2331  // do solve for the 2x2 block system
2332  if(mode_=="additive")
2333  applyInverseAdditive(RHS,X);
2334  else if(mode_=="121") {
2335  solveH(RHS,X);
2336  solve22(RHS,X);
2337  solveH(RHS,X);
2338  } else if(mode_=="212") {
2339  solve22(RHS,X);
2340  solveH(RHS,X);
2341  solve22(RHS,X);
2342  } else if(mode_=="1")
2343  solveH(RHS,X);
2344  else if(mode_=="2")
2345  solve22(RHS,X);
2346  else if(mode_=="7") {
2347  solveH(RHS,X);
2348  { // apply pre-smoothing
2349 
2350  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2351 
2352  PreSmoother_->Apply(X, RHS, false);
2353  }
2354  solve22(RHS,X);
2355  { // apply post-smoothing
2356 
2357  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2358 
2359  PostSmoother_->Apply(X, RHS, false);
2360  }
2361  solveH(RHS,X);
2362  } else if(mode_=="none") {
2363  // do nothing
2364  }
2365  else
2366  applyInverseAdditive(RHS,X);
2367 
2368  { // apply post-smoothing
2369 
2370  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2371 
2372  PostSmoother_->Apply(X, RHS, false);
2373  }
2374 
2375  }
2376 
2377 
2378  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2380  return false;
2381  }
2382 
2383 
2384  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2386  initialize(const Teuchos::RCP<Matrix> & D0_Matrix,
2387  const Teuchos::RCP<Matrix> & Ms_Matrix,
2388  const Teuchos::RCP<Matrix> & M0inv_Matrix,
2389  const Teuchos::RCP<Matrix> & M1_Matrix,
2390  const Teuchos::RCP<MultiVector> & Nullspace,
2391  const Teuchos::RCP<RealValuedMultiVector> & Coords,
2392  Teuchos::ParameterList& List)
2393  {
2394  // some pre-conditions
2395  TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2396  TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2397  TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2398 
2399 #ifdef HAVE_MUELU_DEBUG
2400  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2401  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2402  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2403 
2404  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2405  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2406  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2407 
2408  TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2409 
2410  if (!M0inv_Matrix) {
2411  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2412  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2413  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2414  }
2415 #endif
2416 
2417  HierarchyH_ = Teuchos::null;
2418  Hierarchy22_ = Teuchos::null;
2419  PreSmoother_ = Teuchos::null;
2420  PostSmoother_ = Teuchos::null;
2421  disable_addon_ = false;
2422  mode_ = "additive";
2423 
2424  // set parameters
2425  setParameters(List);
2426 
2427  if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2428  // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2429  // Fortunately, D0 is quite sparse.
2430  // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2431 
2432  RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2433  RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2434  ArrayRCP<const size_t> D0rowptr_RCP;
2435  ArrayRCP<const LO> D0colind_RCP;
2436  ArrayRCP<const SC> D0vals_RCP;
2437  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2438  D0colind_RCP,
2439  D0vals_RCP);
2440 
2441  ArrayRCP<size_t> D0copyrowptr_RCP;
2442  ArrayRCP<LO> D0copycolind_RCP;
2443  ArrayRCP<SC> D0copyvals_RCP;
2444  D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2445  D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2446  D0copycolind_RCP.deepCopy(D0colind_RCP());
2447  D0copyvals_RCP.deepCopy(D0vals_RCP());
2448  D0copyCrs->setAllValues(D0copyrowptr_RCP,
2449  D0copycolind_RCP,
2450  D0copyvals_RCP);
2451  D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2452  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2453  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2454  D0_Matrix_ = D0copy;
2455  } else
2456  D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2457 
2458 
2459  M0inv_Matrix_ = M0inv_Matrix;
2460  Ms_Matrix_ = Ms_Matrix;
2461  M1_Matrix_ = M1_Matrix;
2462  Coords_ = Coords;
2463  Nullspace_ = Nullspace;
2464 
2465  dump(*D0_Matrix_, "D0_clean.m");
2466  dump(*Ms_Matrix_, "Ms.m");
2467  dump(*M1_Matrix_, "M1.m");
2468  if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2469  if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2470 
2471  }
2472 
2473 
2474  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2476  describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2477 
2478  std::ostringstream oss;
2479 
2480  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2481 
2482 #ifdef HAVE_MPI
2483  int root;
2484  if (!AH_.is_null())
2485  root = comm->getRank();
2486  else
2487  root = -1;
2488 
2489  int actualRoot;
2490  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2491  root = actualRoot;
2492 #endif
2493 
2494 
2495  oss << "\n--------------------------------------------------------------------------------\n" <<
2496  "--- RefMaxwell Summary ---\n"
2497  "--------------------------------------------------------------------------------" << std::endl;
2498  oss << std::endl;
2499 
2500  GlobalOrdinal numRows;
2501  GlobalOrdinal nnz;
2502 
2503  SM_Matrix_->getRowMap()->getComm()->barrier();
2504 
2505  numRows = SM_Matrix_->getGlobalNumRows();
2506  nnz = SM_Matrix_->getGlobalNumEntries();
2507 
2508  Xpetra::global_size_t tt = numRows;
2509  int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2510  tt = nnz;
2511  int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2512 
2513  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2514  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2515 
2516  if (!A22_.is_null()) {
2517  // ToDo: make sure that this is printed correctly
2518  numRows = A22_->getGlobalNumRows();
2519  nnz = A22_->getGlobalNumEntries();
2520 
2521  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2522  }
2523 
2524  oss << std::endl;
2525 
2526  {
2527  if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2528  oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2529  else {
2530  oss << "Smoother pre : "
2531  << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2532  oss << "Smoother post : "
2533  << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2534  }
2535  }
2536  oss << std::endl;
2537 
2538  std::string outstr = oss.str();
2539 
2540 #ifdef HAVE_MPI
2541  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2542  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2543 
2544  int strLength = outstr.size();
2545  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2546  if (comm->getRank() != root)
2547  outstr.resize(strLength);
2548  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2549 #endif
2550 
2551  out << outstr;
2552 
2553  if (!HierarchyH_.is_null())
2554  HierarchyH_->describe(out, GetVerbLevel());
2555 
2556  if (!Hierarchy22_.is_null())
2557  Hierarchy22_->describe(out, GetVerbLevel());
2558 
2559  if (IsPrint(Statistics2)) {
2560  // Print the grid of processors
2561  std::ostringstream oss2;
2562 
2563  oss2 << "Sub-solver distribution over ranks" << std::endl;
2564  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2565 
2566  int numProcs = comm->getSize();
2567 #ifdef HAVE_MPI
2568  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2569  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2570  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2571 #endif
2572 
2573  char status = 0;
2574  if (!AH_.is_null())
2575  status += 1;
2576  if (!A22_.is_null())
2577  status += 2;
2578  std::vector<char> states(numProcs, 0);
2579 #ifdef HAVE_MPI
2580  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2581 #else
2582  states.push_back(status);
2583 #endif
2584 
2585  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2586  for (int proc = 0; proc < numProcs; proc += rowWidth) {
2587  for (int j = 0; j < rowWidth; j++)
2588  if (proc + j < numProcs)
2589  if (states[proc+j] == 0)
2590  oss2 << ".";
2591  else if (states[proc+j] == 1)
2592  oss2 << "1";
2593  else if (states[proc+j] == 2)
2594  oss2 << "2";
2595  else
2596  oss2 << "B";
2597  else
2598  oss2 << " ";
2599 
2600  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2601  }
2602  oss2 << std::endl;
2603  GetOStream(Statistics2) << oss2.str();
2604  }
2605 
2606 
2607  }
2608 
2609 
2610 } // namespace
2611 
2612 #define MUELU_REFMAXWELL_SHORT
2613 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
#define MueLu_sumAll(rcpComm, in, out)
static void SetMueLuOFileStream(const std::string &filename)
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
MueLu::DefaultLocalOrdinal LocalOrdinal
Factory for determing the number of partitions for rebalancing.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory) if factory == NULL => use default factory.
Factory for generating coarse level map. Used by TentativePFactory.
AmalgamationFactory_kokkos for subblocks of strided map based amalgamation data.
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows)
#define MueLu_maxAll(rcpComm, in, out)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static void removeExplicitZeros(Teuchos::ParameterList &parameterList, RCP< Matrix > &D0_Matrix, RCP< Matrix > &SM_Matrix, RCP< Matrix > &M1_Matrix, RCP< Matrix > &Ms_Matrix)
Remove explicit zeros.
static void detectBoundaryConditionsSM(RCP< Matrix > &SM_Matrix, RCP< Matrix > &D0_Matrix, magnitudeType rowSumTol, bool useKokkos_, Kokkos::View< bool *, typename Node::device_type > &BCrowsKokkos, Kokkos::View< bool *, typename Node::device_type > &BCcolsKokkos, Kokkos::View< bool *, typename Node::device_type > &BCdomainKokkos, int &BCedges, int &BCnodes, Teuchos::ArrayRCP< bool > &BCrows, Teuchos::ArrayRCP< bool > &BCcols, Teuchos::ArrayRCP< bool > &BCdomain, bool &allEdgesBoundary, bool &allNodesBoundary)
Detect Dirichlet boundary conditions.
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
static RCP< Matrix > Transpose(Matrix &Op, bool optimizeTranspose=false, const std::string &label=std::string())
Transpose a Xpetra::Matrix.
Class that encapsulates external library smoothers.
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
void setFineLevelSmoother()
Set the fine level smoother.
One-liner description of what is happening.
std::string tolower(const std::string &str)
Namespace for MueLu classes and methods.
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
static RCP< MultiVector > RealValuedToScalarMultiVector(RCP< RealValuedMultiVector > X)
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
void buildProlongator()
Setup the prolongator for the (1,1)-block.
MueLu::DefaultNode Node
#define MueLu_minAll(rcpComm, in, out)
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Factory for building tentative prolongator.
MsgType toVerbLevel(const std::string &verbLevelStr)
void allocateMemory(int numVectors) const
allocate multivectors for solve
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
Utility functions for Maxwell.
Additional warnings.
Print statistics that do not involve significant additional computation.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
MueLu::DefaultScalar Scalar
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static void setMatvecParams(Matrix &A, RCP< ParameterList > matvecParams)
Sets matvec params on a matrix.
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
Factory to export aggregation info or visualize aggregates using VTK.
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
AmalgamationFactory for subblocks of strided map based amalgamation data.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void compute(bool reuse=false)
Setup the preconditioner.
Applies permutation to grid transfer operators.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
Factory for building Smoothed Aggregation prolongators.Input/output of SaPFactory_kokkos
Print all timing information.
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > PtAPWrapper(RCP< Matrix > &A, RCP< Matrix > &P, Teuchos::ParameterList &params, std::string &label)
Factory for creating a graph based on a given matrix.
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Class for transferring coordinates from a finer level to a coarser one.
Factory for creating a graph based on a given matrix.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
void dump(const Matrix &A, std::string name) const
dump out matrix
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Description of what is happening (more verbose)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
Factory for building coarse matrices.
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
static void ApplyOAZToMatrixRows(RCP< Matrix > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
Factory for building Smoothed Aggregation prolongators.
Factory for building uncoupled aggregates.
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::coordinateType, LocalOrdinal, GlobalOrdinal, Node > > X)
static void ZeroDirichletCols(RCP< Matrix > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletCols, SC replaceWith=Teuchos::ScalarTraits< SC >::zero())
static void ZeroDirichletRows(RCP< Matrix > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows, SC replaceWith=Teuchos::ScalarTraits< SC >::zero())
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
static const RCP< const NoFactory > getRCP()
Static Get() functions.