MueLu  Version of the Day
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
47 #define MUELU_TENTATIVEPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MapFactory.hpp>
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_CrsMatrix.hpp>
52 #include <Xpetra_CrsGraphFactory.hpp>
53 #include <Xpetra_Matrix.hpp>
54 #include <Xpetra_MatrixMatrix.hpp>
55 #include <Xpetra_MultiVector.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 #include <Xpetra_Import.hpp>
59 #include <Xpetra_ImportFactory.hpp>
60 #include <Xpetra_CrsMatrixWrap.hpp>
61 #include <Xpetra_StridedMap.hpp>
62 #include <Xpetra_StridedMapFactory.hpp>
63 #include <Xpetra_IO.hpp>
64 
65 #ifdef HAVE_MUELU_TPETRA
66 #include "Xpetra_TpetraBlockCrsMatrix.hpp"
67 //#include "Tpetra_BlockCrsMatrix.hpp"
68 #endif
69 
71 
72 #include "MueLu_Aggregates.hpp"
73 #include "MueLu_AmalgamationFactory.hpp"
74 #include "MueLu_AmalgamationInfo.hpp"
75 #include "MueLu_CoarseMapFactory.hpp"
76 #include "MueLu_MasterList.hpp"
77 #include "MueLu_Monitor.hpp"
78 #include "MueLu_NullspaceFactory.hpp"
79 #include "MueLu_PerfUtils.hpp"
80 #include "MueLu_Utilities.hpp"
81 
82 
83 
84 
85 namespace MueLu {
86 
87  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89  RCP<ParameterList> validParamList = rcp(new ParameterList());
90 
91 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
92  SET_VALID_ENTRY("tentative: calculate qr");
93  SET_VALID_ENTRY("tentative: build coarse coordinates");
94  SET_VALID_ENTRY("tentative: constant column sums");
95 #undef SET_VALID_ENTRY
96  validParamList->set< std::string >("Nullspace name", "Nullspace", "Name for the input nullspace");
97 
98  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
99  validParamList->set< RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
100  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
101  validParamList->set< RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
102  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
103  validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
104  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
105  validParamList->set< RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
106 
107  // Make sure we don't recursively validate options for the matrixmatrix kernels
108  ParameterList norecurse;
109  norecurse.disableRecursiveValidation();
110  validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
111 
112  return validParamList;
113  }
114 
115  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117 
118  const ParameterList& pL = GetParameterList();
119  // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
120  std::string nspName = "Nullspace";
121  if(pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
122 
123  Input(fineLevel, "A");
124  Input(fineLevel, "Aggregates");
125  Input(fineLevel, nspName);
126  Input(fineLevel, "UnAmalgamationInfo");
127  Input(fineLevel, "CoarseMap");
128  if( fineLevel.GetLevelID() == 0 &&
129  fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
130  pL.get<bool>("tentative: build coarse coordinates") ) { // and we want coordinates on other levels
131  bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
132  Input(fineLevel, "Coordinates");
133  } else if (bTransferCoordinates_) {
134  Input(fineLevel, "Coordinates");
135  }
136  }
137 
138  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140  return BuildP(fineLevel, coarseLevel);
141  }
142 
143  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
145  FactoryMonitor m(*this, "Build", coarseLevel);
146  typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
147  typedef Xpetra::MultiVector<coordinate_type,LO,GO,NO> RealValuedMultiVector;
148  typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
149 
150  const ParameterList& pL = GetParameterList();
151  std::string nspName = "Nullspace";
152  if(pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
153 
154 
155  RCP<Matrix> Ptentative;
156  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
157  RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates");
158  // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
159  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
160  if ( aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
161  Ptentative = Teuchos::null;
162  Set(coarseLevel, "P", Ptentative);
163  return;
164  }
165 
166  RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
167  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, nspName);
168  RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel, "CoarseMap");
169  RCP<RealValuedMultiVector> fineCoords;
170  if(bTransferCoordinates_) {
171  fineCoords = Get< RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
172  }
173 
174  // FIXME: We should remove the NodeComm on levels past the threshold
175  if(fineLevel.IsAvailable("Node Comm")) {
176  RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,"Node Comm");
177  Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
178  }
179 
180  // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
181  TEUCHOS_TEST_FOR_EXCEPTION( A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
182  Exceptions::RuntimeError,"MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
183 
184  RCP<MultiVector> coarseNullspace;
185  RCP<RealValuedMultiVector> coarseCoords;
186 
187  if(bTransferCoordinates_) {
188  //*** Create the coarse coordinates ***
189  // First create the coarse map and coarse multivector
190  ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
191  LO blkSize = 1;
192  if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
193  blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
194  }
195  GO indexBase = coarseMap->getIndexBase();
196  LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
197  Array<GO> nodeList(numCoarseNodes);
198  const int numDimensions = fineCoords->getNumVectors();
199 
200  for (LO i = 0; i < numCoarseNodes; i++) {
201  nodeList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
202  }
203  RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
204  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
205  nodeList,
206  indexBase,
207  fineCoords->getMap()->getComm());
208  coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
209 
210  // Create overlapped fine coordinates to reduce global communication
211  RCP<RealValuedMultiVector> ghostedCoords;
212  if (aggregates->AggregatesCrossProcessors()) {
213  RCP<const Map> aggMap = aggregates->GetMap();
214  RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
215 
216  ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
217  ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
218  } else {
219  ghostedCoords = fineCoords;
220  }
221 
222  // Get some info about aggregates
223  int myPID = coarseCoordsMap->getComm()->getRank();
224  LO numAggs = aggregates->GetNumAggregates();
225  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
226  const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
227  const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
228 
229  // Fill in coarse coordinates
230  for (int dim = 0; dim < numDimensions; ++dim) {
231  ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
232  ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
233 
234  for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
235  if (procWinner[lnode] == myPID &&
236  lnode < fineCoordsData.size() &&
237  vertex2AggID[lnode] < coarseCoordsData.size() &&
238  Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
239  coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
240  }
241  }
242  for (LO agg = 0; agg < numAggs; agg++) {
243  coarseCoordsData[agg] /= aggSizes[agg];
244  }
245  }
246  }
247 
248  if (!aggregates->AggregatesCrossProcessors()) {
249  if(Xpetra::Helpers<SC,LO,GO,NO>::isTpetraBlockCrs(A)) {
250  BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
251  }
252  else {
253  BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
254  }
255  }
256  else
257  BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
258 
259  // If available, use striding information of fine level matrix A for range
260  // map and coarseMap as domain map; otherwise use plain range map of
261  // Ptent = plain range map of A for range map and coarseMap as domain map.
262  // NOTE:
263  // The latter is not really safe, since there is no striding information
264  // for the range map. This is not really a problem, since striding
265  // information is always available on the intermedium levels and the
266  // coarsest levels.
267  if (A->IsView("stridedMaps") == true)
268  Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
269  else
270  Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
271 
272  if(bTransferCoordinates_) {
273  Set(coarseLevel, "Coordinates", coarseCoords);
274  }
275  Set(coarseLevel, "Nullspace", coarseNullspace);
276  Set(coarseLevel, "P", Ptentative);
277 
278  if (IsPrint(Statistics2)) {
279  RCP<ParameterList> params = rcp(new ParameterList());
280  params->set("printLoadBalancingInfo", true);
281  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
282  }
283  }
284 
285  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
287  BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
288  RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
289 #ifdef HAVE_MUELU_TPETRA
290 
291  /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
292  be generalized later, if we ever need to do so:
293  1) Null space dimension === block size of matrix: So no elasticity right now
294  2) QR is not supported: Under assumption #1, this shouldn't cause problems.
295  3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
296 
297  These assumptions keep our code way simpler and still support the use cases we actually care about.
298  */
299 
300  RCP<const Map> rowMap = A->getRowMap();
301  RCP<const Map> rangeMap = A->getRangeMap();
302  RCP<const Map> colMap = A->getColMap();
303  // const size_t numFinePointRows = rangeMap->getLocalNumElements();
304  const size_t numFineBlockRows = rowMap->getLocalNumElements();
305 
306  typedef Teuchos::ScalarTraits<SC> STS;
307  // typedef typename STS::magnitudeType Magnitude;
308  const SC zero = STS::zero();
309  const SC one = STS::one();
310  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
311 
312  const GO numAggs = aggregates->GetNumAggregates();
313  const size_t NSDim = fineNullspace->getNumVectors();
314  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
315 
316  // Need to generate the coarse block map
317  // NOTE: We assume NSDim == block size here
318  // NOTE: We also assume that coarseMap has contiguous GIDs
319  //const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
320  const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
321  RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
322  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
323  numCoarseBlockRows,
324  coarsePointMap->getIndexBase(),
325  coarsePointMap->getComm());
326  // Sanity checking
327  const ParameterList& pL = GetParameterList();
328  const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
329  const bool &constantColSums = pL.get<bool>("tentative: constant column sums");
330 
331  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums,Exceptions::RuntimeError,
332  "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
333 
334  // The aggregates use the amalgamated column map, which in this case is what we want
335 
336  // Aggregates map is based on the amalgamated column map
337  // We can skip global-to-local conversion if LIDs in row map are
338  // same as LIDs in column map
339  bool goodMap = MueLu::Utilities<SC,LO,GO,NO>::MapsAreNested(*rowMap, *colMap);
340 
341  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
342  // aggStart is a pointer into aggToRowMapLO
343  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
344  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
345  ArrayRCP<LO> aggStart;
346  ArrayRCP<LO> aggToRowMapLO;
347  ArrayRCP<GO> aggToRowMapGO;
348  if (goodMap) {
349  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
350  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
351  } else {
352  throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
353  }
354 
355  coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
356 
357  // Pull out the nullspace vectors so that we can have random access.
358  ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
359  ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
360  for (size_t i = 0; i < NSDim; i++) {
361  fineNS[i] = fineNullspace->getData(i);
362  if (coarsePointMap->getLocalNumElements() > 0)
363  coarseNS[i] = coarseNullspace->getDataNonConst(i);
364  }
365 
366  // BlockCrs requires that we build the (block) graph first, so let's do that...
367  // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
368  // block non-zero per row in the matrix;
369  RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap,coarseBlockMap,0);
370  ArrayRCP<size_t> iaPtent;
371  ArrayRCP<LO> jaPtent;
372  BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
373  ArrayView<size_t> ia = iaPtent();
374  ArrayView<LO> ja = jaPtent();
375 
376  for (size_t i = 0; i < numFineBlockRows; i++) {
377  ia[i] = i;
378  ja[i] = INVALID;
379  }
380  ia[numCoarseBlockRows] = numCoarseBlockRows;
381 
382 
383  for (GO agg = 0; agg < numAggs; agg++) {
384  LO aggSize = aggStart[agg+1] - aggStart[agg];
385  Xpetra::global_size_t offset = agg;
386 
387  for (LO j = 0; j < aggSize; j++) {
388  // FIXME: Allow for bad maps
389  const LO localRow = aggToRowMapLO[aggStart[agg]+j];
390  const size_t rowStart = ia[localRow];
391  ja[rowStart] = offset;
392  }
393  }
394 
395  // Compress storage (remove all INVALID, which happen when we skip zeros)
396  // We do that in-place
397  size_t ia_tmp = 0, nnz = 0;
398  for (size_t i = 0; i < numFineBlockRows; i++) {
399  for (size_t j = ia_tmp; j < ia[i+1]; j++)
400  if (ja[j] != INVALID) {
401  ja [nnz] = ja [j];
402  nnz++;
403  }
404  ia_tmp = ia[i+1];
405  ia[i+1] = nnz;
406  }
407 
408  if (rowMap->lib() == Xpetra::UseTpetra) {
409  // - Cannot resize for Epetra, as it checks for same pointers
410  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
411  // NOTE: these invalidate ja and val views
412  jaPtent .resize(nnz);
413  }
414 
415  GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
416  BlockGraph->setAllIndices(iaPtent, jaPtent);
417 
418  // Managing labels & constants for ESFC
419  {
420  RCP<ParameterList> FCparams;
421  if(pL.isSublist("matrixmatrix: kernel params"))
422  FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
423  else
424  FCparams= rcp(new ParameterList);
425  // By default, we don't need global constants for TentativeP, but we do want it for the graph
426  // if we're printing statistics, so let's leave it on for now.
427  FCparams->set("compute global constants",FCparams->get("compute global constants",true));
428  std::string levelIDs = toString(levelID);
429  FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
430  RCP<const Export> dummy_e;
431  RCP<const Import> dummy_i;
432  BlockGraph->expertStaticFillComplete(coarseBlockMap,rowMap,dummy_i,dummy_e,FCparams);
433  }
434 
435  // Now let's make a BlockCrs Matrix
436  // NOTE: Assumes block size== NSDim
437  RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap,NSDim);
438  RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
439  if(P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
440  RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
441 
443  // "no-QR" option //
445  // Local Q factor is just the fine nullspace support over the current aggregate.
446  // Local R factor is the identity.
447  // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
448  // NOTE: "goodMap" case only
449  Teuchos::Array<Scalar> block(NSDim*NSDim, zero);
450  Teuchos::Array<LO> bcol(1);
451 
452  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
453  for (LO agg = 0; agg < numAggs; agg++) {
454  bcol[0] = agg;
455  const LO aggSize = aggStart[agg+1] - aggStart[agg];
456  Xpetra::global_size_t offset = agg*NSDim;
457 
458  // Process each row in the local Q factor
459  // NOTE: Blocks are in row-major order
460  for (LO j = 0; j < aggSize; j++) {
461  const LO localBlockRow = aggToRowMapLO[aggStart[agg]+j];
462 
463  for (size_t r = 0; r < NSDim; r++) {
464  LO localPointRow = localBlockRow*NSDim + r;
465  for (size_t c = 0; c < NSDim; c++)
466  block[r*NSDim+c] = fineNS[c][localPointRow];
467  }
468  // NOTE: Assumes columns==aggs and are ordered sequentially
469  P_tpetra->replaceLocalValues(localBlockRow,bcol(),block());
470 
471  }//end aggSize
472 
473  for (size_t j = 0; j < NSDim; j++)
474  coarseNS[j][offset+j] = one;
475 
476  } //for (GO agg = 0; agg < numAggs; agg++)
477 
478  Ptentative = P_wrap;
479 #else
480  throw std::runtime_error("TentativePFactory::BuildPuncoupledBlockCrs: Requires Tpetra");
481 #endif
482  }
483 
484  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
486  BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
487  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
488  typedef Teuchos::ScalarTraits<SC> STS;
489  typedef typename STS::magnitudeType Magnitude;
490  const SC zero = STS::zero();
491  const SC one = STS::one();
492 
493  // number of aggregates
494  GO numAggs = aggregates->GetNumAggregates();
495 
496  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
497  // aggStart is a pointer into aggToRowMap
498  // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
499  // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
500  ArrayRCP<LO> aggStart;
501  ArrayRCP< GO > aggToRowMap;
502  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
503 
504  // find size of the largest aggregate
505  LO maxAggSize=0;
506  for (GO i=0; i<numAggs; ++i) {
507  LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
508  if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
509  }
510 
511  // dimension of fine level nullspace
512  const size_t NSDim = fineNullspace->getNumVectors();
513 
514  // index base for coarse Dof map (usually 0)
515  GO indexBase=A->getRowMap()->getIndexBase();
516 
517  const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
518  const RCP<const Map> uniqueMap = A->getDomainMap();
519  RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
520  RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
521  fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
522 
523  // Pull out the nullspace vectors so that we can have random access.
524  ArrayRCP< ArrayRCP<const SC> > fineNS(NSDim);
525  for (size_t i=0; i<NSDim; ++i)
526  fineNS[i] = fineNullspaceWithOverlap->getData(i);
527 
528  //Allocate storage for the coarse nullspace.
529  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
530 
531  ArrayRCP< ArrayRCP<SC> > coarseNS(NSDim);
532  for (size_t i=0; i<NSDim; ++i)
533  if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
534 
535  //This makes the rowmap of Ptent the same as that of A->
536  //This requires moving some parts of some local Q's to other processors
537  //because aggregates can span processors.
538  RCP<const Map > rowMapForPtent = A->getRowMap();
539  const Map& rowMapForPtentRef = *rowMapForPtent;
540 
541  // Set up storage for the rows of the local Qs that belong to other processors.
542  // FIXME This is inefficient and could be done within the main loop below with std::vector's.
543  RCP<const Map> colMap = A->getColMap();
544 
545  RCP<const Map > ghostQMap;
546  RCP<MultiVector> ghostQvalues;
547  Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
548  RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
549  ArrayRCP< ArrayRCP<SC> > ghostQvals;
550  ArrayRCP< ArrayRCP<GO> > ghostQcols;
551  ArrayRCP< GO > ghostQrows;
552 
553  Array<GO> ghostGIDs;
554  for (LO j=0; j<numAggs; ++j) {
555  for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
556  if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
557  ghostGIDs.push_back(aggToRowMap[k]);
558  }
559  }
560  }
561  ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
562  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
563  ghostGIDs,
564  indexBase, A->getRowMap()->getComm()); //JG:Xpetra::global_size_t>?
565  //Vector to hold bits of Q that go to other processors.
566  ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
567  //Note that Epetra does not support MultiVectors templated on Scalar != double.
568  //So to work around this, we allocate an array of Vectors. This shouldn't be too
569  //expensive, as the number of Vectors is NSDim.
570  ghostQcolumns.resize(NSDim);
571  for (size_t i=0; i<NSDim; ++i)
572  ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
573  ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
574  if (ghostQvalues->getLocalLength() > 0) {
575  ghostQvals.resize(NSDim);
576  ghostQcols.resize(NSDim);
577  for (size_t i=0; i<NSDim; ++i) {
578  ghostQvals[i] = ghostQvalues->getDataNonConst(i);
579  ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
580  }
581  ghostQrows = ghostQrowNums->getDataNonConst(0);
582  }
583 
584  //importer to handle moving Q
585  importer = ImportFactory::Build(ghostQMap, A->getRowMap());
586 
587  // Dense QR solver
588  Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
589 
590  //Allocate temporary storage for the tentative prolongator.
591  Array<GO> globalColPtr(maxAggSize*NSDim,0);
592  Array<LO> localColPtr(maxAggSize*NSDim,0);
593  Array<SC> valPtr(maxAggSize*NSDim,0.);
594 
595  //Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
596  const Map& coarseMapRef = *coarseMap;
597 
598  // For the 3-arrays constructor
599  ArrayRCP<size_t> ptent_rowptr;
600  ArrayRCP<LO> ptent_colind;
601  ArrayRCP<Scalar> ptent_values;
602 
603  // Because ArrayRCPs are slow...
604  ArrayView<size_t> rowptr_v;
605  ArrayView<LO> colind_v;
606  ArrayView<Scalar> values_v;
607 
608  // For temporary usage
609  Array<size_t> rowptr_temp;
610  Array<LO> colind_temp;
611  Array<Scalar> values_temp;
612 
613  RCP<CrsMatrix> PtentCrs;
614 
615  RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
616  PtentCrs = PtentCrsWrap->getCrsMatrix();
617  Ptentative = PtentCrsWrap;
618 
619  //*****************************************************************
620  //Loop over all aggregates and calculate local QR decompositions.
621  //*****************************************************************
622  GO qctr=0; //for indexing into Ptent data vectors
623  const Map& nonUniqueMapRef = *nonUniqueMap;
624 
625  size_t total_nnz_count=0;
626 
627  for (GO agg=0; agg<numAggs; ++agg)
628  {
629  LO myAggSize = aggStart[agg+1]-aggStart[agg];
630  // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
631  // "localQR" (in column major format) for the QR routine.
632  Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
633  for (size_t j=0; j<NSDim; ++j) {
634  bool bIsZeroNSColumn = true;
635  for (LO k=0; k<myAggSize; ++k)
636  {
637  // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
638  // fineNS[j][n] is the nth entry in the jth NS vector
639  try{
640  SC nsVal = fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ]; // extract information from fine level NS
641  localQR(k,j) = nsVal;
642  if (nsVal != zero) bIsZeroNSColumn = false;
643  }
644  catch(...) {
645  GetOStream(Runtime1,-1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
646  GetOStream(Runtime1,-1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
647  GetOStream(Runtime1,-1) << "(local?) aggId=" << agg << std::endl;
648  GetOStream(Runtime1,-1) << "aggSize=" << myAggSize << std::endl;
649  GetOStream(Runtime1,-1) << "agg DOF=" << k << std::endl;
650  GetOStream(Runtime1,-1) << "NS vector j=" << j << std::endl;
651  GetOStream(Runtime1,-1) << "j*myAggSize + k = " << j*myAggSize + k << std::endl;
652  GetOStream(Runtime1,-1) << "aggToRowMap["<<agg<<"][" << k << "] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
653  GetOStream(Runtime1,-1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] << " is global element in nonUniqueMap = " <<
654  nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
655  GetOStream(Runtime1,-1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
656  GetOStream(Runtime1,-1) << "fineNS...=" << fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
657  GetOStream(Errors,-1) << "caught an error!" << std::endl;
658  }
659  } //for (LO k=0 ...
660  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
661  } //for (LO j=0 ...
662 
663  Xpetra::global_size_t offset=agg*NSDim;
664 
665  if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
666  // calculate QR decomposition (standard)
667  // R is stored in localQR (size: myAggSize x NSDim)
668 
669  // Householder multiplier
670  SC tau = localQR(0,0);
671 
672  if (NSDim == 1) {
673  // Only one nullspace vector, so normalize by hand
674  Magnitude dtemp=0;
675  for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
676  Magnitude tmag = STS::magnitude(localQR(k,0));
677  dtemp += tmag*tmag;
678  }
679  dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
680  tau = localQR(0,0);
681  localQR(0,0) = dtemp;
682  } else {
683  qrSolver.setMatrix( Teuchos::rcp(&localQR, false) );
684  qrSolver.factor();
685  }
686 
687  // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
688  // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
689  // This stores the (offset+k)th entry only if it is local according to the coarseMap.
690  for (size_t j=0; j<NSDim; ++j) {
691  for (size_t k=0; k<=j; ++k) {
692  try {
693  if (coarseMapRef.isNodeLocalElement(offset+k)) {
694  coarseNS[j][offset+k] = localQR(k, j); //TODO is offset+k the correct local ID?!
695  }
696  }
697  catch(...) {
698  GetOStream(Errors,-1) << "caught error in coarseNS insert, j="<<j<<", offset+k = "<<offset+k<<std::endl;
699  }
700  }
701  }
702 
703  // Calculate Q, the tentative prolongator.
704  // The Lapack GEQRF call only works for myAggsize >= NSDim
705 
706  if (NSDim == 1) {
707  // Only one nullspace vector, so calculate Q by hand
708  Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
709  localQR(0,0) = tau;
710  dtemp = 1 / dtemp;
711  for (LocalOrdinal i=0; i<myAggSize; ++i) {
712  localQR(i,0) *= dtemp ;
713  }
714  } else {
715  qrSolver.formQ();
716  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
717  for (size_t j=0; j<NSDim; j++) {
718  for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
719  localQR(i,j) = (*qFactor)(i,j);
720  }
721  }
722  }
723 
724  // end default case (myAggSize >= NSDim)
725  } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
726  // See comments for the uncoupled case
727 
728  // R = extended (by adding identity rows) localQR
729  for (size_t j = 0; j < NSDim; j++)
730  for (size_t k = 0; k < NSDim; k++) {
731  TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset+k), Exceptions::RuntimeError,
732  "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset+k);
733 
734  if (k < as<size_t>(myAggSize))
735  coarseNS[j][offset+k] = localQR(k,j);
736  else
737  coarseNS[j][offset+k] = (k == j ? one : zero);
738  }
739 
740  // Q = I (rectangular)
741  for (size_t i = 0; i < as<size_t>(myAggSize); i++)
742  for (size_t j = 0; j < NSDim; j++)
743  localQR(i,j) = (j == i ? one : zero);
744  } // end else (special handling for 1pt aggregates)
745 
746  //Process each row in the local Q factor. If the row is local to the current processor
747  //according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
748  //to be communicated later to the owning processor.
749  //FIXME -- what happens if maps are blocked?
750  for (GO j=0; j<myAggSize; ++j) {
751  //This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
752  //If it is, the row is inserted. If not, the row number, columns, and values are saved in
753  //MultiVectors that will be sent to other processors.
754  GO globalRow = aggToRowMap[aggStart[agg]+j];
755 
756  //TODO is the use of Xpetra::global_size_t below correct?
757  if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false ) {
758  ghostQrows[qctr] = globalRow;
759  for (size_t k=0; k<NSDim; ++k) {
760  ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
761  ghostQvals[k][qctr] = localQR(j,k);
762  }
763  ++qctr;
764  } else {
765  size_t nnz=0;
766  for (size_t k=0; k<NSDim; ++k) {
767  try{
768  if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
769  localColPtr[nnz] = agg * NSDim + k;
770  globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
771  valPtr[nnz] = localQR(j,k);
772  ++total_nnz_count;
773  ++nnz;
774  }
775  }
776  catch(...) {
777  GetOStream(Errors,-1) << "caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
778  }
779  } //for (size_t k=0; k<NSDim; ++k)
780 
781  try{
782  Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
783  }
784  catch(...) {
785  GetOStream(Errors,-1) << "pid " << A->getRowMap()->getComm()->getRank()
786  << "caught error during Ptent row insertion, global row "
787  << globalRow << std::endl;
788  }
789  }
790  } //for (GO j=0; j<myAggSize; ++j)
791 
792  } // for (LO agg=0; agg<numAggs; ++agg)
793 
794 
795  // ***********************************************************
796  // ************* end of aggregate-wise QR ********************
797  // ***********************************************************
798  GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
799  // Import ghost parts of Q factors and insert into Ptentative.
800  // First import just the global row numbers.
801  RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
802  targetQrowNums->putScalar(-1);
803  targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
804  ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
805 
806  // Now create map based on just the row numbers imported.
807  Array<GO> gidsToImport;
808  gidsToImport.reserve(targetQrows.size());
809  for (typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
810  if (*r > -1) {
811  gidsToImport.push_back(*r);
812  }
813  }
814  RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
815  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
816  gidsToImport, indexBase, A->getRowMap()->getComm() );
817 
818  // Import using the row numbers that this processor will receive.
819  importer = ImportFactory::Build(ghostQMap, reducedMap);
820 
821  Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
822  for (size_t i=0; i<NSDim; ++i) {
823  targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
824  targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
825  }
826  RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
827  targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
828 
829  ArrayRCP< ArrayRCP<SC> > targetQvals;
830  ArrayRCP<ArrayRCP<GO> > targetQcols;
831  if (targetQvalues->getLocalLength() > 0) {
832  targetQvals.resize(NSDim);
833  targetQcols.resize(NSDim);
834  for (size_t i=0; i<NSDim; ++i) {
835  targetQvals[i] = targetQvalues->getDataNonConst(i);
836  targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
837  }
838  }
839 
840  valPtr = Array<SC>(NSDim,0.);
841  globalColPtr = Array<GO>(NSDim,0);
842  for (typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
843  if (targetQvalues->getLocalLength() > 0) {
844  for (size_t j=0; j<NSDim; ++j) {
845  valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
846  globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
847  }
848  Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
849  } //if (targetQvalues->getLocalLength() > 0)
850  }
851 
852  Ptentative->fillComplete(coarseMap, A->getDomainMap());
853  }
854 
855 
856 
857  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
859  BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
860  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
861  RCP<const Map> rowMap = A->getRowMap();
862  RCP<const Map> colMap = A->getColMap();
863  const size_t numRows = rowMap->getLocalNumElements();
864 
865  typedef Teuchos::ScalarTraits<SC> STS;
866  typedef typename STS::magnitudeType Magnitude;
867  const SC zero = STS::zero();
868  const SC one = STS::one();
869  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
870 
871  const GO numAggs = aggregates->GetNumAggregates();
872  const size_t NSDim = fineNullspace->getNumVectors();
873  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
874 
875 
876  // Sanity checking
877  const ParameterList& pL = GetParameterList();
878  const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
879  const bool &constantColSums = pL.get<bool>("tentative: constant column sums");
880 
881  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums,Exceptions::RuntimeError,
882  "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
883 
884  // Aggregates map is based on the amalgamated column map
885  // We can skip global-to-local conversion if LIDs in row map are
886  // same as LIDs in column map
887  bool goodMap = MueLu::Utilities<SC,LO,GO,NO>::MapsAreNested(*rowMap, *colMap);
888 
889  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
890  // aggStart is a pointer into aggToRowMapLO
891  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
892  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
893  ArrayRCP<LO> aggStart;
894  ArrayRCP<LO> aggToRowMapLO;
895  ArrayRCP<GO> aggToRowMapGO;
896  if (goodMap) {
897  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
898  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
899 
900  } else {
901  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
902  GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
903  << "using GO->LO conversion with performance penalty" << std::endl;
904  }
905  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
906 
907  // Pull out the nullspace vectors so that we can have random access.
908  ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
909  ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
910  for (size_t i = 0; i < NSDim; i++) {
911  fineNS[i] = fineNullspace->getData(i);
912  if (coarseMap->getLocalNumElements() > 0)
913  coarseNS[i] = coarseNullspace->getDataNonConst(i);
914  }
915 
916  size_t nnzEstimate = numRows * NSDim;
917 
918  // Time to construct the matrix and fill in the values
919  Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
920  RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
921 
922  ArrayRCP<size_t> iaPtent;
923  ArrayRCP<LO> jaPtent;
924  ArrayRCP<SC> valPtent;
925 
926  PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
927 
928  ArrayView<size_t> ia = iaPtent();
929  ArrayView<LO> ja = jaPtent();
930  ArrayView<SC> val = valPtent();
931 
932  ia[0] = 0;
933  for (size_t i = 1; i <= numRows; i++)
934  ia[i] = ia[i-1] + NSDim;
935 
936  for (size_t j = 0; j < nnzEstimate; j++) {
937  ja [j] = INVALID;
938  val[j] = zero;
939  }
940 
941 
942  if (doQRStep) {
944  // Standard aggregate-wise QR //
946  for (GO agg = 0; agg < numAggs; agg++) {
947  LO aggSize = aggStart[agg+1] - aggStart[agg];
948 
949  Xpetra::global_size_t offset = agg*NSDim;
950 
951  // Extract the piece of the nullspace corresponding to the aggregate, and
952  // put it in the flat array, "localQR" (in column major format) for the
953  // QR routine.
954  Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
955  if (goodMap) {
956  for (size_t j = 0; j < NSDim; j++)
957  for (LO k = 0; k < aggSize; k++)
958  localQR(k,j) = fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
959  } else {
960  for (size_t j = 0; j < NSDim; j++)
961  for (LO k = 0; k < aggSize; k++)
962  localQR(k,j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
963  }
964 
965  // Test for zero columns
966  for (size_t j = 0; j < NSDim; j++) {
967  bool bIsZeroNSColumn = true;
968 
969  for (LO k = 0; k < aggSize; k++)
970  if (localQR(k,j) != zero)
971  bIsZeroNSColumn = false;
972 
973  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
974  "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
975  }
976 
977  // Calculate QR decomposition (standard)
978  // NOTE: Q is stored in localQR and R is stored in coarseNS
979  if (aggSize >= Teuchos::as<LO>(NSDim)) {
980 
981  if (NSDim == 1) {
982  // Only one nullspace vector, calculate Q and R by hand
983  Magnitude norm = STS::magnitude(zero);
984  for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
985  norm += STS::magnitude(localQR(k,0)*localQR(k,0));
986  norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
987 
988  // R = norm
989  coarseNS[0][offset] = norm;
990 
991  // Q = localQR(:,0)/norm
992  for (LO i = 0; i < aggSize; i++)
993  localQR(i,0) /= norm;
994 
995  } else {
996  Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
997  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
998  qrSolver.factor();
999 
1000  // R = upper triangular part of localQR
1001  for (size_t j = 0; j < NSDim; j++)
1002  for (size_t k = 0; k <= j; k++)
1003  coarseNS[j][offset+k] = localQR(k,j); //TODO is offset+k the correct local ID?!
1004 
1005  // Calculate Q, the tentative prolongator.
1006  // The Lapack GEQRF call only works for myAggsize >= NSDim
1007  qrSolver.formQ();
1008  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
1009  for (size_t j = 0; j < NSDim; j++)
1010  for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
1011  localQR(i,j) = (*qFactor)(i,j);
1012  }
1013 
1014  } else {
1015  // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
1016 
1017  // The local QR decomposition is not possible in the "overconstrained"
1018  // case (i.e. number of columns in localQR > number of rows), which
1019  // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
1020  // is only possible for single node aggregates in structural mechanics.
1021  // (Similar problems may arise in discontinuous Galerkin problems...)
1022  // We bypass the QR decomposition and use an identity block in the
1023  // tentative prolongator for the single node aggregate and transfer the
1024  // corresponding fine level null space information 1-to-1 to the coarse
1025  // level null space part.
1026 
1027  // NOTE: The resulting tentative prolongation operator has
1028  // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
1029  // coarse level operator A. To deal with that one has the following
1030  // options:
1031  // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
1032  // false) to add some identity block to the diagonal of the zero rows
1033  // in the coarse level operator A, such that standard level smoothers
1034  // can be used again.
1035  // - Use special (projection-based) level smoothers, which can deal
1036  // with singular matrices (very application specific)
1037  // - Adapt the code below to avoid zero columns. However, we do not
1038  // support a variable number of DOFs per node in MueLu/Xpetra which
1039  // makes the implementation really hard.
1040 
1041  // R = extended (by adding identity rows) localQR
1042  for (size_t j = 0; j < NSDim; j++)
1043  for (size_t k = 0; k < NSDim; k++)
1044  if (k < as<size_t>(aggSize))
1045  coarseNS[j][offset+k] = localQR(k,j);
1046  else
1047  coarseNS[j][offset+k] = (k == j ? one : zero);
1048 
1049  // Q = I (rectangular)
1050  for (size_t i = 0; i < as<size_t>(aggSize); i++)
1051  for (size_t j = 0; j < NSDim; j++)
1052  localQR(i,j) = (j == i ? one : zero);
1053  }
1054 
1055 
1056  // Process each row in the local Q factor
1057  // FIXME: What happens if maps are blocked?
1058  for (LO j = 0; j < aggSize; j++) {
1059  LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
1060 
1061  size_t rowStart = ia[localRow];
1062  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1063  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1064  if (localQR(j,k) != zero) {
1065  ja [rowStart+lnnz] = offset + k;
1066  val[rowStart+lnnz] = localQR(j,k);
1067  lnnz++;
1068  }
1069  }
1070  }
1071  }
1072 
1073  } else {
1074  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1075  if (NSDim>1)
1076  GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1078  // "no-QR" option //
1080  // Local Q factor is just the fine nullspace support over the current aggregate.
1081  // Local R factor is the identity.
1082  // TODO I have not implemented any special handling for aggregates that are too
1083  // TODO small to locally support the nullspace, as is done in the standard QR
1084  // TODO case above.
1085  if (goodMap) {
1086  for (GO agg = 0; agg < numAggs; agg++) {
1087  const LO aggSize = aggStart[agg+1] - aggStart[agg];
1088  Xpetra::global_size_t offset = agg*NSDim;
1089 
1090  // Process each row in the local Q factor
1091  // FIXME: What happens if maps are blocked?
1092  for (LO j = 0; j < aggSize; j++) {
1093 
1094  //TODO Here I do not check for a zero nullspace column on the aggregate.
1095  // as is done in the standard QR case.
1096 
1097  const LO localRow = aggToRowMapLO[aggStart[agg]+j];
1098 
1099  const size_t rowStart = ia[localRow];
1100 
1101  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1102  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1103  SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg]+j]];
1104  if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1105  if (qr_jk != zero) {
1106  ja [rowStart+lnnz] = offset + k;
1107  val[rowStart+lnnz] = qr_jk;
1108  lnnz++;
1109  }
1110  }
1111  }
1112  for (size_t j = 0; j < NSDim; j++)
1113  coarseNS[j][offset+j] = one;
1114  } //for (GO agg = 0; agg < numAggs; agg++)
1115 
1116  } else {
1117  for (GO agg = 0; agg < numAggs; agg++) {
1118  const LO aggSize = aggStart[agg+1] - aggStart[agg];
1119  Xpetra::global_size_t offset = agg*NSDim;
1120  for (LO j = 0; j < aggSize; j++) {
1121 
1122  const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]);
1123 
1124  const size_t rowStart = ia[localRow];
1125 
1126  for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1127  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1128  SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j])];
1129  if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1130  if (qr_jk != zero) {
1131  ja [rowStart+lnnz] = offset + k;
1132  val[rowStart+lnnz] = qr_jk;
1133  lnnz++;
1134  }
1135  }
1136  }
1137  for (size_t j = 0; j < NSDim; j++)
1138  coarseNS[j][offset+j] = one;
1139  } //for (GO agg = 0; agg < numAggs; agg++)
1140 
1141  } //if (goodmap) else ...
1142 
1143  } //if doQRStep ... else
1144 
1145  // Compress storage (remove all INVALID, which happen when we skip zeros)
1146  // We do that in-place
1147  size_t ia_tmp = 0, nnz = 0;
1148  for (size_t i = 0; i < numRows; i++) {
1149  for (size_t j = ia_tmp; j < ia[i+1]; j++)
1150  if (ja[j] != INVALID) {
1151  ja [nnz] = ja [j];
1152  val[nnz] = val[j];
1153  nnz++;
1154  }
1155  ia_tmp = ia[i+1];
1156  ia[i+1] = nnz;
1157  }
1158  if (rowMap->lib() == Xpetra::UseTpetra) {
1159  // - Cannot resize for Epetra, as it checks for same pointers
1160  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1161  // NOTE: these invalidate ja and val views
1162  jaPtent .resize(nnz);
1163  valPtent.resize(nnz);
1164  }
1165 
1166  GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1167 
1168  PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1169 
1170 
1171  // Managing labels & constants for ESFC
1172  RCP<ParameterList> FCparams;
1173  if(pL.isSublist("matrixmatrix: kernel params"))
1174  FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1175  else
1176  FCparams= rcp(new ParameterList);
1177  // By default, we don't need global constants for TentativeP
1178  FCparams->set("compute global constants",FCparams->get("compute global constants",false));
1179  std::string levelIDs = toString(levelID);
1180  FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
1181  RCP<const Export> dummy_e;
1182  RCP<const Import> dummy_i;
1183 
1184  PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(),dummy_i,dummy_e,FCparams);
1185  }
1186 
1187 
1188 
1189 } //namespace MueLu
1190 
1191 // TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1192 
1193 #define MUELU_TENTATIVEPFACTORY_SHORT
1194 #endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
Important warning messages (one line)
MueLu::DefaultLocalOrdinal LocalOrdinal
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
Timer to be used in factories. Similar to Monitor but with additional timers.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Namespace for MueLu classes and methods.
static const NoFactory * get()
Print even more statistics.
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
#define SET_VALID_ENTRY(name)
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.