MueLu  Version of the Day
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47 #define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 
51 #include <Teuchos_LAPACK.hpp>
52 
53 #include <Xpetra_CrsMatrixWrap.hpp>
54 #include <Xpetra_ImportFactory.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 
61 
62 #include "MueLu_MasterList.hpp"
63 #include "MueLu_Monitor.hpp"
64 
65 namespace MueLu {
66 
67  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  RCP<ParameterList> validParamList = rcp(new ParameterList());
70 
71 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72  SET_VALID_ENTRY("semicoarsen: piecewise constant");
73  SET_VALID_ENTRY("semicoarsen: piecewise linear");
74  SET_VALID_ENTRY("semicoarsen: coarsen rate");
75  SET_VALID_ENTRY("semicoarsen: calculate nonsym restriction");
76 #undef SET_VALID_ENTRY
77  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
78  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
79  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");
80 
81  validParamList->set< RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection vertical line ids");
82  validParamList->set< RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection layer ids");
83  validParamList->set< RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for number of coarse z-layers");
84 
85  return validParamList;
86  }
87 
88  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
90  Input(fineLevel, "A");
91  Input(fineLevel, "Nullspace");
92 
93  Input(fineLevel, "LineDetection_VertLineIds");
94  Input(fineLevel, "LineDetection_Layers");
95  Input(fineLevel, "CoarseNumZLayers");
96 
97  // check whether fine level coordinate information is available.
98  // If yes, request the fine level coordinates and generate coarse coordinates
99  // during the Build call
100  if (fineLevel.GetLevelID() == 0) {
101  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
102  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
103  bTransferCoordinates_ = true;
104  }
105  } else if (bTransferCoordinates_ == true){
106  // on coarser levels we check the default factory providing "Coordinates"
107  // or the factory declared to provide "Coordinates"
108  // first, check which factory is providing coordinate information
109  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
110  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
111  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
112  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
113  }
114  }
115  }
116 
117  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
119  return BuildP(fineLevel, coarseLevel);
120  }
121 
122  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
124  FactoryMonitor m(*this, "Build", coarseLevel);
125 
126  // obtain general variables
127  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
128  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
129 
130  // get user-provided coarsening rate parameter (constant over all levels)
131  const ParameterList& pL = GetParameterList();
132  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
133  bool buildRestriction = pL.get<bool>("semicoarsen: calculate nonsym restriction");
134  bool doLinear = pL.get<bool>("semicoarsen: piecewise linear");
135 
136  // collect general input data
137  LO BlkSize = A->GetFixedBlockSize();
138  RCP<const Map> rowMap = A->getRowMap();
139  LO Ndofs = rowMap->getLocalNumElements();
140  LO Nnodes = Ndofs/BlkSize;
141 
142  // collect line detection information generated by the LineDetectionFactory instance
143  LO FineNumZLayers = Get< LO >(fineLevel, "CoarseNumZLayers");
144  Teuchos::ArrayRCP<LO> TVertLineId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_VertLineIds");
145  Teuchos::ArrayRCP<LO> TLayerId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_Layers");
146  LO* VertLineId = TVertLineId.getRawPtr();
147  LO* LayerId = TLayerId.getRawPtr();
148 
149  // generate transfer operator with semicoarsening
150  RCP<const Map> theCoarseMap;
151  RCP<Matrix> P, R;
152  RCP<MultiVector> coarseNullspace;
153  GO Ncoarse = MakeSemiCoarsenP(Nnodes,FineNumZLayers,CoarsenRate,LayerId,VertLineId,
154  BlkSize, A, P, theCoarseMap, fineNullspace,coarseNullspace,R,buildRestriction,doLinear);
155 
156  // set StridingInformation of P
157  if (A->IsView("stridedMaps") == true)
158  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
159  else
160  P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
161 
162  if (buildRestriction) {
163  if (A->IsView("stridedMaps") == true)
164  R->CreateView("stridedMaps", theCoarseMap, A->getRowMap("stridedMaps"));
165  else
166  R->CreateView("stridedMaps", theCoarseMap, R->getDomainMap());
167  }
168  if (pL.get<bool>("semicoarsen: piecewise constant")) {
169  TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise constant.");
170  RevertToPieceWiseConstant(P, BlkSize);
171  }
172  if (pL.get<bool>("semicoarsen: piecewise linear")) {
173  TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise linear.");
174  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("semicoarsen: piecewise constant"), Exceptions::RuntimeError, "Cannot use piecewise constant with piecewise linear.");
175  }
176 
177  // Store number of coarse z-layers on the coarse level container
178  // This information is used by the LineDetectionAlgorithm
179  // TODO get rid of the NoFactory
180 
181  LO CoarseNumZLayers = FineNumZLayers*Ncoarse;
182  CoarseNumZLayers /= Ndofs;
183  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
184 
185  // store semicoarsening transfer on coarse level
186  Set(coarseLevel, "P", P);
187  if (buildRestriction) Set(coarseLevel, "RfromPfactory", R);
188 
189  Set(coarseLevel, "Nullspace", coarseNullspace);
190 
191  // transfer coordinates
192  if(bTransferCoordinates_) {
193  typedef Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO> xdMV;
194  RCP<xdMV> fineCoords = Teuchos::null;
195  if (fineLevel.GetLevelID() == 0 &&
196  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
197  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", NoFactory::get());
198  } else {
199  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
200  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
201  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
202  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", myCoordsFact.get());
203  }
204  }
205 
206  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords==Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
207 
208  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
209  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
210  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
211  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
212 
213  // determine the maximum and minimum z coordinate value on the current processor.
214  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max = -Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
215  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min = Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
216  for ( auto it = z.begin(); it != z.end(); ++it) {
217  if(*it > zval_max) zval_max = *it;
218  if(*it < zval_min) zval_min = *it;
219  }
220 
221  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
222 
223  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
224  if(myCoarseZLayers == 1) {
225  myZLayerCoords[0] = zval_min;
226  } else {
227  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max-zval_min)/(myCoarseZLayers-1);
228  for(LO k = 0; k<myCoarseZLayers; ++k) {
229  myZLayerCoords[k] = k*dz;
230  }
231  }
232 
233  // Note, that the coarse level node coordinates have to be in vertical ordering according
234  // to the numbering of the vertical lines
235 
236  // number of vertical lines on current node:
237  LO numVertLines = Nnodes / FineNumZLayers;
238  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
239 
240  RCP<const Map> coarseCoordMap =
241  MapFactory::Build (fineCoords->getMap()->lib(),
242  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
243  Teuchos::as<size_t>(numLocalCoarseNodes),
244  fineCoords->getMap()->getIndexBase(),
245  fineCoords->getMap()->getComm());
246  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
247  coarseCoords->putScalar(-1.0);
248  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
249  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
250  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
251 
252  // loop over all vert line indices (stop as soon as possible)
253  LO cntCoarseNodes = 0;
254  for( LO vt = 0; vt < TVertLineId.size(); ++vt) {
255  //vertical line id in *vt
256  LO curVertLineId = TVertLineId[vt];
257 
258  if(cx[curVertLineId * myCoarseZLayers] == -1.0 &&
259  cy[curVertLineId * myCoarseZLayers] == -1.0) {
260  // loop over all local myCoarseZLayers
261  for (LO n=0; n<myCoarseZLayers; ++n) {
262  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
263  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
264  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
265  }
266  cntCoarseNodes += myCoarseZLayers;
267  }
268 
269  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
270  if(cntCoarseNodes == numLocalCoarseNodes) break;
271  }
272 
273  // set coarse level coordinates
274  Set(coarseLevel, "Coordinates", coarseCoords);
275  } /* end bool bTransferCoordinates */
276 
277  }
278 
279  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
281  /*
282  * Given the number of points in the z direction (PtsPerLine) and a
283  * coarsening rate (CoarsenRate), determine which z-points will serve
284  * as Cpts and return the total number of Cpts.
285  *
286  * Input
287  * PtsPerLine: Number of fine level points in the z direction
288  *
289  * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
290  *
291  * Thin: Must be either 0 or 1. Thin decides what to do when
292  * (PtsPerLine+1)/CoarsenRate is not an integer.
293  *
294  * Thin == 0 ==> ceil() the above fraction
295  * Thin == 1 ==> floor() the above fraction
296  *
297  * Output
298  * LayerCpts Array where LayerCpts[i] indicates that the
299  * LayerCpts[i]th fine level layer is a Cpt Layer.
300  * Note: fine level layers are assumed to be numbered starting
301  * a one.
302  */
303  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
304  LO NCpts, i;
305  LO NCLayers = -1;
306  LO FirstStride;
307 
308  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (CoarsenRate)) - 1.0;
309  if (Thin == 1) NCpts = (LO) ceil(temp);
310  else NCpts = (LO) floor(temp);
311 
312  TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
313 
314  if (NCpts < 1) NCpts = 1;
315 
316  FirstStride= (LO) ceil( ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) PtsPerLine+1)/( (typename Teuchos::ScalarTraits<Scalar>::coordinateType) (NCpts+1)));
317  RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) NCpts);
318 
319  NCLayers = (LO) floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/RestStride)+.00001);
320 
321  TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
322 
323  di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType) FirstStride;
324  for (i = 1; i <= NCpts; i++) {
325  (*LayerCpts)[i] = (LO) floor(di);
326  di += RestStride;
327  }
328 
329  return(NCLayers);
330  }
331 
332 #define MaxHorNeighborNodes 75
333 
334  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
336  MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
337  LO const VertLineId[], LO const DofsPerNode, RCP<Matrix> & Amat, RCP<Matrix>& P,
338  RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
339  RCP<MultiVector>& coarseNullspace, RCP<Matrix>& R, bool buildRestriction, bool doLinear) const {
340 
341  /*
342  * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
343  * describing the z-layer and vertical line (LayerId and VertLineId)
344  * of each matrix block row, a coarsening rate, and dofs/node information,
345  * construct a prolongator that coarsening to semicoarsening in the z-direction
346  * using something like an operator-dependent grid transfer. In particular,
347  * matrix stencils are collapsed to vertical lines. Thus, each vertical line
348  * gives rise to a block tridiagonal matrix. BlkRows corresponding to
349  * Cpts are replaced by identity matrices. This tridiagonal is solved
350  * to determine each interpolation basis functions. Each Blk Rhs corresponds
351  * to all zeros except at the corresponding C-pt which has an identity
352  *
353  * On termination, return the number of local prolongator columns owned by
354  * this processor.
355  *
356  * Note: This code was adapted from a matlab code where offsets/arrays
357  * start from 1. In most parts of the code, this 1 offset is kept
358  * (in some cases wasting the first element of the array). The
359  * input and output matrices of this function has been changed to
360  * have offsets/rows/columns which start from 0. LayerId[] and
361  * VertLineId[] currently start from 1.
362  *
363  * Input
364  * =====
365  * Ntotal Number of fine level Blk Rows owned by this processor
366  *
367  * nz Number of vertical layers. Note: partitioning must be done
368  * so that each processor owns an entire vertical line. This
369  * means that nz is the global number of layers, which should
370  * be equal to the local number of layers.
371  * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
372  * would correspond to CoarsenRate = 3.
373  * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
374  * layer number associated with the dofs within BlkRow.
375  * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
376  * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
377  * BlkRows associated with nodes along the same vertical line
378  * in the mesh should have the same LineId.
379  * DofsPerNode Number of degrees-of-freedom per mesh node.
380  *
381  * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
382  * OrigAcols,
383  * OrigAvals
384  *
385  * Output
386  * =====
387  * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
388  * ParamPcols,
389  * ParamsPvals
390  */
391  int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
392  int *InvLineLayer=NULL, *CptLayers=NULL, StartLayer, NStencilNodes;
393  int BlkRow=-1, dof_j, node_k, *Sub2FullMap=NULL, RowLeng;
394  int i, j, iii, col, count, index, loc, PtRow, PtCol;
395  SC *BandSol=NULL, *BandMat=NULL, TheSum, *RestrictBandMat=NULL, *RestrictBandSol=NULL;
396  int *IPIV=NULL, KL, KU, KLU, N, NRHS, LDAB,INFO;
397  int *Pcols;
398  size_t *Pptr;
399  SC *Pvals;
400  LO *Rcols=NULL;
401  size_t *Rptr=NULL;
402  SC *Rvals=NULL;
403  int MaxStencilSize, MaxNnzPerRow;
404  LO *LayDiff=NULL;
405  int CurRow, LastGuy = -1, NewPtr, RLastGuy = -1;
406  int Ndofs;
407  int Nghost;
408  LO *Layerdofs = NULL, *Col2Dof = NULL;
409 
410  Teuchos::LAPACK<LO,SC> lapack;
411 
412  char notrans[3];
413  notrans[0] = 'N';
414  notrans[1] = 'N';
415  char trans[3];
416  trans[0] = 'T';
417  trans[1] = 'T';
418 
419 
420  MaxNnzPerRow = MaxHorNeighborNodes*DofsPerNode*3;
421  Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1+MaxNnzPerRow); LayDiff = TLayDiff.getRawPtr();
422 
423  Nghost = Amat->getColMap()->getLocalNumElements() - Amat->getDomainMap()->getLocalNumElements();
424  if (Nghost < 0) Nghost = 0;
425  Teuchos::ArrayRCP<LO> TLayerdofs= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Layerdofs = TLayerdofs.getRawPtr();
426  Teuchos::ArrayRCP<LO> TCol2Dof= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Col2Dof= TCol2Dof.getRawPtr();
427 
428  RCP<Xpetra::Vector<LO,LO,GO,NO> > localdtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getDomainMap());
429  RCP<Xpetra::Vector<LO,LO,GO,NO> > dtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getColMap());
430  ArrayRCP<LO> valptr= localdtemp->getDataNonConst(0);
431 
432  for (i = 0; i < Ntotal*DofsPerNode; i++)
433  valptr[i]= LayerId[i/DofsPerNode];
434  valptr=ArrayRCP<LO>();
435 
436  RCP< const Import> importer;
437  importer = Amat->getCrsGraph()->getImporter();
438  if (importer == Teuchos::null) {
439  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
440  }
441  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
442 
443  valptr= dtemp->getDataNonConst(0);
444  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Layerdofs[i]= valptr[i];
445  valptr= localdtemp->getDataNonConst(0);
446  for (i = 0; i < Ntotal*DofsPerNode; i++) valptr[i]= i%DofsPerNode;
447  valptr=ArrayRCP<LO>();
448  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
449 
450  valptr= dtemp->getDataNonConst(0);
451  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Col2Dof[i]= valptr[i];
452  valptr=ArrayRCP<LO>();
453 
454  if (Ntotal != 0) {
455  NLayers = LayerId[0];
456  NVertLines= VertLineId[0];
457  }
458  else { NLayers = -1; NVertLines = -1; }
459 
460  for (i = 1; i < Ntotal; i++) {
461  if ( VertLineId[i] > NVertLines ) NVertLines = VertLineId[i];
462  if ( LayerId[i] > NLayers ) NLayers = LayerId[i];
463  }
464  NLayers++;
465  NVertLines++;
466 
467  /*
468  * Make an inverse map so that we can quickly find the dof
469  * associated with a particular vertical line and layer.
470  */
471 
472  Teuchos::ArrayRCP<LO> TInvLineLayer= Teuchos::arcp<LO>(1+NVertLines*NLayers); InvLineLayer = TInvLineLayer.getRawPtr();
473  for (i=0; i < Ntotal; i++) {
474  InvLineLayer[ VertLineId[i]+1+LayerId[i]*NVertLines ] = i;
475  }
476 
477  /*
478  * Determine coarse layers where injection will be applied.
479  */
480 
481  Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz+1); CptLayers = TCptLayers.getRawPtr();
482  NCLayers = FindCpts(nz,CoarsenRate,0, &CptLayers);
483 
484  /*
485  * Compute the largest possible interpolation stencil width based
486  * on the location of the Clayers. This stencil width is actually
487  * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
488  * one needs to multiply this by DofsPerNode.
489  */
490 
491  if (NCLayers < 2) MaxStencilSize = nz;
492  else MaxStencilSize = CptLayers[2];
493 
494  for (i = 3; i <= NCLayers; i++) {
495  if (MaxStencilSize < CptLayers[i]- CptLayers[i-2])
496  MaxStencilSize = CptLayers[i]- CptLayers[i-2];
497  }
498  if (NCLayers > 1) {
499  if (MaxStencilSize < nz - CptLayers[NCLayers-1]+1)
500  MaxStencilSize = nz - CptLayers[NCLayers-1]+1;
501  }
502 
503  /*
504  * Allocate storage associated with solving a banded sub-matrix needed to
505  * determine the interpolation stencil. Note: we compute interpolation stencils
506  * for all dofs within a node at the same time, and so the banded solution
507  * must be large enough to hold all DofsPerNode simultaneously.
508  */
509 
510  Teuchos::ArrayRCP<LO> TSub2FullMap= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); Sub2FullMap= TSub2FullMap.getRawPtr();
511  Teuchos::ArrayRCP<SC> TBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); BandSol = TBandSol.getRawPtr();
512  Teuchos::ArrayRCP<SC> TResBandSol;
513  if (buildRestriction) {
514  TResBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); RestrictBandSol = TResBandSol.getRawPtr();
515  }
516  /*
517  * Lapack variables. See comments for dgbsv().
518  */
519  KL = 2*DofsPerNode-1;
520  KU = 2*DofsPerNode-1;
521  KLU = KL+KU;
522  LDAB = 2*KL+KU+1;
523  NRHS = DofsPerNode;
524  Teuchos::ArrayRCP<SC> TBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); BandMat = TBandMat.getRawPtr();
525  Teuchos::ArrayRCP<LO> TIPIV= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); IPIV = TIPIV.getRawPtr();
526 
527  Teuchos::ArrayRCP<SC> TResBandMat;
528  if (buildRestriction) {
529  TResBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); RestrictBandMat = TResBandMat.getRawPtr();
530  }
531 
532  /*
533  * Allocate storage for the final interpolation matrix. Note: each prolongator
534  * row might have entries corresponding to at most two nodes.
535  * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
536  * nnz per prolongator row is DofsPerNode*2.
537  */
538 
539  Ndofs = DofsPerNode*Ntotal;
540  MaxNnz = 2*DofsPerNode*Ndofs;
541 
542  RCP<const Map> rowMap = Amat->getRowMap();
543  Xpetra::global_size_t GNdofs= rowMap->getGlobalNumElements();
544 
545  std::vector<size_t> stridingInfo_;
546  stridingInfo_.push_back(DofsPerNode);
547 
548  Xpetra::global_size_t itemp = GNdofs/nz;
549  coarseMap = StridedMapFactory::Build(rowMap->lib(),
550  NCLayers*itemp,
551  NCLayers*NVertLines*DofsPerNode,
552  0, /* index base */
553  stridingInfo_,
554  rowMap->getComm(),
555  -1, /* strided block id */
556  0 /* domain gid offset */);
557 
558 
559  //coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
560 
561 
562  P = rcp(new CrsMatrixWrap(rowMap, coarseMap , 0));
563  coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
564 
565 
566  Teuchos::ArrayRCP<SC> TPvals= Teuchos::arcp<SC>(1+MaxNnz); Pvals= TPvals.getRawPtr();
567  Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode*(2+Ntotal)); Pptr = TPptr.getRawPtr();
568  Teuchos::ArrayRCP<LO> TPcols= Teuchos::arcp<LO>(1+MaxNnz); Pcols= TPcols.getRawPtr();
569 
570  TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
571  Pptr[0] = 0; Pptr[1] = 0;
572 
573 
574  Teuchos::ArrayRCP<SC> TRvals;
575  Teuchos::ArrayRCP<size_t> TRptr;
576  Teuchos::ArrayRCP<LO> TRcols;
577  LO RmaxNnz=-1, RmaxPerRow=-1;
578  if (buildRestriction) {
579  RmaxPerRow = ((LO) ceil(((double) (MaxNnz+7))/((double) (coarseMap->getLocalNumElements()))));
580  RmaxNnz = RmaxPerRow*coarseMap->getLocalNumElements();
581  TRvals= Teuchos::arcp<SC>(1+RmaxNnz); Rvals= TRvals.getRawPtr();
582  TRptr = Teuchos::arcp<size_t>((2+coarseMap->getLocalNumElements())); Rptr = TRptr.getRawPtr();
583  TRcols= Teuchos::arcp<LO>(1+RmaxNnz); Rcols= TRcols.getRawPtr();
584  TEUCHOS_TEST_FOR_EXCEPTION(Rcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
585  Rptr[0] = 0; Rptr[1] = 0;
586  }
587  /*
588  * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
589  * This will be useful while filling up P, and then later we will squeeze out
590  * the unused nonzeros locations.
591  */
592 
593  for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
594  count = 1;
595  for (i = 1; i <= DofsPerNode*Ntotal+1; i++) {
596  Pptr[i] = count;
597  count += (2*DofsPerNode);
598  }
599  if (buildRestriction) {
600  for (i = 1; i <= RmaxNnz; i++) Rcols[i] = -1; /* mark all entries as unused */
601  count = 1;
602  for (i = 1; i <= (int) (coarseMap->getLocalNumElements()+1); i++) {
603  Rptr[i] = count;
604  count += RmaxPerRow;
605  }
606  }
607 
608  /*
609  * Build P column by column. The 1st block column corresponds to the 1st coarse
610  * layer and the first line. The 2nd block column corresponds to the 2nd coarse
611  * layer and the first line. The NCLayers+1 block column corresponds to the
612  * 1st coarse layer and the 2nd line, etc.
613  */
614 
615 
616  col = 0;
617  for (MyLine=1; MyLine <= NVertLines; MyLine += 1) {
618  for (iii=1; iii <= NCLayers; iii+= 1) {
619  col = col+1;
620  MyLayer = CptLayers[iii];
621 
622  /*
623  * StartLayer gives the layer number of the lowest layer that
624  * is nonzero in the interpolation stencil that is currently
625  * being computed. Normally, if we are not near a boundary this
626  * is simply CptsLayers[iii-1]+1.
627  *
628  * NStencilNodes indicates the number of nonzero nodes in the
629  * interpolation stencil that is currently being computed. Normally,
630  * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
631  */
632 
633  if (iii != 1 ) StartLayer = CptLayers[iii-1]+1;
634  else StartLayer = 1;
635 
636  if (iii != NCLayers) NStencilNodes= CptLayers[iii+1]-StartLayer;
637  else NStencilNodes= NLayers - StartLayer + 1;
638 
639 
640  N = NStencilNodes*DofsPerNode;
641 
642  /*
643  * dgbsv() does not require that the first KL rows be initialized,
644  * so we could avoid zeroing out some entries?
645  */
646 
647  for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++) BandSol[ i] = 0.0;
648  for (i = 0; i < LDAB*N; i++) BandMat[ i] = 0.0;
649  if (buildRestriction) {
650  for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++) RestrictBandSol[ i] = 0.0;
651  for (i = 0; i < LDAB*N; i++) RestrictBandMat[ i] = 0.0;
652  }
653 
654  /*
655  * Fill BandMat and BandSol (which is initially the rhs) for each
656  * node in the interpolation stencil that is being computed.
657  */
658 
659  if (!doLinear) {
660  for (node_k=1; node_k <= NStencilNodes ; node_k++) {
661 
662  /* Map a Line and Layer number to a BlkRow in the fine level matrix
663  * and record the mapping from the sub-system to the BlkRow of the
664  * fine level matrix.
665  */
666  BlkRow = InvLineLayer[MyLine+(StartLayer+node_k-2)*NVertLines]+1;
667  Sub2FullMap[node_k] = BlkRow;
668 
669  /* Two cases:
670  * 1) the current layer is not a Cpoint layer. In this case we
671  * want to basically stick the matrix couplings to other
672  * nonzero stencil rows into the band matrix. One way to do
673  * this is to include couplings associated with only MyLine
674  * and ignore all the other couplings. However, what we do
675  * instead is to sum all the coupling at each layer participating
676  * in this interpolation stencil and stick this sum into BandMat.
677  * 2) the current layer is a Cpoint layer and so we
678  * stick an identity block in BandMat and rhs.
679  */
680  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
681 
682  j = (BlkRow-1)*DofsPerNode+dof_i;
683  ArrayView<const LO> AAcols;
684  ArrayView<const SC> AAvals;
685  Amat->getLocalRowView(j, AAcols, AAvals);
686  const int *Acols = AAcols.getRawPtr();
687  const SC *Avals = AAvals.getRawPtr();
688  RowLeng = AAvals.size();
689 
690  TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
691 
692  for (i = 0; i < RowLeng; i++) {
693  LayDiff[i] = Layerdofs[Acols[i]]-StartLayer-node_k+2;
694 
695  /* This is the main spot where there might be off- */
696  /* processor communication. That is, when we */
697  /* average the stencil in the horizontal direction,*/
698  /* we need to know the layer id of some */
699  /* neighbors that might reside off-processor. */
700  }
701  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
702  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
703  PtCol = (node_k-1)*DofsPerNode+dof_j + 1;
704  /* Stick in entry corresponding to Mat(PtRow,PtCol) */
705  /* see dgbsv() comments for matrix format. */
706  TheSum = 0.0;
707  for (i = 0; i < RowLeng; i++) {
708  if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
709  TheSum += Avals[i];
710  }
711  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
712  BandMat[index] = TheSum;
713  if (buildRestriction) RestrictBandMat[index] = TheSum;
714  if (node_k != NStencilNodes) {
715  /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
716  /* see dgbsv() comments for matrix format. */
717  TheSum = 0.0;
718  for (i = 0; i < RowLeng; i++) {
719  if ((LayDiff[i] == 1) &&(Col2Dof[Acols[i]]== dof_j))
720  TheSum += Avals[i];
721  }
722  j = PtCol+DofsPerNode;
723  index=LDAB*(j-1)+KLU+PtRow-j;
724  BandMat[index] = TheSum;
725  if (buildRestriction) RestrictBandMat[index] = TheSum;
726  }
727  if (node_k != 1) {
728  /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
729  /* see dgbsv() comments for matrix format. */
730  TheSum = 0.0;
731  for (i = 0; i < RowLeng; i++) {
732  if ((LayDiff[i]== -1) &&(Col2Dof[Acols[i]]== dof_j))
733  TheSum += Avals[i];
734  }
735  j = PtCol-DofsPerNode;
736  index=LDAB*(j-1)+KLU+PtRow-j;
737  BandMat[index] = TheSum;
738  if (buildRestriction) RestrictBandMat[index] = TheSum;
739  }
740  }
741  }
742  }
743  node_k = MyLayer - StartLayer+1;
744  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
745  /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
746  /* see dgbsv() comments for matrix format. */
747  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
748  BandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
749  if (buildRestriction) RestrictBandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
750 
751  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
752  PtCol = (node_k-1)*DofsPerNode+dof_j+1;
753  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
754  if (PtCol == PtRow) BandMat[index] = 1.0;
755  else BandMat[index] = 0.0;
756  if (buildRestriction) {
757  index = LDAB*(PtRow-1)+KLU+PtCol-PtRow;
758  if (PtCol == PtRow) RestrictBandMat[index] = 1.0;
759  else RestrictBandMat[index] = 0.0;
760  }
761  if (node_k != NStencilNodes) {
762  PtCol = (node_k )*DofsPerNode+dof_j+1;
763  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
764  BandMat[index] = 0.0;
765  if (buildRestriction) {
766  index = LDAB*(PtRow-1)+KLU+PtCol-PtRow;
767  RestrictBandMat[index] = 0.0;
768  }
769  }
770  if (node_k != 1) {
771  PtCol = (node_k-2)*DofsPerNode+dof_j+1;
772  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
773  BandMat[index] = 0.0;
774  if (buildRestriction) {
775  index = LDAB*(PtRow-1)+KLU+PtCol-PtRow;
776  RestrictBandMat[index] = 0.0;
777  }
778  }
779  }
780  }
781 
782  /* Solve banded system and then stick result in Pmatrix arrays */
783 
784  lapack.GBTRF( N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
785 
786  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
787 
788  lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
789  BandSol, N, &INFO );
790 
791  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
792 
793  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
794  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
795  for (i =1; i <= NStencilNodes ; i++) {
796  index = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
797  loc = Pptr[index];
798  Pcols[loc] = (col-1)*DofsPerNode+dof_j+1;
799  Pvals[loc] = BandSol[dof_j*DofsPerNode*NStencilNodes +
800  (i-1)*DofsPerNode + dof_i];
801  Pptr[index]= Pptr[index] + 1;
802  }
803  }
804  }
805  if (buildRestriction) {
806  lapack.GBTRF( N, N, KL, KU, RestrictBandMat, LDAB, IPIV, &INFO);
807  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
808  lapack.GBTRS(trans[0], N, KL, KU, NRHS, RestrictBandMat, LDAB, IPIV, RestrictBandSol, N, &INFO );
809  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
810  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
811  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
812  for (i =1; i <= NStencilNodes ; i++) {
813  index = (col-1)*DofsPerNode+dof_j+1;
814  loc = Rptr[index];
815  Rcols[loc] = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
816  Rvals[loc] = RestrictBandSol[dof_j*DofsPerNode*NStencilNodes +
817  (i-1)*DofsPerNode + dof_i];
818  Rptr[index]= Rptr[index] + 1;
819  }
820  }
821  }
822  }
823  }
824  else {
825  int denom1 = MyLayer-StartLayer+1;
826  int denom2 = StartLayer+NStencilNodes-MyLayer;
827  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
828  for (i =1; i <= NStencilNodes ; i++) {
829  index = (InvLineLayer[MyLine+(StartLayer+i-2)*NVertLines])*DofsPerNode+dof_i+1;
830  loc = Pptr[index];
831  Pcols[loc] = (col-1)*DofsPerNode+dof_i+1;
832  if (i > denom1) Pvals[loc] = 1.0 + ((double)( denom1 - i))/((double) denom2);
833  else Pvals[loc] = ((double)( i))/((double) denom1);
834  Pptr[index]= Pptr[index] + 1;
835  }
836  }
837  }
838  /* inject the null space */
839  // int FineStride = Ntotal*DofsPerNode;
840  // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
841 
842  BlkRow = InvLineLayer[MyLine+(MyLayer-1)*NVertLines]+1;
843  for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
844  Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
845  Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
846  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
847  OneCNull[( col-1)*DofsPerNode+dof_i] = OneFNull[ (BlkRow-1)*DofsPerNode+dof_i];
848  }
849  }
850 
851  }
852  }
853 
854  /*
855  * Squeeze the -1's out of the columns. At the same time convert Pcols
856  * so that now the first column is numbered '0' as opposed to '1'.
857  * Also, the arrays Pcols and Pvals should now use the zeroth element
858  * as opposed to just starting with the first element. Pptr will be
859  * fixed in the for loop below so that Pptr[0] = 0, etc.
860  */
861  CurRow = 1;
862  NewPtr = 1;
863  for (size_t ii=1; ii <= Pptr[Ntotal*DofsPerNode]-1; ii++) {
864  if (ii == Pptr[CurRow]) {
865  Pptr[CurRow] = LastGuy;
866  CurRow++;
867  while (ii > Pptr[CurRow]) {
868  Pptr[CurRow] = LastGuy;
869  CurRow++;
870  }
871  }
872  if (Pcols[ii] != -1) {
873  Pcols[NewPtr-1] = Pcols[ii]-1; /* these -1's fix the offset and */
874  Pvals[NewPtr-1] = Pvals[ii]; /* start using the zeroth element */
875  LastGuy = NewPtr;
876  NewPtr++;
877  }
878  }
879  for (i = CurRow; i <= Ntotal*DofsPerNode; i++) Pptr[CurRow] = LastGuy;
880 
881  /* Now move the pointers so that they now point to the beginning of each
882  * row as opposed to the end of each row
883  */
884  for (i=-Ntotal*DofsPerNode+1; i>= 2 ; i--) {
885  Pptr[i-1] = Pptr[i-2]; /* extra -1 added to start from 0 */
886  }
887  Pptr[0] = 0;
888 
889  // do the same for R
890  if (buildRestriction) {
891  CurRow = 1;
892  NewPtr = 1;
893  for (size_t ii=1; ii <= Rptr[coarseMap->getLocalNumElements()]-1; ii++) {
894  if (ii == Rptr[CurRow]) {
895  Rptr[CurRow] = RLastGuy;
896  CurRow++;
897  while (ii > Rptr[CurRow]) {
898  Rptr[CurRow] = RLastGuy;
899  CurRow++;
900  }
901  }
902  if (Rcols[ii] != -1) {
903  Rcols[NewPtr-1] = Rcols[ii]-1; /* these -1's fix the offset and */
904  Rvals[NewPtr-1] = Rvals[ii]; /* start using the zeroth element */
905  RLastGuy = NewPtr;
906  NewPtr++;
907  }
908  }
909  for (i = CurRow; i <= ((int) coarseMap->getLocalNumElements()); i++) Rptr[CurRow] = RLastGuy;
910  for (i=-coarseMap->getLocalNumElements()+1; i>= 2 ; i--) {
911  Rptr[i-1] = Rptr[i-2]; /* extra -1 added to start from 0 */
912  }
913  Rptr[0] = 0;
914  }
915  ArrayRCP<size_t> rcpRowPtr;
916  ArrayRCP<LO> rcpColumns;
917  ArrayRCP<SC> rcpValues;
918 
919  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
920  LO nnz = Pptr[Ndofs];
921  PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
922 
923  ArrayView<size_t> rowPtr = rcpRowPtr();
924  ArrayView<LO> columns = rcpColumns();
925  ArrayView<SC> values = rcpValues();
926 
927  // copy data over
928 
929  for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
930  for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
931  for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
932  PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
933  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
934  ArrayRCP<size_t> RrcpRowPtr;
935  ArrayRCP<LO> RrcpColumns;
936  ArrayRCP<SC> RrcpValues;
937  RCP<CrsMatrix> RCrs;
938  if (buildRestriction) {
939  R = rcp(new CrsMatrixWrap(coarseMap, rowMap, 0));
940  RCrs = rcp_dynamic_cast<CrsMatrixWrap>(R)->getCrsMatrix();
941  nnz = Rptr[coarseMap->getLocalNumElements()];
942  RCrs->allocateAllValues(nnz, RrcpRowPtr, RrcpColumns, RrcpValues);
943 
944  ArrayView<size_t> RrowPtr = RrcpRowPtr();
945  ArrayView<LO> Rcolumns = RrcpColumns();
946  ArrayView<SC> Rvalues = RrcpValues();
947 
948  // copy data over
949 
950  for (LO ii = 0; ii <= ((LO)coarseMap->getLocalNumElements()); ii++) RrowPtr[ii] = Rptr[ii];
951  for (LO ii = 0; ii < nnz; ii++) Rcolumns[ii] = Rcols[ii];
952  for (LO ii = 0; ii < nnz; ii++) Rvalues[ii] = Rvals[ii];
953  RCrs->setAllValues(RrcpRowPtr, RrcpColumns, RrcpValues);
954  RCrs->expertStaticFillComplete(Amat->getRangeMap(), coarseMap);
955  }
956 
957  return NCLayers*NVertLines*DofsPerNode;
958  }
959  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
961  // This function is a bit of a hack. We basically compute the semi-coarsening transfers and then throw
962  // away all the interpolation coefficients, instead replacing them by piecewise constants. The reason for this
963  // is that SemiCoarsening has no notion of aggregates so defining piecewise constants in the "usual way" is
964  // not possible. Instead, we look for the largest entry in each row, make it a one, and zero out the other
965  // non-zero entries
966 
967  ArrayView<const LocalOrdinal> inds;
968  ArrayView<const Scalar> vals1;
969  ArrayView< Scalar> vals;
970  Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
971  Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
972 
973  for (size_t i = 0; i < P->getRowMap()->getLocalNumElements(); i++) {
974  P->getLocalRowView(i, inds, vals1);
975 
976  size_t nnz = inds.size();
977  if (nnz != 0) vals = ArrayView<Scalar>(const_cast<Scalar*>(vals1.getRawPtr()), nnz);
978 
979  LO largestIndex = -1;
980  Scalar largestValue = ZERO;
981  /* find largest value in row and change that one to a 1 while the others are set to 0 */
982 
983  LO rowDof = i%BlkSize;
984  for (size_t j =0; j < nnz; j++) {
985  if (Teuchos::ScalarTraits<SC>::magnitude(vals[ j ]) >= Teuchos::ScalarTraits<SC>::magnitude(largestValue)) {
986  if ( inds[j]%BlkSize == rowDof ) {
987  largestValue = vals[j];
988  largestIndex = (int) j;
989  }
990  }
991  vals[j] = ZERO;
992  }
993  if (largestIndex != -1) vals[largestIndex] = ONE;
994  else
995  TEUCHOS_TEST_FOR_EXCEPTION(nnz > 0, Exceptions::RuntimeError, "no nonzero column associated with a proper dof within node.");
996 
997  if (Teuchos::ScalarTraits<SC>::magnitude(largestValue) == Teuchos::ScalarTraits<SC>::magnitude(ZERO)) vals[largestIndex] = ZERO;
998  }
999  }
1000 
1001 } //namespace MueLu
1002 
1003 #define MUELU_SEMICOARSENPFACTORY_SHORT
1004 #endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
void RevertToPieceWiseConstant(RCP< Matrix > &P, LO BlkSize) const
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
Timer to be used in factories. Similar to Monitor but with additional timers.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Namespace for MueLu classes and methods.
static const NoFactory * get()
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultScalar Scalar
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace, RCP< Matrix > &R, bool buildRestriction, bool doLinear) const
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
#define SET_VALID_ENTRY(name)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
#define MaxHorNeighborNodes
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
Exception throws to report errors in the internal logical of the program.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.