MueLu  Version of the Day
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
48 
49 #include "Kokkos_UnorderedMap.hpp"
50 
52 
53 #include <Xpetra_Matrix.hpp>
54 #include <Xpetra_CrsGraphFactory.hpp>
55 
56 #include "MueLu_Types.hpp"
57 
58 #include "MueLu_Monitor.hpp"
59 #include "MueLu_MasterList.hpp"
60 #include "MueLu_Utilities_kokkos.hpp"
61 
62 
63 namespace MueLu {
64 
65  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
68  RCP<ParameterList> validParamList = rcp(new ParameterList());
69 
70  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
71  "Generating factory of the matrix A");
72  validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
73  "Number of spatial dimensions in the problem.");
74  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
75  "Number of local nodes per spatial dimension on the fine grid.");
76  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
77  "Fine level nullspace used to construct the coarse level nullspace.");
78  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
79  "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
80  validParamList->set<bool> ("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
81 
82  return validParamList;
83  } // GetValidParameterList()
84 
85  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
87  DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
88 
89  Input(fineLevel, "A");
90  Input(fineLevel, "numDimensions");
91  Input(fineLevel, "lNodesPerDim");
92  Input(fineLevel, "Nullspace");
93  Input(fineLevel, "Coordinates");
94 
95  } // DeclareInput()
96 
97  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
99  Build(Level& fineLevel, Level& coarseLevel) const {
100 
101  // Set debug outputs based on environment variable
102  RCP<Teuchos::FancyOStream> out;
103  if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
104  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
105  out->setShowAllFrontMatter(false).setShowProcRank(true);
106  } else {
107  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
108  }
109 
110  *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
111 
112  // First get the inputs from the fineLevel
113  const int numDimensions = Get<int>(fineLevel, "numDimensions");
114  Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
115  {
116  Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
117  for(int dim = 0; dim < numDimensions; ++dim) {
118  lFineNodesPerDim[dim] = lNodesPerDim[dim];
119  }
120  }
121  *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
122  << std::endl;
123 
124  // Let us check that the inputs verify our assumptions
125  if(numDimensions < 1 || numDimensions > 3) {
126  throw std::runtime_error("numDimensions must be 1, 2 or 3!");
127  }
128  for(int dim = 0; dim < numDimensions; ++dim) {
129  if(lFineNodesPerDim[dim] % 3 != 1) {
130  throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
131  }
132  }
133  Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
134 
135  const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
136 
137  RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
138  fineCoordinates = Get< RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
139  if(static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
140  throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
141  }
142 
143  // Let us create R and pass it down to the
144  // appropriate specialization and see what we
145  // get back!
146  RCP<Matrix> R;
147 
148  if(numDimensions == 1) {
149  throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
150  } else if(numDimensions == 2) {
151  throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
152  } else if(numDimensions == 3) {
153  Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
154  R, coarseCoordinates, lCoarseNodesPerDim);
155  }
156 
157  const Teuchos::ParameterList& pL = GetParameterList();
158 
159  // Reuse pattern if available (multiple solve)
160  RCP<ParameterList> Tparams;
161  if(pL.isSublist("matrixmatrix: kernel params"))
162  Tparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
163  else
164  Tparams= rcp(new ParameterList);
165 
166  // R->describe(*out, Teuchos::VERB_EXTREME);
167  *out << "Compute P=R^t" << std::endl;
168  // By default, we don't need global constants for transpose
169  Tparams->set("compute global constants: temporaries",Tparams->get("compute global constants: temporaries", false));
170  Tparams->set("compute global constants", Tparams->get("compute global constants",false));
171  std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
172  RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
173 
174  *out << "Compute coarse nullspace" << std::endl;
175  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
176  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
177  fineNullspace->getNumVectors());
178  R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
179  Teuchos::ScalarTraits<SC>::zero());
180 
181  *out << "Set data on coarse level" << std::endl;
182  Set(coarseLevel, "numDimensions", numDimensions);
183  Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
184  Set(coarseLevel, "Nullspace", coarseNullspace);
185  Set(coarseLevel, "Coordinates", coarseCoordinates);
186  if(pL.get<bool>("keep coarse coords")) {
187  coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
188  }
189 
190  R->SetFixedBlockSize(A->GetFixedBlockSize());
191  P->SetFixedBlockSize(A->GetFixedBlockSize());
192 
193  Set(coarseLevel, "R", R);
194  Set(coarseLevel, "P", P);
195 
196  } // Build()
197 
198  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
200  Build3D(const int numDimensions,
201  Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
202  const RCP<Matrix>& A,
203  const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& fineCoordinates,
204  RCP<Matrix>& R,
205  RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coarseCoordinates,
206  Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
207  using local_matrix_type = typename CrsMatrix::local_matrix_type;
208  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
209  using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
210  using entries_type = typename local_matrix_type::index_type::non_const_type;
211  using values_type = typename local_matrix_type::values_type::non_const_type;
212  using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
213 
214  // Set debug outputs based on environment variable
215  RCP<Teuchos::FancyOStream> out;
216  if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
217  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
218  out->setShowAllFrontMatter(false).setShowProcRank(true);
219  } else {
220  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
221  }
222 
223  // Now compute number of coarse grid points
224  for(int dim = 0; dim < numDimensions; ++dim) {
225  lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
226  }
227  *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
228 
229  // Grab the block size here and multiply all existing offsets by it
230  const LO blkSize = A->GetFixedBlockSize();
231  *out << "blkSize " << blkSize << std::endl;
232 
233  // Based on lCoarseNodesPerDim and lFineNodesPerDim
234  // we can compute numRows, numCols and NNZ for R
235  const LO numRows = blkSize*lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2];
236  const LO numCols = blkSize*lFineNodesPerDim[0]*lFineNodesPerDim[1]*lFineNodesPerDim[2];
237 
238  // Create the coarse coordinates multivector
239  // so we can fill it on the fly while computing
240  // the restriction operator
241  RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
242  Teuchos::OrdinalTraits<GO>::invalid(),
243  numRows,
244  A->getRowMap()->getIndexBase(),
245  A->getRowMap()->getComm());
246 
247  RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
248  Teuchos::OrdinalTraits<GO>::invalid(),
249  lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2],
250  A->getRowMap()->getIndexBase(),
251  A->getRowMap()->getComm());
252 
253  coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
254  numDimensions);
255 
256  // Get device views of coordinates
257  auto fineCoordsView = fineCoordinates ->getDeviceLocalView(Xpetra::Access::ReadOnly);
258  auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
259 
260 
261  Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
262  Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
263  for(int dim = 0; dim < numDimensions; ++dim) {
264  fineCoordData[dim] = fineCoordinates->getData(dim);
265  coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
266  }
267 
268  // Let us set some parameter that will be useful
269  // while constructing R
270 
271  // Length of interpolation stencils based on geometry
272  const LO cornerStencilLength = 27;
273  const LO edgeStencilLength = 45;
274  const LO faceStencilLength = 75;
275  const LO interiorStencilLength = 125;
276 
277  // Number of corner, edge, face and interior nodes
278  const LO numCorners = 8;
279  const LO numEdges = 4*(lCoarseNodesPerDim[0] - 2)
280  + 4*(lCoarseNodesPerDim[1] - 2)
281  + 4*(lCoarseNodesPerDim[2] - 2);
282  const LO numFaces = 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
283  + 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[2] - 2)
284  + 2*(lCoarseNodesPerDim[1] - 2)*(lCoarseNodesPerDim[2] - 2);
285  const LO numInteriors = (lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
286  *(lCoarseNodesPerDim[2] - 2);
287 
288  const LO nnz = (numCorners*cornerStencilLength + numEdges*edgeStencilLength
289  + numFaces*faceStencilLength + numInteriors*interiorStencilLength)*blkSize;
290 
291  // Having the number of rows and columns we can genrate
292  // the appropriate maps for our operator.
293 
294  *out << "R statistics:" << std::endl
295  << " -numRows= " << numRows << std::endl
296  << " -numCols= " << numCols << std::endl
297  << " -nnz= " << nnz << std::endl;
298 
299  row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
300  typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
301 
302  entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
303  typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
304 
305  values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
306  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
307 
308  // Compute the basic interpolation
309  // coefficients for 1D rate of 3
310  // coarsening.
311  Array<SC> coeffs({1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0});
312  row_map_h(0) = 0;
313 
314  // Define some offsets that
315  // will be needed often later on
316  const LO edgeLineOffset = 2*cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength;
317  const LO faceLineOffset = 2*edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength;
318  const LO interiorLineOffset = 2*faceStencilLength
319  + (lCoarseNodesPerDim[0] - 2)*interiorStencilLength;
320 
321  const LO facePlaneOffset = 2*edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset;
322  const LO interiorPlaneOffset = 2*faceLineOffset + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset;
323 
324  // Let us take care of the corners
325  // first since we always have
326  // corners to deal with!
327  {
328  // Corner 1
329  LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
330  for(LO l = 0; l < blkSize; ++l) {
331  for(LO k = 0; k < 3; ++k) {
332  for(LO j = 0; j < 3; ++j) {
333  for(LO i = 0; i < 3; ++i) {
334  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
335  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
336  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i + 2];
337  }
338  }
339  }
340  }
341  for(LO l = 0; l < blkSize; ++l) {
342  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
343  }
344  for(int dim = 0; dim <numDimensions; ++dim) {
345  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
346  }
347 
348  // Corner 5
349  coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
350  rowIdx = coordRowIdx*blkSize;
351  coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
352  columnOffset = coordColumnOffset*blkSize;
353  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
354  for(LO l = 0; l < blkSize; ++l) {
355  for(LO k = 0; k < 3; ++k) {
356  for(LO j = 0; j < 3; ++j) {
357  for(LO i = 0; i < 3; ++i) {
358  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
359  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
360  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
361  }
362  }
363  }
364  }
365  for(LO l = 0; l < blkSize; ++l) {
366  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
367  }
368  for(int dim = 0; dim <numDimensions; ++dim) {
369  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
370  }
371 
372  // Corner 2
373  coordRowIdx = (lCoarseNodesPerDim[0] - 1);
374  rowIdx = coordRowIdx*blkSize;
375  coordColumnOffset = (lFineNodesPerDim[0] - 1);
376  columnOffset = coordColumnOffset*blkSize;
377  entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
378  for(LO l = 0; l < blkSize; ++l) {
379  for(LO k = 0; k < 3; ++k) {
380  for(LO j = 0; j < 3; ++j) {
381  for(LO i = 0; i < 3; ++i) {
382  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
383  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
384  + j*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
385  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
386  }
387  }
388  }
389  }
390  for(LO l = 0; l < blkSize; ++l) {
391  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
392  }
393  for(int dim = 0; dim <numDimensions; ++dim) {
394  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
395  }
396 
397  // Corner 6
398  coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
399  rowIdx = coordRowIdx*blkSize;
400  coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
401  columnOffset = coordColumnOffset*blkSize;
402  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
403  for(LO l = 0; l < blkSize; ++l) {
404  for(LO k = 0; k < 3; ++k) {
405  for(LO j = 0; j < 3; ++j) {
406  for(LO i = 0; i < 3; ++i) {
407  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
408  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
409  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
410  }
411  }
412  }
413  }
414  for(LO l = 0; l < blkSize; ++l) {
415  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
416  }
417  for(int dim = 0; dim <numDimensions; ++dim) {
418  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
419  }
420 
421  // Corner 3
422  coordRowIdx = (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0];
423  rowIdx = coordRowIdx*blkSize;
424  coordColumnOffset = (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0];
425  columnOffset = coordColumnOffset*blkSize;
426  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset)*blkSize;
427  for(LO l = 0; l < blkSize; ++l) {
428  for(LO k = 0; k < 3; ++k) {
429  for(LO j = 0; j < 3; ++j) {
430  for(LO i = 0; i < 3; ++i) {
431  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
432  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
433  + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
434  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
435  }
436  }
437  }
438  }
439  for(LO l = 0; l < blkSize; ++l) {
440  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
441  }
442  for(int dim = 0; dim <numDimensions; ++dim) {
443  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
444  }
445 
446  // Corner 7
447  coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
448  rowIdx = coordRowIdx*blkSize;
449  coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
450  columnOffset = coordColumnOffset*blkSize;
451  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
452  for(LO l = 0; l < blkSize; ++l) {
453  for(LO k = 0; k < 3; ++k) {
454  for(LO j = 0; j < 3; ++j) {
455  for(LO i = 0; i < 3; ++i) {
456  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
457  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
458  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
459  }
460  }
461  }
462  }
463  for(LO l = 0; l < blkSize; ++l) {
464  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
465  }
466  for(int dim = 0; dim <numDimensions; ++dim) {
467  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
468  }
469 
470  // Corner 4
471  coordRowIdx = (lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
472  rowIdx = coordRowIdx*blkSize;
473  coordColumnOffset = (lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
474  columnOffset = coordColumnOffset*blkSize;
475  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset +
476  cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
477  for(LO l = 0; l < blkSize; ++l) {
478  for(LO k = 0; k < 3; ++k) {
479  for(LO j = 0; j < 3; ++j) {
480  for(LO i = 0; i < 3; ++i) {
481  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
482  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
483  + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
484  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
485  }
486  }
487  }
488  }
489  for(LO l = 0; l < blkSize; ++l) {
490  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
491  }
492  for(int dim = 0; dim <numDimensions; ++dim) {
493  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
494  }
495 
496  // Corner 8
497  coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
498  rowIdx = coordRowIdx*blkSize;
499  coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
500  columnOffset = coordColumnOffset*blkSize;
501  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
502  for(LO l = 0; l < blkSize; ++l) {
503  for(LO k = 0; k < 3; ++k) {
504  for(LO j = 0; j < 3; ++j) {
505  for(LO i = 0; i < 3; ++i) {
506  entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
507  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
508  values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
509  }
510  }
511  }
512  }
513  for(LO l = 0; l < blkSize; ++l) {
514  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
515  }
516  for(int dim = 0; dim <numDimensions; ++dim) {
517  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
518  }
519  } // Corners are done!
520 
521  // Edges along 0 direction
522  if(lCoarseNodesPerDim[0] - 2 > 0) {
523 
524  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
525  for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
526 
527  // Edge 0
528  coordRowIdx = (edgeIdx + 1);
529  rowIdx = coordRowIdx*blkSize;
530  coordColumnOffset = (edgeIdx + 1)*3;
531  columnOffset = coordColumnOffset*blkSize;
532  entryOffset = (cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
533  for(LO l = 0; l < blkSize; ++l) {
534  for(LO k = 0; k < 3; ++k) {
535  for(LO j = 0; j < 3; ++j) {
536  for(LO i = 0; i < 5; ++i) {
537  entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
538  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
539  values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
540  }
541  }
542  }
543  }
544  for(LO l = 0; l < blkSize; ++l) {
545  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
546  }
547  for(int dim = 0; dim <numDimensions; ++dim) {
548  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
549  }
550 
551  // Edge 1
552  coordRowIdx = ((lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
553  rowIdx = coordRowIdx*blkSize;
554  coordColumnOffset = ((lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
555  columnOffset = coordColumnOffset*blkSize;
556  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
557  + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
558  for(LO l = 0; l < blkSize; ++l) {
559  for(LO k = 0; k < 3; ++k) {
560  for(LO j = 0; j < 3; ++j) {
561  for(LO i = 0; i < 5; ++i) {
562  entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
563  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
564  values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
565  }
566  }
567  }
568  }
569  for(LO l = 0; l < blkSize; ++l) {
570  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
571  }
572  for(int dim = 0; dim <numDimensions; ++dim) {
573  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
574  }
575 
576  // Edge 2
577  coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
578  + edgeIdx + 1);
579  rowIdx = coordRowIdx*blkSize;
580  coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
581  + (edgeIdx + 1)*3);
582  columnOffset = coordColumnOffset*blkSize;
583  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
584  + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
585  for(LO l = 0; l < blkSize; ++l) {
586  for(LO k = 0; k < 3; ++k) {
587  for(LO j = 0; j < 3; ++j) {
588  for(LO i = 0; i < 5; ++i) {
589  entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
590  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
591  values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
592  }
593  }
594  }
595  }
596  for(LO l = 0; l < blkSize; ++l) {
597  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
598  }
599  for(int dim = 0; dim <numDimensions; ++dim) {
600  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
601  }
602 
603  // Edge 3
604  coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
605  + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
606  rowIdx = coordRowIdx*blkSize;
607  coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
608  + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
609  columnOffset = coordColumnOffset*blkSize;
610  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
611  + edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
612  + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
613  for(LO l = 0; l < blkSize; ++l) {
614  for(LO k = 0; k < 3; ++k) {
615  for(LO j = 0; j < 3; ++j) {
616  for(LO i = 0; i < 5; ++i) {
617  entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
618  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
619  + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
620  values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
621  }
622  }
623  }
624  }
625  for(LO l = 0; l < blkSize; ++l) {
626  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
627  }
628  for(int dim = 0; dim <numDimensions; ++dim) {
629  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
630  }
631  }
632  }
633 
634  // Edges along 1 direction
635  if(lCoarseNodesPerDim[1] - 2 > 0) {
636 
637  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
638  for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
639 
640  // Edge 0
641  coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[0];
642  rowIdx = coordRowIdx*blkSize;
643  coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[0];
644  columnOffset = coordColumnOffset*blkSize;
645  entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
646  for(LO l = 0; l < blkSize; ++l) {
647  for(LO k = 0; k < 3; ++k) {
648  for(LO j = 0; j < 5; ++j) {
649  for(LO i = 0; i < 3; ++i) {
650  entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
651  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
652  values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
653  }
654  }
655  }
656  }
657  for(LO l = 0; l < blkSize; ++l) {
658  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
659  }
660  for(int dim = 0; dim <numDimensions; ++dim) {
661  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
662  }
663 
664  // Edge 1
665  coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
666  rowIdx = coordRowIdx*blkSize;
667  coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
668  columnOffset = coordColumnOffset*blkSize;
669  entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset
670  + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
671  for(LO l = 0; l < blkSize; ++l) {
672  for(LO k = 0; k < 3; ++k) {
673  for(LO j = 0; j < 5; ++j) {
674  for(LO i = 0; i < 3; ++i) {
675  entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
676  + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
677  values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
678  }
679  }
680  }
681  }
682  for(LO l = 0; l < blkSize; ++l) {
683  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
684  }
685  for(int dim = 0; dim <numDimensions; ++dim) {
686  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
687  }
688 
689  // Edge 2
690  coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
691  + (edgeIdx + 1)*lCoarseNodesPerDim[0]);
692  rowIdx = coordRowIdx*blkSize;
693  coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
694  + (edgeIdx + 1)*3*lFineNodesPerDim[0]);
695  columnOffset = coordColumnOffset*blkSize;
696  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
697  + edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
698  for(LO l = 0; l < blkSize; ++l) {
699  for(LO k = 0; k < 3; ++k) {
700  for(LO j = 0; j < 5; ++j) {
701  for(LO i = 0; i < 3; ++i) {
702  entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
703  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
704  values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
705  }
706  }
707  }
708  }
709  for(LO l = 0; l < blkSize; ++l) {
710  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
711  }
712  for(int dim = 0; dim <numDimensions; ++dim) {
713  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
714  }
715 
716  // Edge 3
717  coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
718  + (edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
719  rowIdx = coordRowIdx*blkSize;
720  coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
721  + (edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
722  columnOffset = coordColumnOffset*blkSize;
723  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
724  + edgeLineOffset + edgeIdx*faceLineOffset
725  + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
726  for(LO l = 0; l < blkSize; ++l) {
727  for(LO k = 0; k < 3; ++k) {
728  for(LO j = 0; j < 5; ++j) {
729  for(LO i = 0; i < 3; ++i) {
730  entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
731  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
732  + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
733  values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
734  }
735  }
736  }
737  }
738  for(LO l = 0; l < blkSize; ++l) {
739  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
740  }
741  for(int dim = 0; dim <numDimensions; ++dim) {
742  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
743  }
744  }
745  }
746 
747  // Edges along 2 direction
748  if(lCoarseNodesPerDim[2] - 2 > 0) {
749 
750  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
751  for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
752 
753  // Edge 0
754  coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
755  rowIdx = coordRowIdx*blkSize;
756  coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0];
757  columnOffset = coordColumnOffset*blkSize;
758  entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset)*blkSize;
759  for(LO l = 0; l < blkSize; ++l) {
760  for(LO k = 0; k < 5; ++k) {
761  for(LO j = 0; j < 3; ++j) {
762  for(LO i = 0; i < 3; ++i) {
763  entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
764  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
765  values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
766  }
767  }
768  }
769  }
770  for(LO l = 0; l < blkSize; ++l) {
771  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
772  }
773  for(int dim = 0; dim <numDimensions; ++dim) {
774  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
775  }
776 
777  // Edge 1
778  coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
779  + lCoarseNodesPerDim[0] - 1);
780  rowIdx = coordRowIdx*blkSize;
781  coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
782  + lFineNodesPerDim[0] - 1);
783  columnOffset = coordColumnOffset*blkSize;
784  entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength
785  + edgeIdx*interiorPlaneOffset)*blkSize;
786  for(LO l = 0; l < blkSize; ++l) {
787  for(LO k = 0; k < 5; ++k) {
788  for(LO j = 0; j < 3; ++j) {
789  for(LO i = 0; i < 3; ++i) {
790  entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
791  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
792  values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
793  }
794  }
795  }
796  }
797  for(LO l = 0; l < blkSize; ++l) {
798  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
799  }
800  for(int dim = 0; dim <numDimensions; ++dim) {
801  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
802  }
803 
804  // Edge 2
805  coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
806  + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0]);
807  rowIdx = coordRowIdx*blkSize;
808  coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
809  + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0]);
810  columnOffset = coordColumnOffset*blkSize;
811  entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset + faceLineOffset
812  + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset)*blkSize;
813  for(LO l = 0; l < blkSize; ++l) {
814  for(LO k = 0; k < 5; ++k) {
815  for(LO j = 0; j < 3; ++j) {
816  for(LO i = 0; i < 3; ++i) {
817  entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
818  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
819  values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
820  }
821  }
822  }
823  }
824  for(LO l = 0; l < blkSize; ++l) {
825  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
826  }
827  for(int dim = 0; dim <numDimensions; ++dim) {
828  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
829  }
830 
831  // Edge 3
832  coordRowIdx = ((edgeIdx + 2)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
833  rowIdx = coordRowIdx*blkSize;
834  coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
835  + lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
836  columnOffset = coordColumnOffset*blkSize;
837  entryOffset = (facePlaneOffset + (edgeIdx + 1)*interiorPlaneOffset - edgeStencilLength)*blkSize;
838  for(LO l = 0; l < blkSize; ++l) {
839  for(LO k = 0; k < 5; ++k) {
840  for(LO j = 0; j < 3; ++j) {
841  for(LO i = 0; i < 3; ++i) {
842  entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
843  + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
844  + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
845  values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
846  }
847  }
848  }
849  }
850  for(LO l = 0; l < blkSize; ++l) {
851  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
852  }
853  for(int dim = 0; dim <numDimensions; ++dim) {
854  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
855  }
856  }
857  }
858 
859  //TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
860  Kokkos::deep_copy(row_map, row_map_h);
861  Kokkos::deep_copy(entries, entries_h);
862  Kokkos::deep_copy(values, values_h);
863 
864  // Create views on device for nodes per dim
865  LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
866  LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
867 
868  typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view( lCoarseNodesPerDim_d );
869  typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view( lFineNodesPerDim_d );
870 
871  for(int dim = 0; dim < numDimensions; ++dim) {
872  lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
873  lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
874  }
875 
876  Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
877  Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
878 
879 
880  // Faces in 0-1 plane
881  if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
882 
883  Kokkos::parallel_for("Faces in 0-1 plane region R",
884  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1]-2)*(lCoarseNodesPerDim[0]-2) ),
885  KOKKOS_LAMBDA(const LO faceIdx) {
886  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
887  LO gridIdx[3] = {0,0,0};
888  impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
889  // Last step in the loop
890  // update the grid indices
891  // for next grid point
892  for(LO i = 0; i < faceIdx; i++){
893  ++gridIdx[0];
894  if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
895  gridIdx[0] = 0;
896  ++gridIdx[1];
897  }
898  }
899 
900  // Face 0
901  coordRowIdx = ((gridIdx[1] + 1)*lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
902  rowIdx = coordRowIdx*blkSize;
903  coordColumnOffset = 3*((gridIdx[1] + 1)*lFineNodesPerDim_d(0) + gridIdx[0] + 1);
904  columnOffset = coordColumnOffset*blkSize;
905  entryOffset = (edgeLineOffset + edgeStencilLength
906  + gridIdx[1]*faceLineOffset + gridIdx[0]*faceStencilLength)*blkSize;
907  for(LO l = 0; l < blkSize; ++l) {
908  for(LO k = 0; k < 3; ++k) {
909  for(LO j = 0; j < 5; ++j) {
910  for(LO i = 0; i < 5; ++i) {
911  entries(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
912  + (k*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
913  values(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs_d[k + 2]*coeffs_d[j]*coeffs_d[i];
914  }
915  }
916  }
917  }
918  for(LO l = 0; l < blkSize; ++l) {
919  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
920  }
921  for(int dim = 0; dim <numDimensions; ++dim) {
922  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
923  }
924 
925  // Face 1
926  coordRowIdx += (lCoarseNodesPerDim_d(2) - 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0);
927  rowIdx = coordRowIdx*blkSize;
928  coordColumnOffset += (lFineNodesPerDim_d(2) - 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0);
929  columnOffset = coordColumnOffset*blkSize;
930  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2)*interiorPlaneOffset)*blkSize;
931  for(LO l = 0; l < blkSize; ++l) {
932  for(LO k = 0; k < 3; ++k) {
933  for(LO j = 0; j < 5; ++j) {
934  for(LO i = 0; i < 5; ++i) {
935  entries(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
936  + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
937  + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
938  values(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
939  }
940  }
941  }
942  }
943  for(LO l = 0; l < blkSize; ++l) {
944  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
945  }
946  for(int dim = 0; dim <numDimensions; ++dim) {
947  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
948  }
949 
950  });// parallel_for faces in 0-1 plane
951  }
952 
953  // Faces in 0-2 plane
954  if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
955 
956  Kokkos::parallel_for("Faces in 0-2 plane region R",
957  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[0]-2) ),
958  KOKKOS_LAMBDA(const LO faceIdx) {
959  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
960  LO gridIdx[3] = {0,0,0};
961  impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
962  // Last step in the loop
963  // update the grid indices
964  // for next grid point
965  for(LO i = 0; i < faceIdx; i++){
966  ++gridIdx[0];
967  if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
968  gridIdx[0] = 0;
969  ++gridIdx[2];
970  }
971  }
972 
973  // Face 0
974  coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
975  rowIdx = coordRowIdx*blkSize;
976  coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
977  + gridIdx[0] + 1)*3;
978  columnOffset = coordColumnOffset*blkSize;
979  entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + edgeStencilLength
980  + gridIdx[0]*faceStencilLength)*blkSize;
981  for(LO l = 0; l < blkSize; ++l) {
982  for(LO k = 0; k < 5; ++k) {
983  for(LO j = 0; j < 3; ++j) {
984  for(LO i = 0; i < 5; ++i) {
985  entries(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
986  + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + j*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
987  values(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j + 2]*coeffs_d[i];
988  }
989  }
990  }
991  }
992  for(LO l = 0; l < blkSize; ++l) {
993  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
994  }
995  for(int dim = 0; dim <numDimensions; ++dim) {
996  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
997  }
998 
999  // Face 1
1000  coordRowIdx += (lCoarseNodesPerDim_d(1) - 1)*lCoarseNodesPerDim_d(0);
1001  rowIdx = coordRowIdx*blkSize;
1002  coordColumnOffset += (lFineNodesPerDim_d(1) - 1)*lFineNodesPerDim_d(0);
1003  columnOffset = coordColumnOffset*blkSize;
1004  entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2)*interiorLineOffset)*blkSize;
1005  for(LO l = 0; l < blkSize; ++l) {
1006  for(LO k = 0; k < 5; ++k) {
1007  for(LO j = 0; j < 3; ++j) {
1008  for(LO i = 0; i < 5; ++i) {
1009  entries(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
1010  + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1011  + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
1012  values(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
1013  }
1014  }
1015  }
1016  }
1017  for(LO l = 0; l < blkSize; ++l) {
1018  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1019  }
1020  for(int dim = 0; dim <numDimensions; ++dim) {
1021  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1022  }
1023 
1024  });// parallel_for faces in 0-2 plane
1025  }
1026 
1027  // Faces in 1-2 plane
1028  if((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
1029 
1030  Kokkos::parallel_for("Faces in 1-2 plane region R",
1031  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[1]-2) ),
1032  KOKKOS_LAMBDA(const LO faceIdx) {
1033  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1034  LO gridIdx[3] = {0,0,0};
1035  impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
1036  // Last step in the loop
1037  // update the grid indices
1038  // for next grid point
1039  for(LO i = 0; i < faceIdx; i++){
1040  ++gridIdx[1];
1041  if(gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1042  gridIdx[1] = 0;
1043  ++gridIdx[2];
1044  }
1045  }
1046 
1047  // Face 0
1048  coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0)
1049  + (gridIdx[1] + 1)*lCoarseNodesPerDim_d(0));
1050  rowIdx = coordRowIdx*blkSize;
1051  coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1052  + (gridIdx[1] + 1)*lFineNodesPerDim_d(0))*3;
1053  columnOffset = coordColumnOffset*blkSize;
1054  entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + faceLineOffset
1055  + gridIdx[1]*interiorLineOffset)*blkSize;
1056  for(LO l = 0; l < blkSize; ++l) {
1057  for(LO k = 0; k < 5; ++k) {
1058  for(LO j = 0; j < 5; ++j) {
1059  for(LO i = 0; i < 3; ++i) {
1060  entries(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1061  + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + (j - 2)*lFineNodesPerDim_d(0) + i)*blkSize + l;
1062  values(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i + 2];
1063  }
1064  }
1065  }
1066  }
1067  for(LO l = 0; l < blkSize; ++l) {
1068  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1069  }
1070  for(int dim = 0; dim <numDimensions; ++dim) {
1071  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1072  }
1073 
1074  // Face 1
1075  coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
1076  rowIdx = coordRowIdx*blkSize;
1077  coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
1078  columnOffset = coordColumnOffset*blkSize;
1079  entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2)*interiorStencilLength)*blkSize;
1080  for(LO l = 0; l < blkSize; ++l) {
1081  for(LO k = 0; k < 5; ++k) {
1082  for(LO j = 0; j < 5; ++j) {
1083  for(LO i = 0; i < 3; ++i) {
1084  entries(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1085  + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1086  + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
1087  values(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
1088  }
1089  }
1090  }
1091  }
1092  for(LO l = 0; l < blkSize; ++l) {
1093  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1094  }
1095  for(int dim = 0; dim <numDimensions; ++dim) {
1096  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1097  }
1098 
1099  });// parallel_for faces in 1-2 plane
1100  }
1101 
1102  if(numInteriors > 0) {
1103 
1104  // Allocate and compute arrays
1105  // containing column offsets
1106  // and values associated with
1107  // interior points
1108  LO countRowEntries = 0;
1109  Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
1110  auto coordColumnOffsets_h = Kokkos::create_mirror_view( coordColumnOffsets_d );
1111 
1112  for(LO k = -2; k < 3; ++k) {
1113  for(LO j = -2; j < 3; ++j) {
1114  for(LO i = -2; i < 3; ++i) {
1115  coordColumnOffsets_h(countRowEntries) = k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
1116  + j*lFineNodesPerDim[0] + i;
1117  ++countRowEntries;
1118  }
1119  }
1120  }
1121  Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1122 
1123  LO countValues = 0;
1124  Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues",125);
1125  auto interiorValues_h = Kokkos::create_mirror_view( interiorValues_d );
1126  for(LO k = 0; k < 5; ++k) {
1127  for(LO j = 0; j < 5; ++j) {
1128  for(LO i = 0; i < 5; ++i) {
1129  interiorValues_h(countValues) = coeffs[k]*coeffs[j]*coeffs[i];
1130  ++countValues;
1131  }
1132  }
1133  }
1134  Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1135 
1136  Kokkos::parallel_for("interior idx region R",Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1137  KOKKOS_LAMBDA(const LO interiorIdx) {
1138  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1139  LO gridIdx[3];
1140  gridIdx[0] = 0; gridIdx[1] = 0; gridIdx[2] = 0;
1141  // First step in the loop
1142  // update the grid indices
1143  // for the grid point
1144  for(LO i=0; i<interiorIdx; i++){
1145  ++gridIdx[0];
1146  if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1147  gridIdx[0] = 0;
1148  ++gridIdx[1];
1149  if(gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1150  gridIdx[1] = 0;
1151  ++gridIdx[2];
1152  }
1153  }
1154  }
1155 
1156  coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(0)*lCoarseNodesPerDim_d(1)
1157  + (gridIdx[1] + 1)*lCoarseNodesPerDim_d(0)
1158  + gridIdx[0] + 1);
1159  rowIdx = coordRowIdx*blkSize;
1160  coordColumnOffset = ((gridIdx[2] + 1)*3*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1161  + (gridIdx[1] + 1)*3*lFineNodesPerDim_d(0) + (gridIdx[0] + 1)*3);
1162  columnOffset = coordColumnOffset*blkSize;
1163  entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength
1164  + gridIdx[2]*interiorPlaneOffset + gridIdx[1]*interiorLineOffset
1165  + gridIdx[0]*interiorStencilLength)*blkSize;
1166  for(LO l = 0; l < blkSize; ++l) {
1167  row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength*(l+1);
1168  }
1169  // Fill the column indices
1170  // and values in the approproate
1171  // views.
1172  for(LO l = 0; l < blkSize; ++l) {
1173  for(LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1174  entries(entryOffset + entryIdx + interiorStencilLength*l) = columnOffset + coordColumnOffsets_d(entryIdx)*blkSize + l;
1175  values(entryOffset + entryIdx + interiorStencilLength*l) = interiorValues_d(entryIdx);
1176  }
1177  }
1178  for(int dim = 0; dim <numDimensions; ++dim) {
1179  coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1180  }
1181 
1182  });// Kokkos::parallel_for interior idx
1183  //
1184  }
1185 
1186  local_graph_type localGraph(entries, row_map);
1187  local_matrix_type localR("R", numCols, values, localGraph);
1188 
1189  R = MatrixFactory::Build(localR, // the local data
1190  rowMap, // rowMap
1191  A->getRowMap(), // colMap
1192  A->getRowMap(), // domainMap == colMap
1193  rowMap, // rangeMap == rowMap
1194  Teuchos::null); // params for optimized construction
1195 
1196  } // Build3D()
1197 
1198 } //namespace MueLu
1199 
1200 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1201 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
RCP< const ParameterList > GetValidParameterList() const
Input.
Namespace for MueLu classes and methods.
MueLu::DefaultNode Node
static const NoFactory * get()
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.