MueLu  Version of the Day
MueLu_SaPFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_SAPFACTORY_KOKKOS_DEF_HPP
48 
49 #ifdef out
50 #include "KokkosKernels_Handle.hpp"
51 #include "KokkosSparse_spgemm.hpp"
52 #include "KokkosSparse_spmv.hpp"
53 #endif
55 
56 #include <Xpetra_Matrix.hpp>
57 #include <Xpetra_IteratorOps.hpp>
58 
60 #include "MueLu_Level.hpp"
61 #include "MueLu_MasterList.hpp"
62 #include "MueLu_Monitor.hpp"
63 #include "MueLu_PerfUtils.hpp"
65 #include "MueLu_TentativePFactory.hpp"
66 #include "MueLu_Utilities_kokkos.hpp"
67 
68 #include <sstream>
69 
70 namespace MueLu {
71 
72  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
74  RCP<ParameterList> validParamList = rcp(new ParameterList());
75 
76 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
77  SET_VALID_ENTRY("sa: damping factor");
78  SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
79  SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
80  SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
81  SET_VALID_ENTRY("sa: enforce constraints");
82  SET_VALID_ENTRY("tentative: calculate qr");
83  SET_VALID_ENTRY("sa: max eigenvalue");
84 #undef SET_VALID_ENTRY
85 
86  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
87  validParamList->set< RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
88 
89  // Make sure we don't recursively validate options for the matrixmatrix kernels
90  ParameterList norecurse;
91  norecurse.disableRecursiveValidation();
92  validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
93 
94 
95  return validParamList;
96  }
97 
98  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
100  Input(fineLevel, "A");
101 
102  // Get default tentative prolongator factory
103  // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
104  RCP<const FactoryBase> initialPFact = GetFactory("P");
105  if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
106  coarseLevel.DeclareInput("P", initialPFact.get(), this);
107  }
108 
109  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
111  return BuildP(fineLevel, coarseLevel);
112  }
113 
114  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
116  FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
117 
118  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
119 
120  // Get default tentative prolongator factory
121  // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
122  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
123  RCP<const FactoryBase> initialPFact = GetFactory("P");
124  if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
125  const ParameterList& pL = GetParameterList();
126 
127  // Level Get
128  RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
129  RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());
130  RCP<Matrix> finalP;
131  // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
132  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
133  if (Ptent == Teuchos::null) {
134  finalP = Teuchos::null;
135  Set(coarseLevel, "P", finalP);
136  return;
137  }
138 
139  if(restrictionMode_) {
140  SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
141 
142  A = Utilities_kokkos::Transpose(*A, true); // build transpose of A explicitly
143  }
144 
145  //Build final prolongator
146 
147  // Reuse pattern if available
148  RCP<ParameterList> APparams;
149  if(pL.isSublist("matrixmatrix: kernel params"))
150  APparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
151  else
152  APparams= rcp(new ParameterList);
153  if (coarseLevel.IsAvailable("AP reuse data", this)) {
154  GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
155 
156  APparams = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", this);
157 
158  if (APparams->isParameter("graph"))
159  finalP = APparams->get< RCP<Matrix> >("graph");
160  }
161  // By default, we don't need global constants for SaP
162  APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false));
163  APparams->set("compute global constants",APparams->get("compute global constants",false));
164 
165  const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
166  const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
167  const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
168  const bool useAbsValueRowSum = pL.get<bool> ("sa: use rowsumabs diagonal scaling");
169  const bool doQRStep = pL.get<bool>("tentative: calculate qr");
170  const bool enforceConstraints= pL.get<bool>("sa: enforce constraints");
171  const SC userDefinedMaxEigen = as<SC>(pL.get<double>("sa: max eigenvalue"));
172 
173  // Sanity checking
174  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints,Exceptions::RuntimeError,
175  "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
176 
177  if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
178 
179  SC lambdaMax;
180  RCP<Vector> invDiag;
181  if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
182  {
183  SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
184  lambdaMax = A->GetMaxEigenvalueEstimate();
185  if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
186  GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
187  ( (useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
188  Magnitude stopTol = 1e-4;
189  invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
190  if (useAbsValueRowSum)
191  lambdaMax = Utilities_kokkos::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
192  else
193  lambdaMax = Utilities_kokkos::PowerMethod(*A, true, maxEigenIterations, stopTol);
194  A->SetMaxEigenvalueEstimate(lambdaMax);
195  } else {
196  GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
197  }
198  }
199  else {
200  lambdaMax = userDefinedMaxEigen;
201  A->SetMaxEigenvalueEstimate(lambdaMax);
202  }
203  GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
204 
205  {
206  SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
207  {
208  SubFactoryMonitor m3(*this, "Diagonal Extraction", coarseLevel);
209  if (useAbsValueRowSum)
210  GetOStream(Runtime0) << "Using rowSumAbs diagonal" << std::endl;
211  if (invDiag == Teuchos::null)
212  invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
213  }
214  SC omega = dampingFactor / lambdaMax;
215  TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
216 
217  {
218  SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
219  // finalP = Ptent + (I - \omega D^{-1}A) Ptent
220  finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
221  if (enforceConstraints) SatisfyPConstraints( A, finalP);
222  }
223  }
224 
225  } else {
226  finalP = Ptent;
227  }
228 
229  // Level Set
230  if (!restrictionMode_) {
231  // prolongation factory is in prolongation mode
232  if(!finalP.is_null()) {std::ostringstream oss; oss << "P_" << coarseLevel.GetLevelID(); finalP->setObjectLabel(oss.str());}
233  Set(coarseLevel, "P", finalP);
234 
235  // NOTE: EXPERIMENTAL
236  if (Ptent->IsView("stridedMaps"))
237  finalP->CreateView("stridedMaps", Ptent);
238 
239  } else {
240  // prolongation factory is in restriction mode
241  RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP, true);
242  Set(coarseLevel, "R", R);
243  if(!R.is_null()) {std::ostringstream oss; oss << "R_" << coarseLevel.GetLevelID(); R->setObjectLabel(oss.str());}
244 
245  // NOTE: EXPERIMENTAL
246  if (Ptent->IsView("stridedMaps"))
247  R->CreateView("stridedMaps", Ptent, true);
248  }
249 
250  if (IsPrint(Statistics2)) {
251  RCP<ParameterList> params = rcp(new ParameterList());
252  params->set("printLoadBalancingInfo", true);
253  params->set("printCommInfo", true);
254  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
255  }
256 
257  } //Build()
258 
259  // Analyze the grid transfer produced by smoothed aggregation and make
260  // modifications if it does not look right. In particular, if there are
261  // negative entries or entries larger than 1, modify P's rows.
262  //
263  // Note: this kind of evaluation probably only makes sense if not doing QR
264  // when constructing tentative P.
265  //
266  // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
267  // these entries to the constraint value and modify the rest of the row
268  // so that the row sum remains the same as before by adding an equal
269  // amount to each remaining entry. However, if the original row sum value
270  // violates the constraints, we set the row sum back to 1 (the row sum of
271  // tentative P). After doing the modification to a row, we need to check
272  // again the entire row to make sure that the modified row does not violate
273  // the constraints.
274 
275 template<typename local_matrix_type>
277 
278  using Scalar= typename local_matrix_type::non_const_value_type;
279  using SC=Scalar;
280  using LO=typename local_matrix_type::non_const_ordinal_type;
281  using Device= typename local_matrix_type::device_type;
282  const Scalar zero = Kokkos::ArithTraits<SC>::zero();
283  const Scalar one = Kokkos::ArithTraits<SC>::one();
285  local_matrix_type localP;
286  Kokkos::View<SC**, Device> ConstraintViolationSum;
287  Kokkos::View<SC**, Device> Rsum;
288  Kokkos::View<size_t**, Device> nPositive;
289 
290  constraintKernel(LO nPDEs_,local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
291  {
292  ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
293  Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
294  nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
295  }
296 
297  KOKKOS_INLINE_FUNCTION
298  void operator() (const size_t rowIdx) const {
299 
300  auto rowPtr = localP.graph.row_map;
301  auto values = localP.values;
302 
303  bool checkRow = true;
304 
305  if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
306 
307 
308  while (checkRow) {
309 
310  // check constraints and compute the row sum
311 
312  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
313  Rsum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
314  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(zero)) {
315 
316  ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
317  values(entryIdx) = zero;
318  }
319  else {
320  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(zero))
321  nPositive(rowIdx, entryIdx%nPDEs) = nPositive(rowIdx, entryIdx%nPDEs) + 1;
322 
323  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
324  ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += (values(entryIdx) - one);
325  values(entryIdx) = one;
326  }
327  }
328  }
329 
330  checkRow = false;
331 
332  // take into account any row sum that violates the contraints
333 
334  for (size_t k=0; k < (size_t) nPDEs; k++) {
335 
336  if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(zero)) {
337  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
338  }
339  else if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
340  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k)+ (one - Rsum(rowIdx, k)); // rstumin
341  }
342  }
343 
344  // check if row need modification
345  for (size_t k=0; k < (size_t) nPDEs; k++) {
346  if (Kokkos::ArithTraits<SC>::magnitude(ConstraintViolationSum(rowIdx, k)) != Kokkos::ArithTraits<SC>::magnitude(zero))
347  checkRow = true;
348  }
349  // modify row
350  if (checkRow) {
351  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
352  if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(zero)) {
353  values(entryIdx) = values(entryIdx) +
354  (ConstraintViolationSum(rowIdx, entryIdx%nPDEs)/ (Scalar (nPositive(rowIdx, entryIdx%nPDEs)) != zero ? Scalar (nPositive(rowIdx, entryIdx%nPDEs)) : one));
355  }
356  }
357  for (size_t k=0; k < (size_t) nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
358  }
359  for (size_t k=0; k < (size_t) nPDEs; k++) Rsum(rowIdx, k) = zero;
360  for (size_t k=0; k < (size_t) nPDEs; k++) nPositive(rowIdx, k) = 0;
361  } // while (checkRow) ...
362 
363  }
364 };
365 
366 template<typename local_matrix_type>
368 
369  using Scalar= typename local_matrix_type::non_const_value_type;
370  using SC=Scalar;
371  using LO=typename local_matrix_type::non_const_ordinal_type;
372  using Device= typename local_matrix_type::device_type;
373  using KAT = Kokkos::ArithTraits<SC>;
374  const Scalar zero = Kokkos::ArithTraits<SC>::zero();
375  const Scalar one = Kokkos::ArithTraits<SC>::one();
377  local_matrix_type localP;
378  Kokkos::View<SC**, Device> origSorted;
379  Kokkos::View<SC**, Device> fixedSorted;
380  Kokkos::View<LO**, Device> inds;
381 
382  optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
383  {
384  origSorted = Kokkos::View<SC**, Device>("origSorted" , localP_.numRows(), maxRowEntries_);
385  fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
386  inds = Kokkos::View<LO**, Device>("inds" , localP_.numRows(), maxRowEntries_);
387  }
388 
389  KOKKOS_INLINE_FUNCTION
390  void operator() (const size_t rowIdx) const {
391 
392  auto rowPtr = localP.graph.row_map;
393  auto values = localP.values;
394 
395  auto nnz = rowPtr(rowIdx+1) - rowPtr(rowIdx);
396 
397  if (nnz != 0) {
398 
399  Scalar rsumTarget = zero;
400  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
401 
402  {
403  SC aBigNumber;
404  SC rowSumDeviation, temp, delta;
405  SC closestToLeftBoundDist, closestToRghtBoundDist;
406  LO closestToLeftBound, closestToRghtBound;
407  bool flipped;
408 
409  SC leftBound = zero;
410  SC rghtBound = one;
411  if ((KAT::real(rsumTarget) >= KAT::real(leftBound*(static_cast<SC>(nnz)))) &&
412  (KAT::real(rsumTarget) <= KAT::real(rghtBound*(static_cast<SC>(nnz))))){ // has Feasible solution
413 
414  flipped = false;
415  // compute aBigNumber to handle some corner cases where we need
416  // something large so that an if statement will be false
417  aBigNumber = KAT::zero();
418  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
419  if ( KAT::magnitude( values(entryIdx) ) > KAT::magnitude(aBigNumber))
420  aBigNumber = KAT::magnitude( values(entryIdx) );
421  }
422  aBigNumber = aBigNumber+ (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound))*(100*one);
423 
424  LO ind = 0;
425  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
426  origSorted(rowIdx, ind) = values(entryIdx);
427  inds(rowIdx, ind) = ind;
428  ind++;
429  }
430 
431  auto sortVals = Kokkos::subview( origSorted, rowIdx, Kokkos::ALL() );
432  auto sortInds = Kokkos::subview( inds, rowIdx, Kokkos::make_pair(0,ind));
433  // Need permutation indices to sort row values from smallest to largest,
434  // and unsort once row constraints are applied.
435  // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
436  for (LO i = 1; i < static_cast<LO>(nnz); ++i){
437  ind = sortInds(i);
438  LO j = i;
439 
440  if ( KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0))) ){
441  for ( ; j > 0; --j) sortInds(j) = sortInds(j - 1);
442 
443  sortInds(0) = ind;
444  } else {
445  for ( ; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j-1))); --j) sortInds(j) = sortInds(j-1);
446 
447  sortInds(j) = ind;
448  }
449  }
450 
451 
452  for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); //values is no longer used
453  // find entry in origSorted just to the right of the leftBound
454  closestToLeftBound = 0;
455  while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
456 
457  // find entry in origSorted just to the right of the rghtBound
458  closestToRghtBound = closestToLeftBound;
459  while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
460 
461  // compute distance between closestToLeftBound and the left bound and the
462  // distance between closestToRghtBound and the right bound.
463 
464  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
465  if (closestToRghtBound == static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
466  else closestToRghtBoundDist= origSorted(rowIdx, closestToRghtBound) - rghtBound;
467 
468  // compute how far the rowSum is off from the target row sum taking into account
469  // numbers that have been shifted to satisfy bound constraint
470 
471  rowSumDeviation = leftBound*(static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz-closestToRghtBound))*rghtBound - rsumTarget;
472  for (LO i=closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
473 
474  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
475  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
476  // Later we will flip the data back to its original form.
477  if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
478  flipped = true;
479  temp = leftBound; leftBound = -rghtBound; rghtBound = temp;
480 
481  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
482 
483  //TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
484  if ((nnz%2) == 1) origSorted(rowIdx, (nnz/2) ) = -origSorted(rowIdx, (nnz/2) );
485  for (LO i=0; i < static_cast<LO>(nnz/2); i++) {
486  temp=origSorted(rowIdx, i);
487  origSorted(rowIdx, i) = -origSorted(rowIdx, nnz-1-i);
488  origSorted(rowIdx, nnz-i-1) = -temp;
489  }
490 
491  /* reverse bounds */
492 
493  LO itemp = closestToLeftBound;
494  closestToLeftBound = nnz-closestToRghtBound;
495  closestToRghtBound = nnz-itemp;
496  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
497  if (closestToRghtBound == static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
498  else closestToRghtBoundDist= origSorted(rowIdx, closestToRghtBound) - rghtBound;
499 
500  rowSumDeviation = -rowSumDeviation;
501  }
502 
503  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
504 
505  for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
506  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
507  for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
508 
509  while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one*1.e-10)*rsumTarget))){ // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
510  if (closestToRghtBound != closestToLeftBound)
511  delta = rowSumDeviation/ static_cast<SC>(closestToRghtBound - closestToLeftBound);
512  else delta = aBigNumber;
513 
514  if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
515  if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
516  rowSumDeviation = zero;
517  for (LO i = closestToLeftBound; i < closestToRghtBound ; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
518  }
519  else {
520  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
521  fixedSorted(rowIdx, closestToLeftBound) = leftBound;
522  closestToLeftBound++;
523  if (closestToLeftBound < static_cast<LO>(nnz)) closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
524  else closestToLeftBoundDist = aBigNumber;
525  }
526  }
527  else {
528  if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
529  rowSumDeviation = 0;
530  for (LO i = closestToLeftBound; i < closestToRghtBound ; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
531  }
532  else {
533  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
534  // if (closestToRghtBound < nEntries) {
535  fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
536  closestToRghtBound++;
537  // }
538  if (closestToRghtBound >= static_cast<LO>(nnz)) closestToRghtBoundDist = aBigNumber;
539  else closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
540  }
541  }
542  }
543 
544  auto rowStart = rowPtr(rowIdx);
545  if (flipped) {
546  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
547 
548  if ((nnz%2) == 1) fixedSorted(rowIdx, (nnz/2) ) = -fixedSorted(rowIdx, (nnz/2) );
549  for (LO i=0; i < static_cast<LO>(nnz/2); i++) {
550  temp=fixedSorted(rowIdx, i);
551  fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz-1-i);
552  fixedSorted(rowIdx, nnz-i-1) = -temp;
553  }
554  }
555  // unsort and update row values with new values
556  for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
557 
558  } else { // row does not have feasible solution to match constraint
559  // just set all entries to the same value giving a row sum of 1
560  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one/( static_cast<SC>(nnz) );
561  }
562  }
563 
564  }
565 
566  }
567 };
568 
569 
570  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
571  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::SatisfyPConstraints(const RCP<Matrix> A, RCP<Matrix>& P) const {
572 
573  using Device = typename Matrix::local_matrix_type::device_type;
574  LO nPDEs = A->GetFixedBlockSize();
575 
576  using local_mat_type = typename Matrix::local_matrix_type;
577  constraintKernel<local_mat_type> myKernel(nPDEs,P->getLocalMatrixDevice() );
578  Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements() ),
579  myKernel );
580 
581  } //SatsifyPConstraints()
582 
583  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
584  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::optimalSatisfyPConstraintsForScalarPDEs( RCP<Matrix>& P) const {
585 
586  using Device = typename Matrix::local_matrix_type::device_type;
587  LO nPDEs = 1;//A->GetFixedBlockSize();
588 
589  using local_mat_type = typename Matrix::local_matrix_type;
590  optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs,P->getLocalMaxNumRowEntries(),P->getLocalMatrixDevice() );
591  Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows() ),
592  myKernel );
593 
594  } //SatsifyPConstraints()
595 
596 } //namespace MueLu
597 
598 #endif // MUELU_SAPFACTORY_KOKKOS_DEF_HPP
599 
600 //TODO: restrictionMode_ should use the parameter list.
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
typename local_matrix_type::non_const_value_type Scalar
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory) if factory == NULL => use default factory.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
static RCP< Matrix > Transpose(Matrix &Op, bool optimizeTranspose=false, const std::string &label=std::string())
Transpose a Xpetra::Matrix.
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
typename local_matrix_type::non_const_ordinal_type LO
constraintKernel(LO nPDEs_, local_matrix_type localP_)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
#define SET_VALID_ENTRY(name)
Print even more statistics.
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
Kokkos::View< size_t **, Device > nPositive
typename local_matrix_type::device_type Device
Print statistics that do not involve significant additional computation.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Kokkos::View< SC **, Device > ConstraintViolationSum
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Factory for building Smoothed Aggregation prolongators.Input/output of SaPFactory_kokkos
static SC PowerMethod(const Matrix &A, bool scaleByDiag=true, LO niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Simple transpose for Tpetra::CrsMatrix types.
typename local_matrix_type::non_const_value_type Scalar
typename local_matrix_type::non_const_ordinal_type LO
Kokkos::View< SC **, Device > Rsum
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
Exception throws to report errors in the internal logical of the program.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=TST::eps() *100, const bool doLumped=false)
Extract Matrix Diagonal.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()