Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 //@HEADER
39 */
40 
41 #ifndef IFPACK2_RELAXATION_DEF_HPP
42 #define IFPACK2_RELAXATION_DEF_HPP
43 
44 #include "Teuchos_StandardParameterEntryValidators.hpp"
45 #include "Teuchos_TimeMonitor.hpp"
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_BlockCrsMatrix.hpp"
48 #include "Tpetra_BlockView.hpp"
49 #include "Ifpack2_Utilities.hpp"
50 #include "Ifpack2_Details_getCrsMatrix.hpp"
51 #include "MatrixMarket_Tpetra.hpp"
52 #include "Tpetra_Details_residual.hpp"
53 #include <cstdlib>
54 #include <memory>
55 #include <sstream>
56 #include "KokkosSparse_gauss_seidel.hpp"
57 
58 namespace {
59  // Validate that a given int is nonnegative.
60  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
61  public:
62  // Constructor (does nothing).
63  NonnegativeIntValidator () {}
64 
65  // ParameterEntryValidator wants this method.
66  Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
67  return Teuchos::null;
68  }
69 
70  // Actually validate the parameter's value.
71  void
72  validate (const Teuchos::ParameterEntry& entry,
73  const std::string& paramName,
74  const std::string& sublistName) const
75  {
76  using std::endl;
77  Teuchos::any anyVal = entry.getAny (true);
78  const std::string entryName = entry.getAny (false).typeName ();
79 
80  TEUCHOS_TEST_FOR_EXCEPTION(
81  anyVal.type () != typeid (int),
82  Teuchos::Exceptions::InvalidParameterType,
83  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
84  << "\" has the wrong type." << endl << "Parameter: " << paramName
85  << endl << "Type specified: " << entryName << endl
86  << "Type required: int" << endl);
87 
88  const int val = Teuchos::any_cast<int> (anyVal);
89  TEUCHOS_TEST_FOR_EXCEPTION(
90  val < 0, Teuchos::Exceptions::InvalidParameterValue,
91  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
92  << "\" is negative." << endl << "Parameter: " << paramName
93  << endl << "Value specified: " << val << endl
94  << "Required range: [0, INT_MAX]" << endl);
95  }
96 
97  // ParameterEntryValidator wants this method.
98  const std::string getXMLTypeName () const {
99  return "NonnegativeIntValidator";
100  }
101 
102  // ParameterEntryValidator wants this method.
103  void
104  printDoc (const std::string& docString,
105  std::ostream &out) const
106  {
107  Teuchos::StrUtils::printLines (out, "# ", docString);
108  out << "#\tValidator Used: " << std::endl;
109  out << "#\t\tNonnegativeIntValidator" << std::endl;
110  }
111  };
112 
113  // A way to get a small positive number (eps() for floating-point
114  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
115  // define it (for example, for integer values).
116  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
117  class SmallTraits {
118  public:
119  // Return eps if Scalar is a floating-point type, else return 1.
120  static const Scalar eps ();
121  };
122 
123  // Partial specialization for when Scalar is not a floating-point type.
124  template<class Scalar>
125  class SmallTraits<Scalar, true> {
126  public:
127  static const Scalar eps () {
128  return Teuchos::ScalarTraits<Scalar>::one ();
129  }
130  };
131 
132  // Partial specialization for when Scalar is a floating-point type.
133  template<class Scalar>
134  class SmallTraits<Scalar, false> {
135  public:
136  static const Scalar eps () {
137  return Teuchos::ScalarTraits<Scalar>::eps ();
138  }
139  };
140 
141  // Work-around for GitHub Issue #5269.
142  template<class Scalar,
143  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
144  struct RealTraits {};
145 
146  template<class Scalar>
147  struct RealTraits<Scalar, false> {
148  using val_type = Scalar;
149  using mag_type = Scalar;
150  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
151  return z;
152  }
153  };
154 
155  template<class Scalar>
156  struct RealTraits<Scalar, true> {
157  using val_type = Scalar;
158  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
159  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
160  return Kokkos::ArithTraits<val_type>::real (z);
161  }
162  };
163 
164  template<class Scalar>
165  KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
166  getRealValue (const Scalar& z) {
167  return RealTraits<Scalar>::real (z);
168  }
169 
170 } // namespace (anonymous)
171 
172 namespace Ifpack2 {
173 
174 template<class MatrixType>
175 void
176 Relaxation<MatrixType>::
177 updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
178  size_t numVecs) const
179 {
180  // Allocate a multivector if the cached one isn't perfect. Checking
181  // for map pointer equality is much cheaper than Map::isSameAs.
182  if (cachedMV_.is_null () ||
183  map.get () != cachedMV_->getMap ().get () ||
184  cachedMV_->getNumVectors () != numVecs) {
185  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
186  global_ordinal_type, node_type>;
187  cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
188  }
189 }
190 
191 template<class MatrixType>
193 setMatrix(const Teuchos::RCP<const row_matrix_type>& A)
194 {
195  if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
196  Importer_ = Teuchos::null;
197  pointImporter_ = Teuchos::null;
198  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
199  isInitialized_ = false;
200  IsComputed_ = false;
201  diagOffsets_ = Kokkos::View<size_t*, device_type>();
202  savedDiagOffsets_ = false;
203  hasBlockCrsMatrix_ = false;
204  serialGaussSeidel_ = Teuchos::null;
205  if (! A.is_null ()) {
206  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
207  }
208  A_ = A;
209  }
210 }
211 
212 template<class MatrixType>
214 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
215 : A_ (A),
216  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
217  false : // a reasonable default if there's no communicator
218  A->getRowMap ()->getComm ()->getSize () > 1)
219 {
220  this->setObjectLabel ("Ifpack2::Relaxation");
221 }
222 
223 
224 template<class MatrixType>
225 Teuchos::RCP<const Teuchos::ParameterList>
227 {
228  using Teuchos::Array;
229  using Teuchos::ParameterList;
230  using Teuchos::parameterList;
231  using Teuchos::RCP;
232  using Teuchos::rcp;
233  using Teuchos::rcp_const_cast;
234  using Teuchos::rcp_implicit_cast;
235  using Teuchos::setStringToIntegralParameter;
236  typedef Teuchos::ParameterEntryValidator PEV;
237 
238  if (validParams_.is_null ()) {
239  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
240 
241  // Set a validator that automatically converts from the valid
242  // string options to their enum values.
243  Array<std::string> precTypes (8);
244  precTypes[0] = "Jacobi";
245  precTypes[1] = "Gauss-Seidel";
246  precTypes[2] = "Symmetric Gauss-Seidel";
247  precTypes[3] = "MT Gauss-Seidel";
248  precTypes[4] = "MT Symmetric Gauss-Seidel";
249  precTypes[5] = "Richardson";
250  precTypes[6] = "Two-stage Gauss-Seidel";
251  precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
252  Array<Details::RelaxationType> precTypeEnums (8);
253  precTypeEnums[0] = Details::JACOBI;
254  precTypeEnums[1] = Details::GS;
255  precTypeEnums[2] = Details::SGS;
256  precTypeEnums[3] = Details::MTGS;
257  precTypeEnums[4] = Details::MTSGS;
258  precTypeEnums[5] = Details::RICHARDSON;
259  precTypeEnums[6] = Details::GS2;
260  precTypeEnums[7] = Details::SGS2;
261  const std::string defaultPrecType ("Jacobi");
262  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
263  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
264  pl.getRawPtr ());
265 
266  const int numSweeps = 1;
267  RCP<PEV> numSweepsValidator =
268  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
269  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
270  rcp_const_cast<const PEV> (numSweepsValidator));
271 
272  // number of 'local' outer sweeps for two-stage GS
273  const int numOuterSweeps = 1;
274  RCP<PEV> numOuterSweepsValidator =
275  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
276  pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
277  rcp_const_cast<const PEV> (numOuterSweepsValidator));
278  // number of 'local' inner sweeps for two-stage GS
279  const int numInnerSweeps = 1;
280  RCP<PEV> numInnerSweepsValidator =
281  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
282  pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
283  rcp_const_cast<const PEV> (numInnerSweepsValidator));
284  // specify damping factor for the inner sweeps of two-stage GS
285  const scalar_type innerDampingFactor = STS::one ();
286  pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
287  // specify whether to use sptrsv instead of inner-iterations for two-stage GS
288  const bool innerSpTrsv = false;
289  pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
290  // specify whether to use compact form of recurrence for two-stage GS
291  const bool compactForm = false;
292  pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
293 
294  const scalar_type dampingFactor = STS::one ();
295  pl->set ("relaxation: damping factor", dampingFactor);
296 
297  const bool zeroStartingSolution = true;
298  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
299 
300  const bool doBackwardGS = false;
301  pl->set ("relaxation: backward mode", doBackwardGS);
302 
303  const bool doL1Method = false;
304  pl->set ("relaxation: use l1", doL1Method);
305 
306  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
307  (STM::one() + STM::one()); // 1.5
308  pl->set ("relaxation: l1 eta", l1eta);
309 
310  const scalar_type minDiagonalValue = STS::zero ();
311  pl->set ("relaxation: min diagonal value", minDiagonalValue);
312 
313  const bool fixTinyDiagEntries = false;
314  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
315 
316  const bool checkDiagEntries = false;
317  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
318 
319  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
320  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
321 
322  const bool is_matrix_structurally_symmetric = false;
323  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
324 
325  const bool ifpack2_dump_matrix = false;
326  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
327 
328  const int cluster_size = 1;
329  pl->set("relaxation: mtgs cluster size", cluster_size);
330 
331  pl->set("relaxation: mtgs coloring algorithm", "Default");
332 
333  const int long_row_threshold = 0;
334  pl->set("relaxation: long row threshold", long_row_threshold);
335 
336  validParams_ = rcp_const_cast<const ParameterList> (pl);
337  }
338  return validParams_;
339 }
340 
341 
342 template<class MatrixType>
343 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
344 {
345  using Teuchos::getIntegralValue;
346  using Teuchos::ParameterList;
347  using Teuchos::RCP;
348  typedef scalar_type ST; // just to make code below shorter
349 
350  if (pl.isType<double>("relaxation: damping factor")) {
351  // Make sure that ST=complex can run with a damping factor that is
352  // a double.
353  ST df = pl.get<double>("relaxation: damping factor");
354  pl.remove("relaxation: damping factor");
355  pl.set("relaxation: damping factor",df);
356  }
357 
358  pl.validateParametersAndSetDefaults (* getValidParameters ());
359 
360  const Details::RelaxationType precType =
361  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
362  const int numSweeps = pl.get<int> ("relaxation: sweeps");
363  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
364  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
365  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
366  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
367  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
368  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
369  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
370  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
371  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
372  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
373  int cluster_size = 1;
374  if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
375  cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
376  int long_row_threshold = 0;
377  if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
378  long_row_threshold = pl.get<int> ("relaxation: long row threshold");
379  std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
380  //convert to lowercase
381  for(char& c : color_algo_name)
382  c = tolower(c);
383  if(color_algo_name == "default")
384  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
385  else if(color_algo_name == "serial")
386  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
387  else if(color_algo_name == "vb")
388  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
389  else if(color_algo_name == "vbbit")
390  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
391  else if(color_algo_name == "vbcs")
392  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
393  else if(color_algo_name == "vbd")
394  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
395  else if(color_algo_name == "vbdbit")
396  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
397  else if(color_algo_name == "eb")
398  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
399  else
400  {
401  std::ostringstream msg;
402  msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
403  msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
404  TEUCHOS_TEST_FOR_EXCEPTION(
405  true, std::invalid_argument, msg.str());
406  }
407 
408  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
409 
410  // for Two-stage Gauss-Seidel
411  if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
412  // Make sure that ST=complex can run with a damping factor that is
413  // a double.
414  ST df = pl.get<double>("relaxation: inner damping factor");
415  pl.remove("relaxation: inner damping factor");
416  pl.set("relaxation: inner damping factor",df);
417  }
418  //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
419  if (long_row_threshold > 0) {
420  TEUCHOS_TEST_FOR_EXCEPTION(
421  cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
422  "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
423  TEUCHOS_TEST_FOR_EXCEPTION(
424  precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
425  std::invalid_argument, "Ifpack2::Relaxation: "
426  "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
427  "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
428  }
429 
430  const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
431  const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
432  const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
433  const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
434  const bool compactForm = pl.get<bool> ("relaxation: compact form");
435 
436  // "Commit" the changes, now that we've validated everything.
437  PrecType_ = precType;
438  NumSweeps_ = numSweeps;
439  DampingFactor_ = dampingFactor;
440  ZeroStartingSolution_ = zeroStartSol;
441  DoBackwardGS_ = doBackwardGS;
442  DoL1Method_ = doL1Method;
443  L1Eta_ = l1Eta;
444  MinDiagonalValue_ = minDiagonalValue;
445  fixTinyDiagEntries_ = fixTinyDiagEntries;
446  checkDiagEntries_ = checkDiagEntries;
447  clusterSize_ = cluster_size;
448  longRowThreshold_ = long_row_threshold;
449  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
450  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
451  localSmoothingIndices_ = localSmoothingIndices;
452  // for Two-stage GS
453  NumInnerSweeps_ = numInnerSweeps;
454  NumOuterSweeps_ = numOuterSweeps;
455  InnerSpTrsv_ = innerSpTrsv;
456  InnerDampingFactor_ = innerDampingFactor;
457  CompactForm_ = compactForm;
458 }
459 
460 
461 template<class MatrixType>
462 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
463 {
464  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
465  // but otherwise, we will get [unused] in pl
466  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
467 }
468 
469 
470 template<class MatrixType>
471 Teuchos::RCP<const Teuchos::Comm<int> >
473  TEUCHOS_TEST_FOR_EXCEPTION(
474  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
475  "The input matrix A is null. Please call setMatrix() with a nonnull "
476  "input matrix before calling this method.");
477  return A_->getRowMap ()->getComm ();
478 }
479 
480 
481 template<class MatrixType>
482 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
484  return A_;
485 }
486 
487 
488 template<class MatrixType>
489 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
490  typename MatrixType::global_ordinal_type,
491  typename MatrixType::node_type> >
493  TEUCHOS_TEST_FOR_EXCEPTION(
494  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
495  "The input matrix A is null. Please call setMatrix() with a nonnull "
496  "input matrix before calling this method.");
497  return A_->getDomainMap ();
498 }
499 
500 
501 template<class MatrixType>
502 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
503  typename MatrixType::global_ordinal_type,
504  typename MatrixType::node_type> >
506  TEUCHOS_TEST_FOR_EXCEPTION(
507  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
508  "The input matrix A is null. Please call setMatrix() with a nonnull "
509  "input matrix before calling this method.");
510  return A_->getRangeMap ();
511 }
512 
513 
514 template<class MatrixType>
516  return true;
517 }
518 
519 
520 template<class MatrixType>
522  return(NumInitialize_);
523 }
524 
525 
526 template<class MatrixType>
528  return(NumCompute_);
529 }
530 
531 
532 template<class MatrixType>
534  return(NumApply_);
535 }
536 
537 
538 template<class MatrixType>
540  return(InitializeTime_);
541 }
542 
543 
544 template<class MatrixType>
546  return(ComputeTime_);
547 }
548 
549 
550 template<class MatrixType>
552  return(ApplyTime_);
553 }
554 
555 
556 template<class MatrixType>
558  return(ComputeFlops_);
559 }
560 
561 
562 template<class MatrixType>
564  return(ApplyFlops_);
565 }
566 
567 
568 
569 template<class MatrixType>
571  TEUCHOS_TEST_FOR_EXCEPTION(
572  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
573  "The input matrix A is null. Please call setMatrix() with a nonnull "
574  "input matrix, then call compute(), before calling this method.");
575  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
576  return A_->getLocalNumRows() + A_->getLocalNumEntries();
577 }
578 
579 
580 template<class MatrixType>
581 void
583 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
584  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
585  Teuchos::ETransp /* mode */,
586  scalar_type alpha,
587  scalar_type beta) const
588 {
589  using Teuchos::as;
590  using Teuchos::RCP;
591  using Teuchos::rcp;
592  using Teuchos::rcpFromRef;
593  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
595  TEUCHOS_TEST_FOR_EXCEPTION(
596  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
597  "The input matrix A is null. Please call setMatrix() with a nonnull "
598  "input matrix, then call compute(), before calling this method.");
599  TEUCHOS_TEST_FOR_EXCEPTION(
600  ! isComputed (),
601  std::runtime_error,
602  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
603  "preconditioner instance before you may call apply(). You may call "
604  "isComputed() to find out if compute() has been called already.");
605  TEUCHOS_TEST_FOR_EXCEPTION(
606  X.getNumVectors() != Y.getNumVectors(),
607  std::runtime_error,
608  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
609  "X has " << X.getNumVectors() << " columns, but Y has "
610  << Y.getNumVectors() << " columns.");
611  TEUCHOS_TEST_FOR_EXCEPTION(
612  beta != STS::zero (), std::logic_error,
613  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
614  "implemented.");
615 
616  const std::string timerName ("Ifpack2::Relaxation::apply");
617  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
618  if (timer.is_null ()) {
619  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
620  }
621 
622  double startTime = timer->wallTime();
623  {
624  Teuchos::TimeMonitor timeMon (*timer);
625  // Special case: alpha == 0.
626  if (alpha == STS::zero ()) {
627  // No floating-point operations, so no need to update a count.
628  Y.putScalar (STS::zero ());
629  }
630  else {
631  // If X and Y alias one another, then we need to create an
632  // auxiliary vector, Xcopy (a deep copy of X).
633  RCP<const MV> Xcopy;
634  {
635  if (X.aliases(Y)) {
636  Xcopy = rcp (new MV (X, Teuchos::Copy));
637  } else {
638  Xcopy = rcpFromRef (X);
639  }
640  }
641 
642  // Each of the following methods updates the flop count itself.
643  // All implementations handle zeroing out the starting solution
644  // (if necessary) themselves.
645  switch (PrecType_) {
646  case Ifpack2::Details::JACOBI:
647  ApplyInverseJacobi(*Xcopy,Y);
648  break;
649  case Ifpack2::Details::GS:
650  ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
651  break;
652  case Ifpack2::Details::SGS:
653  ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
654  break;
655  case Ifpack2::Details::MTGS:
656  case Ifpack2::Details::GS2:
657  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
658  break;
659  case Ifpack2::Details::MTSGS:
660  case Ifpack2::Details::SGS2:
661  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
662  break;
663  case Ifpack2::Details::RICHARDSON:
664  ApplyInverseRichardson(*Xcopy,Y);
665  break;
666 
667  default:
668  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
669  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
670  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
671  }
672  if (alpha != STS::one ()) {
673  Y.scale (alpha);
674  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
675  const double numVectors = as<double> (Y.getNumVectors ());
676  ApplyFlops_ += numGlobalRows * numVectors;
677  }
678  }
679  }
680  ApplyTime_ += (timer->wallTime() - startTime);
681  ++NumApply_;
682 }
683 
684 
685 template<class MatrixType>
686 void
688 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
689  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
690  Teuchos::ETransp mode) const
691 {
692  TEUCHOS_TEST_FOR_EXCEPTION(
693  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
694  "The input matrix A is null. Please call setMatrix() with a nonnull "
695  "input matrix, then call compute(), before calling this method.");
696  TEUCHOS_TEST_FOR_EXCEPTION(
697  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
698  "isComputed() must be true before you may call applyMat(). "
699  "Please call compute() before calling this method.");
700  TEUCHOS_TEST_FOR_EXCEPTION(
701  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
702  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
703  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
704  A_->apply (X, Y, mode);
705 }
706 
707 
708 template<class MatrixType>
710 {
711  const char methodName[] = "Ifpack2::Relaxation::initialize";
712 
713  TEUCHOS_TEST_FOR_EXCEPTION
714  (A_.is_null (), std::runtime_error, methodName << ": The "
715  "input matrix A is null. Please call setMatrix() with "
716  "a nonnull input matrix before calling this method.");
717 
718  Teuchos::RCP<Teuchos::Time> timer =
719  Teuchos::TimeMonitor::getNewCounter (methodName);
720 
721  double startTime = timer->wallTime();
722 
723  { // Timing of initialize starts here
724  Teuchos::TimeMonitor timeMon (*timer);
725  isInitialized_ = false;
726 
727  {
728  auto rowMap = A_->getRowMap ();
729  auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
730  IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
731  }
732 
733  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
734  // but in that case, the domain and column Maps are the same and
735  // we don't need to Import anyway.
736  //
737  // mfh 07 May 2019: A_->getGraph() might be an
738  // OverlappingRowGraph, which doesn't have an Import object.
739  // However, in that case, the comm should have size 1.
740  Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
741  Teuchos::null;
742 
743  {
744  Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
745  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
746  hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
747  }
748 
749  serialGaussSeidel_ = Teuchos::null;
750 
751  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
752  PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
753  auto crsMat = Details::getCrsMatrix(A_);
754  TEUCHOS_TEST_FOR_EXCEPTION
755  (crsMat.is_null(), std::logic_error, methodName << ": "
756  "Multithreaded Gauss-Seidel methods currently only work "
757  "when the input matrix is a Tpetra::CrsMatrix.");
758 
759  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
760  // compute, not initialize, since users may change the matrix's
761  // values at any time before calling compute.
762  if (ifpack2_dump_matrix_) {
763  static int sequence_number = 0;
764  const std::string file_name = "Ifpack2_MT_GS_" +
765  std::to_string (sequence_number++) + ".mtx";
766  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
767  writer_type::writeSparseFile (file_name, crsMat);
768  }
769 
770  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
771  if (mtKernelHandle_->get_gs_handle () == nullptr) {
772  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
773  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
774  else if(this->clusterSize_ == 1) {
775  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
776  mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
777  }
778  else
779  mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
780  }
781  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
782  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
783  // set parameters for two-stage GS
784  mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
785  mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
786  mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
787  mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getLocalNumRows ());
788  mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
789  }
790 
791  KokkosSparse::Experimental::gauss_seidel_symbolic(
792  mtKernelHandle_.getRawPtr (),
793  A_->getLocalNumRows (),
794  A_->getLocalNumCols (),
795  kcsr.graph.row_map,
796  kcsr.graph.entries,
797  is_matrix_structurally_symmetric_);
798  }
799  } // timing of initialize stops here
800 
801  InitializeTime_ += (timer->wallTime() - startTime);
802  ++NumInitialize_;
803  isInitialized_ = true;
804 }
805 
806 namespace Impl {
807 template <typename BlockDiagView>
808 struct InvertDiagBlocks {
809  typedef typename BlockDiagView::size_type Size;
810 
811 private:
812  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
813  template <typename View>
814  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
815  typename View::device_type, Unmanaged>;
816 
817  typedef typename BlockDiagView::non_const_value_type Scalar;
818  typedef typename BlockDiagView::device_type Device;
819  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
820  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
821 
822  UnmanagedView<BlockDiagView> block_diag_;
823  // TODO Use thread team and scratch memory space. In this first
824  // pass, provide workspace for each block.
825  RWrk rwrk_buf_;
826  UnmanagedView<RWrk> rwrk_;
827  IWrk iwrk_buf_;
828  UnmanagedView<IWrk> iwrk_;
829 
830 public:
831  InvertDiagBlocks (BlockDiagView& block_diag)
832  : block_diag_(block_diag)
833  {
834  const auto blksz = block_diag.extent(1);
835  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
836  rwrk_ = rwrk_buf_;
837  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
838  iwrk_ = iwrk_buf_;
839  }
840 
841  KOKKOS_INLINE_FUNCTION
842  void operator() (const Size i, int& jinfo) const {
843  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
844  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
845  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
846  int info = 0;
847  Tpetra::GETF2(D_cur, ipiv, info);
848  if (info) {
849  ++jinfo;
850  return;
851  }
852  Tpetra::GETRI(D_cur, ipiv, work, info);
853  if (info) ++jinfo;
854  }
855 };
856 }
857 
858 template<class MatrixType>
859 void Relaxation<MatrixType>::computeBlockCrs ()
860 {
861  using Kokkos::ALL;
862  using Teuchos::Array;
863  using Teuchos::ArrayRCP;
864  using Teuchos::ArrayView;
865  using Teuchos::as;
866  using Teuchos::Comm;
867  using Teuchos::RCP;
868  using Teuchos::rcp;
869  using Teuchos::REDUCE_MAX;
870  using Teuchos::REDUCE_MIN;
871  using Teuchos::REDUCE_SUM;
872  using Teuchos::rcp_dynamic_cast;
873  using Teuchos::reduceAll;
874  typedef local_ordinal_type LO;
875 
876  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
877  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
878  if (timer.is_null ()) {
879  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
880  }
881  double startTime = timer->wallTime();
882  {
883  Teuchos::TimeMonitor timeMon (*timer);
884 
885  TEUCHOS_TEST_FOR_EXCEPTION(
886  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
887  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
888  "with a nonnull input matrix, then call initialize(), before calling "
889  "this method.");
890  auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
891  TEUCHOS_TEST_FOR_EXCEPTION(
892  blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
893  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
894  "got this far. Please report this bug to the Ifpack2 developers.");
895 
896  const scalar_type one = STS::one ();
897 
898  // Reset state.
899  IsComputed_ = false;
900 
901  const LO lclNumMeshRows =
902  blockCrsA->getCrsGraph ().getLocalNumRows ();
903  const LO blockSize = blockCrsA->getBlockSize ();
904 
905  if (! savedDiagOffsets_) {
906  blockDiag_ = block_diag_type (); // clear it before reallocating
907  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
908  lclNumMeshRows, blockSize, blockSize);
909  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
910  // Clear diagOffsets_ first (by assigning an empty View to it)
911  // to save memory, before reallocating.
912  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
913  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
914  }
915  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
916  TEUCHOS_TEST_FOR_EXCEPTION
917  (static_cast<size_t> (diagOffsets_.extent (0)) !=
918  static_cast<size_t> (blockDiag_.extent (0)),
919  std::logic_error, "diagOffsets_.extent(0) = " <<
920  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
921  << blockDiag_.extent (0) <<
922  ". Please report this bug to the Ifpack2 developers.");
923  savedDiagOffsets_ = true;
924  }
925  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
926 
927  // Use an unmanaged View in this method, so that when we take
928  // subviews of it (to get each diagonal block), we don't have to
929  // touch the reference count. Reference count updates are a
930  // thread scalability bottleneck and have a performance cost even
931  // without using threads.
932  unmanaged_block_diag_type blockDiag = blockDiag_;
933 
934  if (DoL1Method_ && IsParallel_) {
935  const scalar_type two = one + one;
936  const size_t maxLength = A_->getLocalMaxNumRowEntries ();
937  nonconst_local_inds_host_view_type indices ("indices",maxLength);
938  nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
939  size_t numEntries = 0;
940 
941  for (LO i = 0; i < lclNumMeshRows; ++i) {
942  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
943  blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
944  scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
945 
946  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
947  for (LO subRow = 0; subRow < blockSize; ++subRow) {
948  magnitude_type diagonal_boost = STM::zero ();
949  for (size_t k = 0 ; k < numEntries ; ++k) {
950  if (indices[k] >= lclNumMeshRows) {
951  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
952  for (LO subCol = 0; subCol < blockSize; ++subCol) {
953  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
954  }
955  }
956  }
957  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
958  diagBlock(subRow, subRow) += diagonal_boost;
959  }
960  }
961  }
962  }
963 
964  int info = 0;
965  {
966  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
967  typedef typename unmanaged_block_diag_type::execution_space exec_space;
968  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
969 
970  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
971  // FIXME (mfh 19 May 2017) Different processes may not
972  // necessarily have this error all at the same time, so it would
973  // be better just to preserve a local error state and let users
974  // check.
975  TEUCHOS_TEST_FOR_EXCEPTION
976  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
977  << " diagonal blocks.");
978  }
979 
980  // In a debug build, do an extra test to make sure that all the
981  // factorizations were computed correctly.
982 #ifdef HAVE_IFPACK2_DEBUG
983  const int numResults = 2;
984  // Use "max = -min" trick to get min and max in a single all-reduce.
985  int lclResults[2], gblResults[2];
986  lclResults[0] = info;
987  lclResults[1] = -info;
988  gblResults[0] = 0;
989  gblResults[1] = 0;
990  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
991  numResults, lclResults, gblResults);
992  TEUCHOS_TEST_FOR_EXCEPTION
993  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
994  "Ifpack2::Relaxation::compute: When processing the input "
995  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
996  "failed on one or more (MPI) processes.");
997 #endif // HAVE_IFPACK2_DEBUG
998  serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
999  } // end TimeMonitor scope
1000 
1001  ComputeTime_ += (timer->wallTime() - startTime);
1002  ++NumCompute_;
1003  IsComputed_ = true;
1004 }
1005 
1006 template<class MatrixType>
1008 {
1009  using Teuchos::Array;
1010  using Teuchos::ArrayRCP;
1011  using Teuchos::ArrayView;
1012  using Teuchos::as;
1013  using Teuchos::Comm;
1014  using Teuchos::RCP;
1015  using Teuchos::rcp;
1016  using Teuchos::REDUCE_MAX;
1017  using Teuchos::REDUCE_MIN;
1018  using Teuchos::REDUCE_SUM;
1019  using Teuchos::reduceAll;
1020  using LO = local_ordinal_type;
1021  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1023  using IST = typename vector_type::impl_scalar_type;
1024  using KAT = Kokkos::ArithTraits<IST>;
1025 
1026  const char methodName[] = "Ifpack2::Relaxation::compute";
1027  const scalar_type zero = STS::zero ();
1028  const scalar_type one = STS::one ();
1029 
1030  TEUCHOS_TEST_FOR_EXCEPTION
1031  (A_.is_null (), std::runtime_error, methodName << ": "
1032  "The input matrix A is null. Please call setMatrix() with a nonnull "
1033  "input matrix, then call initialize(), before calling this method.");
1034 
1035  // We don't count initialization in compute() time.
1036  if (! isInitialized ()) {
1037  initialize ();
1038  }
1039 
1040  if (hasBlockCrsMatrix_) {
1041  computeBlockCrs ();
1042  return;
1043  }
1044 
1045  Teuchos::RCP<Teuchos::Time> timer =
1046  Teuchos::TimeMonitor::getNewCounter (methodName);
1047  double startTime = timer->wallTime();
1048 
1049  { // Timing of compute starts here.
1050  Teuchos::TimeMonitor timeMon (*timer);
1051 
1052  // Precompute some quantities for "fixing" zero or tiny diagonal
1053  // entries. We'll only use them if this "fixing" is enabled.
1054  //
1055  // SmallTraits covers for the lack of eps() in
1056  // Teuchos::ScalarTraits when its template parameter is not a
1057  // floating-point type. (Ifpack2 sometimes gets instantiated for
1058  // integer Scalar types.)
1059  IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1060  if ( MinDiagonalValue_ != zero)
1061  oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1062 
1063  // It's helpful not to have to recompute this magnitude each time.
1064  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1065 
1066  const LO numMyRows = static_cast<LO> (A_->getLocalNumRows ());
1067 
1068  TEUCHOS_TEST_FOR_EXCEPTION
1069  (NumSweeps_ < 0, std::logic_error, methodName
1070  << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1071  "Please report this bug to the Ifpack2 developers.");
1072  IsComputed_ = false;
1073 
1074  if (Diagonal_.is_null()) {
1075  // A_->getLocalDiagCopy fills in all Vector entries, even if the
1076  // matrix has no stored entries in the corresponding rows.
1077  Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1078  }
1079 
1080  if (checkDiagEntries_) {
1081  //
1082  // Check diagonal elements, replace zero elements with the minimum
1083  // diagonal value, and store their inverses. Count the number of
1084  // "small" and zero diagonal entries while we're at it.
1085  //
1086  size_t numSmallDiagEntries = 0; // "small" includes zero
1087  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1088  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1089  magnitude_type minMagDiagEntryMag = STM::zero ();
1090  magnitude_type maxMagDiagEntryMag = STM::zero ();
1091 
1092  // FIXME: We are extracting the diagonal more than once. That
1093  // isn't very efficient, but we shouldn't be checking
1094  // diagonal entries at scale anyway.
1095  A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1096  std::unique_ptr<vector_type> origDiag;
1097  origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1098  auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1099  auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1100  // As we go, keep track of the diagonal entries with the
1101  // least and greatest magnitude. We could use the trick of
1102  // starting min with +Inf and max with -Inf, but that
1103  // doesn't work if scalar_type is a built-in integer type.
1104  // Thus, we have to start by reading the first diagonal
1105  // entry redundantly.
1106  if (numMyRows != 0) {
1107  const magnitude_type d_0_mag = KAT::abs (diag(0));
1108  minMagDiagEntryMag = d_0_mag;
1109  maxMagDiagEntryMag = d_0_mag;
1110  }
1111 
1112  // Go through all the diagonal entries. Compute counts of
1113  // small-magnitude, zero, and negative-real-part entries.
1114  // Invert the diagonal entries that aren't too small. For
1115  // those too small in magnitude, replace them with
1116  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1117  // happens to be zero).
1118  for (LO i = 0; i < numMyRows; ++i) {
1119  const IST d_i = diag(i);
1120  const magnitude_type d_i_mag = KAT::abs (d_i);
1121  // Work-around for GitHub Issue #5269.
1122  //const magnitude_type d_i_real = KAT::real (d_i);
1123  const auto d_i_real = getRealValue (d_i);
1124 
1125  // We can't compare complex numbers, but we can compare their
1126  // real parts.
1127  if (d_i_real < STM::zero ()) {
1128  ++numNegDiagEntries;
1129  }
1130  if (d_i_mag < minMagDiagEntryMag) {
1131  minMagDiagEntryMag = d_i_mag;
1132  }
1133  if (d_i_mag > maxMagDiagEntryMag) {
1134  maxMagDiagEntryMag = d_i_mag;
1135  }
1136 
1137  if (fixTinyDiagEntries_) {
1138  // <= not <, in case minDiagValMag is zero.
1139  if (d_i_mag <= minDiagValMag) {
1140  ++numSmallDiagEntries;
1141  if (d_i_mag == STM::zero ()) {
1142  ++numZeroDiagEntries;
1143  }
1144  diag(i) = oneOverMinDiagVal;
1145  }
1146  else {
1147  diag(i) = KAT::one () / d_i;
1148  }
1149  }
1150  else { // Don't fix zero or tiny diagonal entries.
1151  // <= not <, in case minDiagValMag is zero.
1152  if (d_i_mag <= minDiagValMag) {
1153  ++numSmallDiagEntries;
1154  if (d_i_mag == STM::zero ()) {
1155  ++numZeroDiagEntries;
1156  }
1157  }
1158  diag(i) = KAT::one () / d_i;
1159  }
1160  }
1161 
1162  // Collect global data about the diagonal entries. Only do this
1163  // (which involves O(1) all-reduces) if the user asked us to do
1164  // the extra work.
1165  //
1166  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1167  // zero rows. Fixing this requires one of two options:
1168  //
1169  // 1. Use a custom reduction operation that ignores processes
1170  // with zero diagonal entries.
1171  // 2. Split the communicator, compute all-reduces using the
1172  // subcommunicator over processes that have a nonzero number
1173  // of diagonal entries, and then broadcast from one of those
1174  // processes (if there is one) to the processes in the other
1175  // subcommunicator.
1176  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1177 
1178  // Compute global min and max magnitude of entries.
1179  Array<magnitude_type> localVals (2);
1180  localVals[0] = minMagDiagEntryMag;
1181  // (- (min (- x))) is the same as (max x).
1182  localVals[1] = -maxMagDiagEntryMag;
1183  Array<magnitude_type> globalVals (2);
1184  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1185  localVals.getRawPtr (),
1186  globalVals.getRawPtr ());
1187  globalMinMagDiagEntryMag_ = globalVals[0];
1188  globalMaxMagDiagEntryMag_ = -globalVals[1];
1189 
1190  Array<size_t> localCounts (3);
1191  localCounts[0] = numSmallDiagEntries;
1192  localCounts[1] = numZeroDiagEntries;
1193  localCounts[2] = numNegDiagEntries;
1194  Array<size_t> globalCounts (3);
1195  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1196  localCounts.getRawPtr (),
1197  globalCounts.getRawPtr ());
1198  globalNumSmallDiagEntries_ = globalCounts[0];
1199  globalNumZeroDiagEntries_ = globalCounts[1];
1200  globalNumNegDiagEntries_ = globalCounts[2];
1201 
1202  // Compute and save the difference between the computed inverse
1203  // diagonal, and the original diagonal's inverse.
1204  vector_type diff (A_->getRowMap ());
1205  diff.reciprocal (*origDiag);
1206  diff.update (-one, *Diagonal_, one);
1207  globalDiagNormDiff_ = diff.norm2 ();
1208  }
1209 
1210  // Extract the diagonal entries. The CrsMatrix graph version is
1211  // faster than the row matrix version for subsequent calls to
1212  // compute(), since it caches the diagonal offsets.
1213 
1214  bool debugAgainstSlowPath = false;
1215 
1216  auto crsMat = Details::getCrsMatrix(A_);
1217 
1218  if (crsMat.get() && crsMat->isFillComplete ()) {
1219  // The invDiagKernel object computes diagonal offsets if
1220  // necessary. The "compute" call extracts diagonal enties,
1221  // optionally applies the L1 method and replacement of small
1222  // entries, and then inverts.
1223  if (invDiagKernel_.is_null())
1224  invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1225  else
1226  invDiagKernel_->setMatrix(crsMat);
1227  invDiagKernel_->compute(*Diagonal_,
1228  DoL1Method_ && IsParallel_, L1Eta_,
1229  fixTinyDiagEntries_, minDiagValMag);
1230  savedDiagOffsets_ = true;
1231 
1232  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1233  // environment variable to control this behavior at run time.
1234 #ifdef HAVE_IFPACK2_DEBUG
1235  debugAgainstSlowPath = true;
1236 #endif
1237  }
1238 
1239  if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1240  // We could not call the CrsMatrix version above, or want to
1241  // debug by comparing against the slow path.
1242 
1243  // FIXME: The L1 method in this code path runs on host, since we
1244  // don't have device access for row matrices.
1245 
1246  RCP<vector_type> Diagonal;
1247  if (debugAgainstSlowPath)
1248  Diagonal = rcp(new vector_type(A_->getRowMap ()));
1249  else
1250  Diagonal = Diagonal_;
1251 
1252  A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1253 
1254  // Setup for L1 Methods.
1255  // Here we add half the value of the off-processor entries in the row,
1256  // but only if diagonal isn't sufficiently large.
1257  //
1258  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1259  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1260  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1261  //
1262  if (DoL1Method_ && IsParallel_) {
1263  const row_matrix_type& A_row = *A_;
1264  auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1265  const magnitude_type two = STM::one () + STM::one ();
1266  const size_t maxLength = A_row.getLocalMaxNumRowEntries ();
1267  nonconst_local_inds_host_view_type indices("indices",maxLength);
1268  nonconst_values_host_view_type values("values",maxLength);
1269  size_t numEntries;
1270 
1271  for (LO i = 0; i < numMyRows; ++i) {
1272  A_row.getLocalRowCopy (i, indices, values, numEntries);
1273  magnitude_type diagonal_boost = STM::zero ();
1274  for (size_t k = 0 ; k < numEntries; ++k) {
1275  if (indices[k] >= numMyRows) {
1276  diagonal_boost += STS::magnitude (values[k] / two);
1277  }
1278  }
1279  if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1280  diag(i, 0) += diagonal_boost;
1281  }
1282  }
1283  }
1284 
1285  //
1286  // Invert the matrix's diagonal entries (in Diagonal).
1287  //
1288  if (fixTinyDiagEntries_) {
1289  // Go through all the diagonal entries. Invert those that
1290  // aren't too small in magnitude. For those that are too
1291  // small in magnitude, replace them with oneOverMinDiagVal.
1292  auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1293  Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1294  KOKKOS_LAMBDA (local_ordinal_type i) {
1295  auto d_i = localDiag(i, 0);
1296  const magnitude_type d_i_mag = KAT::magnitude (d_i);
1297  // <= not <, in case minDiagValMag is zero.
1298  if (d_i_mag <= minDiagValMag) {
1299  d_i = oneOverMinDiagVal;
1300  }
1301  else {
1302  // For Stokhos types, operator/ returns an expression
1303  // type. Explicitly convert to IST before returning.
1304  d_i = IST (KAT::one () / d_i);
1305  }
1306  localDiag(i, 0) = d_i;
1307  });
1308  }
1309  else { // don't fix tiny or zero diagonal entries
1310  Diagonal->reciprocal (*Diagonal);
1311  }
1312 
1313  if (debugAgainstSlowPath) {
1314  // Validate the fast-path diagonal against the slow-path diagonal.
1315  Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1316  const magnitude_type err = Diagonal->normInf ();
1317  // The two diagonals should be exactly the same, so their
1318  // difference should be exactly zero.
1319  TEUCHOS_TEST_FOR_EXCEPTION
1320  (err > 100*STM::eps(), std::logic_error, methodName << ": "
1321  << "\"fast-path\" diagonal computation failed. "
1322  "\\|D1 - D2\\|_inf = " << err << ".");
1323  }
1324  }
1325 
1326  // Count floating-point operations of computing the inverse diagonal.
1327  //
1328  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1329  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1330  ComputeFlops_ += 4.0 * numMyRows;
1331  }
1332  else {
1333  ComputeFlops_ += numMyRows;
1334  }
1335 
1336  if (PrecType_ == Ifpack2::Details::MTGS ||
1337  PrecType_ == Ifpack2::Details::MTSGS ||
1338  PrecType_ == Ifpack2::Details::GS2 ||
1339  PrecType_ == Ifpack2::Details::SGS2) {
1340 
1341  //KokkosKernels GaussSeidel Initialization.
1342  using scalar_view_t = typename local_matrix_device_type::values_type;
1343 
1344  TEUCHOS_TEST_FOR_EXCEPTION
1345  (crsMat.is_null(), std::logic_error, methodName << ": "
1346  "Multithreaded Gauss-Seidel methods currently only work "
1347  "when the input matrix is a Tpetra::CrsMatrix.");
1348  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1349 
1350  //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1351  //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1352  auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1353  scalar_view_t diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1354  KokkosSparse::Experimental::gauss_seidel_numeric(
1355  mtKernelHandle_.getRawPtr (),
1356  A_->getLocalNumRows (),
1357  A_->getLocalNumCols (),
1358  kcsr.graph.row_map,
1359  kcsr.graph.entries,
1360  kcsr.values,
1361  diagView_1d,
1362  is_matrix_structurally_symmetric_);
1363  }
1364  else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1365  if(crsMat)
1366  serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1367  else
1368  serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1369  }
1370  } // end TimeMonitor scope
1371 
1372  ComputeTime_ += (timer->wallTime() - startTime);
1373  ++NumCompute_;
1374  IsComputed_ = true;
1375 }
1376 
1377 
1378 template<class MatrixType>
1379 void
1381 ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1382  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1383 {
1384  using Teuchos::as;
1385  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1386  const double numVectors = as<double> (X.getNumVectors ());
1387  if (ZeroStartingSolution_) {
1388  // For the first Richardson sweep, if we are allowed to assume that
1389  // the initial guess is zero, then Richardson is just alpha times the RHS
1390  // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1391  Y.scale(DampingFactor_,X);
1392 
1393  // Count (global) floating-point operations. Ifpack2 represents
1394  // this as a floating-point number rather than an integer, so that
1395  // overflow (for a very large number of calls, or a very large
1396  // problem) is approximate instead of catastrophic.
1397  double flopUpdate = 0.0;
1398  if (DampingFactor_ == STS::one ()) {
1399  // Y(i,j) = X(i,j): one multiply for each entry of Y.
1400  flopUpdate = numGlobalRows * numVectors;
1401  } else {
1402  // Y(i,j) = DampingFactor_ * X(i,j):
1403  // One multiplies per entry of Y.
1404  flopUpdate = numGlobalRows * numVectors;
1405  }
1406  ApplyFlops_ += flopUpdate;
1407  if (NumSweeps_ == 1) {
1408  return;
1409  }
1410  }
1411  // If we were allowed to assume that the starting guess was zero,
1412  // then we have already done the first sweep above.
1413  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1414 
1415  // Allocate a multivector if the cached one isn't perfect
1416  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1417 
1418  for (int j = startSweep; j < NumSweeps_; ++j) {
1419  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1420  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1421  Y.update(DampingFactor_,*cachedMV_,STS::one());
1422  }
1423 
1424  // For each column of output, for each pass over the matrix:
1425  //
1426  // - One + and one * for each matrix entry
1427  // - One / and one + for each row of the matrix
1428  // - If the damping factor is not one: one * for each row of the
1429  // matrix. (It's not fair to count this if the damping factor is
1430  // one, since the implementation could skip it. Whether it does
1431  // or not is the implementation's choice.)
1432 
1433  // Floating-point operations due to the damping factor, per matrix
1434  // row, per direction, per columm of output.
1435  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1436  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1437  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1438  (2.0 * numGlobalNonzeros + dampingFlops);
1439 }
1440 
1441 
1442 template<class MatrixType>
1443 void
1444 Relaxation<MatrixType>::
1445 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1446  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1447 {
1448  using Teuchos::as;
1449  if (hasBlockCrsMatrix_) {
1450  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1451  return;
1452  }
1453 
1454  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1455  const double numVectors = as<double> (X.getNumVectors ());
1456  if (ZeroStartingSolution_) {
1457  // For the first Jacobi sweep, if we are allowed to assume that
1458  // the initial guess is zero, then Jacobi is just diagonal
1459  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1460  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1461  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1462 
1463  // Count (global) floating-point operations. Ifpack2 represents
1464  // this as a floating-point number rather than an integer, so that
1465  // overflow (for a very large number of calls, or a very large
1466  // problem) is approximate instead of catastrophic.
1467  double flopUpdate = 0.0;
1468  if (DampingFactor_ == STS::one ()) {
1469  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1470  flopUpdate = numGlobalRows * numVectors;
1471  } else {
1472  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1473  // Two multiplies per entry of Y.
1474  flopUpdate = 2.0 * numGlobalRows * numVectors;
1475  }
1476  ApplyFlops_ += flopUpdate;
1477  if (NumSweeps_ == 1) {
1478  return;
1479  }
1480  }
1481  // If we were allowed to assume that the starting guess was zero,
1482  // then we have already done the first sweep above.
1483  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1484 
1485  // Allocate a multivector if the cached one isn't perfect
1486  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1487 
1488  for (int j = startSweep; j < NumSweeps_; ++j) {
1489  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1490  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1491  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1492  }
1493 
1494  // For each column of output, for each pass over the matrix:
1495  //
1496  // - One + and one * for each matrix entry
1497  // - One / and one + for each row of the matrix
1498  // - If the damping factor is not one: one * for each row of the
1499  // matrix. (It's not fair to count this if the damping factor is
1500  // one, since the implementation could skip it. Whether it does
1501  // or not is the implementation's choice.)
1502 
1503  // Floating-point operations due to the damping factor, per matrix
1504  // row, per direction, per columm of output.
1505  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1506  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1507  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1508  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1509 }
1510 
1511 template<class MatrixType>
1512 void
1513 Relaxation<MatrixType>::
1514 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1515  local_ordinal_type,
1516  global_ordinal_type,
1517  node_type>& X,
1518  Tpetra::MultiVector<scalar_type,
1519  local_ordinal_type,
1520  global_ordinal_type,
1521  node_type>& Y) const
1522 {
1523  using Tpetra::BlockMultiVector;
1524  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1525  global_ordinal_type, node_type>;
1526 
1527  const block_crs_matrix_type* blockMatConst =
1528  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1529  TEUCHOS_TEST_FOR_EXCEPTION
1530  (blockMatConst == nullptr, std::logic_error, "This method should "
1531  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1532  "Please report this bug to the Ifpack2 developers.");
1533  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1534  // This is because applyBlock() is nonconst (more accurate), while
1535  // apply() is const (required by Tpetra::Operator interface, but a
1536  // lie, because it possibly allocates temporary buffers).
1537  block_crs_matrix_type* blockMat =
1538  const_cast<block_crs_matrix_type*> (blockMatConst);
1539 
1540  auto meshRowMap = blockMat->getRowMap ();
1541  auto meshColMap = blockMat->getColMap ();
1542  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1543 
1544  BMV X_blk (X, *meshColMap, blockSize);
1545  BMV Y_blk (Y, *meshRowMap, blockSize);
1546 
1547  if (ZeroStartingSolution_) {
1548  // For the first sweep, if we are allowed to assume that the
1549  // initial guess is zero, then block Jacobi is just block diagonal
1550  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1551  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1552  if (NumSweeps_ == 1) {
1553  return;
1554  }
1555  }
1556 
1557  auto pointRowMap = Y.getMap ();
1558  const size_t numVecs = X.getNumVectors ();
1559 
1560  // We don't need to initialize the result MV, since the sparse
1561  // mat-vec will clobber its contents anyway.
1562  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1563 
1564  // If we were allowed to assume that the starting guess was zero,
1565  // then we have already done the first sweep above.
1566  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1567 
1568  for (int j = startSweep; j < NumSweeps_; ++j) {
1569  blockMat->applyBlock (Y_blk, A_times_Y);
1570  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1571  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1572  X_blk, A_times_Y, STS::one ());
1573  }
1574 }
1575 
1576 template<class MatrixType>
1577 void
1578 Relaxation<MatrixType>::
1579 ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1580  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1581  Tpetra::ESweepDirection direction) const
1582 {
1583  using this_type = Relaxation<MatrixType>;
1584  // The CrsMatrix version is faster, because it can access the sparse
1585  // matrix data directly, rather than by copying out each row's data
1586  // in turn. Thus, we check whether the RowMatrix is really a
1587  // CrsMatrix.
1588  //
1589  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1590  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1591  // will still be correct if the cast fails, but it will use an
1592  // unoptimized kernel.
1593  auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1594  auto crsMat = Details::getCrsMatrix(A_);
1595  if (blockCrsMat.get()) {
1596  const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1597  }
1598  else if (crsMat.get()) {
1599  ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1600  }
1601  else {
1602  ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1603  }
1604 }
1605 
1606 
1607 template<class MatrixType>
1608 void
1609 Relaxation<MatrixType>::
1610 ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1611  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1612  Tpetra::ESweepDirection direction) const {
1613  using Teuchos::Array;
1614  using Teuchos::ArrayRCP;
1615  using Teuchos::ArrayView;
1616  using Teuchos::as;
1617  using Teuchos::RCP;
1618  using Teuchos::rcp;
1619  using Teuchos::rcpFromRef;
1620  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1621 
1622  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1623  // starting multivector itself. The generic RowMatrix version here
1624  // does not, so we have to zero out Y here.
1625  if (ZeroStartingSolution_) {
1626  Y.putScalar (STS::zero ());
1627  }
1628 
1629  size_t NumVectors = X.getNumVectors();
1630 
1631  RCP<MV> Y2;
1632  if (IsParallel_) {
1633  if (Importer_.is_null ()) { // domain and column Maps are the same.
1634  updateCachedMultiVector (Y.getMap (), NumVectors);
1635  }
1636  else {
1637  updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1638  }
1639  Y2 = cachedMV_;
1640  }
1641  else {
1642  Y2 = rcpFromRef (Y);
1643  }
1644 
1645  for (int j = 0; j < NumSweeps_; ++j) {
1646  // data exchange is here, once per sweep
1647  if (IsParallel_) {
1648  if (Importer_.is_null ()) {
1649  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1650  // clear if this is correct. Reevaluate at some point.
1651 
1652  *Y2 = Y; // just copy, since domain and column Maps are the same
1653  } else {
1654  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1655  }
1656  }
1657  serialGaussSeidel_->apply(*Y2, X, direction);
1658 
1659  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1660  if (IsParallel_) {
1661  Tpetra::deep_copy (Y, *Y2);
1662  }
1663  }
1664 
1665  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1666  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1667  const double numVectors = as<double> (X.getNumVectors ());
1668  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1669  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1670  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1671  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1672 }
1673 
1674 
1675 template<class MatrixType>
1676 void
1677 Relaxation<MatrixType>::
1678 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1679  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1680  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1681  Tpetra::ESweepDirection direction) const
1682 {
1683  using Teuchos::null;
1684  using Teuchos::RCP;
1685  using Teuchos::rcp;
1686  using Teuchos::rcpFromRef;
1687  using Teuchos::rcp_const_cast;
1688  typedef scalar_type Scalar;
1689  const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1690  const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1691 
1692  TEUCHOS_TEST_FOR_EXCEPTION(
1693  ! A.isFillComplete (), std::runtime_error,
1694  prefix << "The matrix is not fill complete.");
1695  TEUCHOS_TEST_FOR_EXCEPTION(
1696  NumSweeps_ < 0, std::invalid_argument,
1697  prefix << "The number of sweeps must be nonnegative, "
1698  "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1699 
1700  if (NumSweeps_ == 0) {
1701  return;
1702  }
1703 
1704  RCP<const import_type> importer = A.getGraph ()->getImporter ();
1705 
1706  RCP<const map_type> domainMap = A.getDomainMap ();
1707  RCP<const map_type> rangeMap = A.getRangeMap ();
1708  RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1709  RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1710 
1711 #ifdef HAVE_IFPACK2_DEBUG
1712  {
1713  // The relation 'isSameAs' is transitive. It's also a
1714  // collective, so we don't have to do a "shared" test for
1715  // exception (i.e., a global reduction on the test value).
1716  TEUCHOS_TEST_FOR_EXCEPTION(
1717  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1718  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1719  "multivector X be in the domain Map of the matrix.");
1720  TEUCHOS_TEST_FOR_EXCEPTION(
1721  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1722  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1723  "B be in the range Map of the matrix.");
1724  TEUCHOS_TEST_FOR_EXCEPTION(
1725  ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1726  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1727  "D be in the row Map of the matrix.");
1728  TEUCHOS_TEST_FOR_EXCEPTION(
1729  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1730  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1731  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1732  TEUCHOS_TEST_FOR_EXCEPTION(
1733  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1734  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1735  "the range Map of the matrix be the same.");
1736  }
1737 #endif
1738 
1739  // Fetch a (possibly cached) temporary column Map multivector
1740  // X_colMap, and a domain Map view X_domainMap of it. Both have
1741  // constant stride by construction. We know that the domain Map
1742  // must include the column Map, because our Gauss-Seidel kernel
1743  // requires that the row Map, domain Map, and range Map are all
1744  // the same, and that each process owns all of its own diagonal
1745  // entries of the matrix.
1746 
1747  RCP<multivector_type> X_colMap;
1748  RCP<multivector_type> X_domainMap;
1749  bool copyBackOutput = false;
1750  if (importer.is_null ()) {
1751  X_colMap = Teuchos::rcpFromRef (X);
1752  X_domainMap = Teuchos::rcpFromRef (X);
1753  // Column Map and domain Map are the same, so there are no
1754  // remote entries. Thus, if we are not setting the initial
1755  // guess to zero, we don't have to worry about setting remote
1756  // entries to zero, even though we are not doing an Import in
1757  // this case.
1758  if (ZeroStartingSolution_) {
1759  X_colMap->putScalar (ZERO);
1760  }
1761  // No need to copy back to X at end.
1762  }
1763  else { // Column Map and domain Map are _not_ the same.
1764  updateCachedMultiVector(colMap, X.getNumVectors());
1765  X_colMap = cachedMV_;
1766  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1767 
1768  if (ZeroStartingSolution_) {
1769  // No need for an Import, since we're filling with zeros.
1770  X_colMap->putScalar (ZERO);
1771  } else {
1772  // We could just copy X into X_domainMap. However, that
1773  // wastes a copy, because the Import also does a copy (plus
1774  // communication). Since the typical use case for
1775  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1776  // don't want to waste that copy. Thus, we do the Import
1777  // here, and skip the first Import in the first sweep.
1778  // Importing directly from X effects the copy into X_domainMap
1779  // (which is a view of X_colMap).
1780  X_colMap->doImport (X, *importer, Tpetra::INSERT);
1781  }
1782  copyBackOutput = true; // Don't forget to copy back at end.
1783  } // if column and domain Maps are (not) the same
1784 
1785  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1786  if (! importer.is_null () && sweep > 0) {
1787  // We already did the first Import for the zeroth sweep above,
1788  // if it was necessary.
1789  X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1790  }
1791  // Do local Gauss-Seidel (forward, backward or symmetric)
1792  serialGaussSeidel_->apply(*X_colMap, B, direction);
1793  }
1794 
1795  if (copyBackOutput) {
1796  try {
1797  deep_copy (X , *X_domainMap); // Copy result back into X.
1798  } catch (std::exception& e) {
1799  TEUCHOS_TEST_FOR_EXCEPTION(
1800  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1801  "threw an exception: " << e.what ());
1802  }
1803  }
1804 
1805  // For each column of output, for each sweep over the matrix:
1806  //
1807  // - One + and one * for each matrix entry
1808  // - One / and one + for each row of the matrix
1809  // - If the damping factor is not one: one * for each row of the
1810  // matrix. (It's not fair to count this if the damping factor is
1811  // one, since the implementation could skip it. Whether it does
1812  // or not is the implementation's choice.)
1813 
1814  // Floating-point operations due to the damping factor, per matrix
1815  // row, per direction, per columm of output.
1816  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1817  const double numVectors = X.getNumVectors ();
1818  const double numGlobalRows = A_->getGlobalNumRows ();
1819  const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1820  ApplyFlops_ += NumSweeps_ * numVectors *
1821  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1822 }
1823 
1824 template<class MatrixType>
1825 void
1826 Relaxation<MatrixType>::
1827 ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1828  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1829  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1830  Tpetra::ESweepDirection direction)
1831 {
1832  using Tpetra::INSERT;
1833  using Teuchos::RCP;
1834  using Teuchos::rcp;
1835  using Teuchos::rcpFromRef;
1836 
1837  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1838  //
1839  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1840  // multiple right-hand sides, unless the input or output MultiVector
1841  // does not have constant stride. We should check for that case
1842  // here, in case it doesn't work in localGaussSeidel (which is
1843  // entirely possible).
1844  block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1845  const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1846 
1847  bool performImport = false;
1848  RCP<block_multivector_type> yBlockCol;
1849  if (Importer_.is_null()) {
1850  yBlockCol = rcpFromRef(yBlock);
1851  }
1852  else {
1853  if (yBlockColumnPointMap_.is_null() ||
1854  yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1855  yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1856  yBlockColumnPointMap_ =
1857  rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1858  static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1859  }
1860  yBlockCol = yBlockColumnPointMap_;
1861  if (pointImporter_.is_null()) {
1862  auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1863  auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1864  pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1865  }
1866  performImport = true;
1867  }
1868 
1869  multivector_type yBlock_mv;
1870  multivector_type yBlockCol_mv;
1871  RCP<const multivector_type> yBlockColPointDomain;
1872  if (performImport) { // create views (shallow copies)
1873  yBlock_mv = yBlock.getMultiVectorView();
1874  yBlockCol_mv = yBlockCol->getMultiVectorView();
1875  yBlockColPointDomain =
1876  yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1877  }
1878 
1879  if (ZeroStartingSolution_) {
1880  yBlockCol->putScalar(STS::zero ());
1881  }
1882  else if (performImport) {
1883  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1884  }
1885 
1886  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1887  if (performImport && sweep > 0) {
1888  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1889  }
1890  serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1891  if (performImport) {
1892  Tpetra::deep_copy(Y, *yBlockColPointDomain);
1893  }
1894  }
1895 }
1896 
1897 template<class MatrixType>
1898 void
1899 Relaxation<MatrixType>::
1900 ApplyInverseMTGS_CrsMatrix(
1901  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1902  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1903  Tpetra::ESweepDirection direction) const
1904 {
1905  using Teuchos::null;
1906  using Teuchos::RCP;
1907  using Teuchos::rcp;
1908  using Teuchos::rcpFromRef;
1909  using Teuchos::rcp_const_cast;
1910  using Teuchos::as;
1911 
1912  typedef scalar_type Scalar;
1913 
1914  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1915  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1916 
1917  auto crsMat = Details::getCrsMatrix(A_);
1918  TEUCHOS_TEST_FOR_EXCEPTION
1919  (crsMat.is_null(), std::logic_error, "Ifpack2::Relaxation::apply: "
1920  "Multithreaded Gauss-Seidel methods currently only work when the "
1921  "input matrix is a Tpetra::CrsMatrix.");
1922 
1923  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1924  TEUCHOS_TEST_FOR_EXCEPTION
1925  (! localSmoothingIndices_.is_null (), std::logic_error,
1926  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1927  "implement the case where the user supplies an iteration order. "
1928  "This error used to appear as \"MT GaussSeidel ignores the given "
1929  "order\". "
1930  "I tried to add more explanation, but I didn't implement \"MT "
1931  "GaussSeidel\" [sic]. "
1932  "You'll have to ask the person who did.");
1933 
1934  TEUCHOS_TEST_FOR_EXCEPTION
1935  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1936  "input CrsMatrix is not fill complete. Please call fillComplete "
1937  "on the matrix, then do setup again, before calling apply(). "
1938  "\"Do setup\" means that if only the matrix's values have changed "
1939  "since last setup, you need only call compute(). If the matrix's "
1940  "structure may also have changed, you must first call initialize(), "
1941  "then call compute(). If you have not set up this preconditioner "
1942  "for this matrix before, you must first call initialize(), then "
1943  "call compute().");
1944  TEUCHOS_TEST_FOR_EXCEPTION
1945  (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1946  << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1947  "developers.");
1948  if (NumSweeps_ == 0) {
1949  return;
1950  }
1951 
1952  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1953  TEUCHOS_TEST_FOR_EXCEPTION(
1954  ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1955  "This method's implementation currently requires that the matrix's row, "
1956  "domain, and range Maps be the same. This cannot be the case, because "
1957  "the matrix has a nontrivial Export object.");
1958 
1959  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1960  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1961  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1962  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1963 
1964 #ifdef HAVE_IFPACK2_DEBUG
1965  {
1966  // The relation 'isSameAs' is transitive. It's also a
1967  // collective, so we don't have to do a "shared" test for
1968  // exception (i.e., a global reduction on the test value).
1969  TEUCHOS_TEST_FOR_EXCEPTION(
1970  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1971  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1972  "multivector X be in the domain Map of the matrix.");
1973  TEUCHOS_TEST_FOR_EXCEPTION(
1974  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1975  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1976  "B be in the range Map of the matrix.");
1977  TEUCHOS_TEST_FOR_EXCEPTION(
1978  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1979  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1980  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1981  TEUCHOS_TEST_FOR_EXCEPTION(
1982  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1983  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1984  "the range Map of the matrix be the same.");
1985  }
1986 #else
1987  // Forestall any compiler warnings for unused variables.
1988  (void) rangeMap;
1989  (void) rowMap;
1990 #endif // HAVE_IFPACK2_DEBUG
1991 
1992  // Fetch a (possibly cached) temporary column Map multivector
1993  // X_colMap, and a domain Map view X_domainMap of it. Both have
1994  // constant stride by construction. We know that the domain Map
1995  // must include the column Map, because our Gauss-Seidel kernel
1996  // requires that the row Map, domain Map, and range Map are all
1997  // the same, and that each process owns all of its own diagonal
1998  // entries of the matrix.
1999 
2000  RCP<multivector_type> X_colMap;
2001  RCP<multivector_type> X_domainMap;
2002  bool copyBackOutput = false;
2003  if (importer.is_null ()) {
2004  if (X.isConstantStride ()) {
2005  X_colMap = rcpFromRef (X);
2006  X_domainMap = rcpFromRef (X);
2007 
2008  // Column Map and domain Map are the same, so there are no
2009  // remote entries. Thus, if we are not setting the initial
2010  // guess to zero, we don't have to worry about setting remote
2011  // entries to zero, even though we are not doing an Import in
2012  // this case.
2013  if (ZeroStartingSolution_) {
2014  X_colMap->putScalar (ZERO);
2015  }
2016  // No need to copy back to X at end.
2017  }
2018  else {
2019  // We must copy X into a constant stride multivector.
2020  // Just use the cached column Map multivector for that.
2021  // force=true means fill with zeros, so no need to fill
2022  // remote entries (not in domain Map) with zeros.
2023  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2024  X_colMap = cachedMV_;
2025  // X_domainMap is always a domain Map view of the column Map
2026  // multivector. In this case, the domain and column Maps are
2027  // the same, so X_domainMap _is_ X_colMap.
2028  X_domainMap = X_colMap;
2029  if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2030  try {
2031  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2032  } catch (std::exception& e) {
2033  std::ostringstream os;
2034  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2035  "deep_copy(*X_domainMap, X) threw an exception: "
2036  << e.what () << ".";
2037  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2038  }
2039  }
2040  copyBackOutput = true; // Don't forget to copy back at end.
2041  /*
2042  TPETRA_EFFICIENCY_WARNING(
2043  ! X.isConstantStride (),
2044  std::runtime_error,
2045  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2046  "kernel requires that X and B both have constant stride. Since X "
2047  "does not have constant stride, we had to make a copy. This is a "
2048  "limitation of the current implementation and not your fault, but we "
2049  "still report it as an efficiency warning for your information.");
2050  */
2051  }
2052  }
2053  else { // Column Map and domain Map are _not_ the same.
2054  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2055  X_colMap = cachedMV_;
2056 
2057  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2058 
2059  if (ZeroStartingSolution_) {
2060  // No need for an Import, since we're filling with zeros.
2061  X_colMap->putScalar (ZERO);
2062  } else {
2063  // We could just copy X into X_domainMap. However, that
2064  // wastes a copy, because the Import also does a copy (plus
2065  // communication). Since the typical use case for
2066  // Gauss-Seidel is a small number of sweeps (2 is typical), we
2067  // don't want to waste that copy. Thus, we do the Import
2068  // here, and skip the first Import in the first sweep.
2069  // Importing directly from X effects the copy into X_domainMap
2070  // (which is a view of X_colMap).
2071  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2072  }
2073  copyBackOutput = true; // Don't forget to copy back at end.
2074  } // if column and domain Maps are (not) the same
2075 
2076  // The Gauss-Seidel / SOR kernel expects multivectors of constant
2077  // stride. X_colMap is by construction, but B might not be. If
2078  // it's not, we have to make a copy.
2079  RCP<const multivector_type> B_in;
2080  if (B.isConstantStride ()) {
2081  B_in = rcpFromRef (B);
2082  }
2083  else {
2084  // Range Map and row Map are the same in this case, so we can
2085  // use the cached row Map multivector to store a constant stride
2086  // copy of B.
2087  RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2088  try {
2089  deep_copy (*B_in_nonconst, B);
2090  } catch (std::exception& e) {
2091  std::ostringstream os;
2092  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2093  "deep_copy(*B_in_nonconst, B) threw an exception: "
2094  << e.what () << ".";
2095  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2096  }
2097  B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2098 
2099  /*
2100  TPETRA_EFFICIENCY_WARNING(
2101  ! B.isConstantStride (),
2102  std::runtime_error,
2103  "MTGaussSeidel: The current implementation requires that B have "
2104  "constant stride. Since B does not have constant stride, we had to "
2105  "copy it into a separate constant-stride multivector. This is a "
2106  "limitation of the current implementation and not your fault, but we "
2107  "still report it as an efficiency warning for your information.");
2108  */
2109  }
2110 
2111  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2112 
2113  bool update_y_vector = true;
2114  //false as it was done up already, and we dont want to zero it in each sweep.
2115  bool zero_x_vector = false;
2116 
2117  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2118  if (! importer.is_null () && sweep > 0) {
2119  // We already did the first Import for the zeroth sweep above,
2120  // if it was necessary.
2121  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2122  }
2123 
2124  if (direction == Tpetra::Symmetric) {
2125  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2126  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2127  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2128  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2129  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2130  zero_x_vector, update_y_vector, DampingFactor_, 1);
2131  }
2132  else if (direction == Tpetra::Forward) {
2133  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2134  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2135  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2136  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2137  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2138  zero_x_vector, update_y_vector, DampingFactor_, 1);
2139  }
2140  else if (direction == Tpetra::Backward) {
2141  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2142  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2143  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2144  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2145  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2146  zero_x_vector, update_y_vector, DampingFactor_, 1);
2147  }
2148  else {
2149  TEUCHOS_TEST_FOR_EXCEPTION(
2150  true, std::invalid_argument,
2151  prefix << "The 'direction' enum does not have any of its valid "
2152  "values: Forward, Backward, or Symmetric.");
2153  }
2154  update_y_vector = false;
2155  }
2156 
2157  if (copyBackOutput) {
2158  try {
2159  deep_copy (X , *X_domainMap); // Copy result back into X.
2160  } catch (std::exception& e) {
2161  TEUCHOS_TEST_FOR_EXCEPTION(
2162  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2163  "threw an exception: " << e.what ());
2164  }
2165  }
2166 
2167  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2168  const double numVectors = as<double> (X.getNumVectors ());
2169  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2170  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2171  double ApplyFlops = NumSweeps_ * numVectors *
2172  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2173  if (direction == Tpetra::Symmetric)
2174  ApplyFlops *= 2.0;
2175  ApplyFlops_ += ApplyFlops;
2176 }
2177 
2178 template<class MatrixType>
2180 {
2181  std::ostringstream os;
2182 
2183  // Output is a valid YAML dictionary in flow style. If you don't
2184  // like everything on a single line, you should call describe()
2185  // instead.
2186  os << "\"Ifpack2::Relaxation\": {";
2187 
2188  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2189  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2190 
2191  // It's useful to print this instance's relaxation method (Jacobi,
2192  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2193  // than that, call describe() instead.
2194  os << "Type: ";
2195  if (PrecType_ == Ifpack2::Details::JACOBI) {
2196  os << "Jacobi";
2197  } else if (PrecType_ == Ifpack2::Details::GS) {
2198  os << "Gauss-Seidel";
2199  } else if (PrecType_ == Ifpack2::Details::SGS) {
2200  os << "Symmetric Gauss-Seidel";
2201  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2202  os << "MT Gauss-Seidel";
2203  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2204  os << "MT Symmetric Gauss-Seidel";
2205  } else if (PrecType_ == Ifpack2::Details::GS2) {
2206  os << "Two-stage Gauss-Seidel";
2207  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2208  os << "Two-stage Symmetric Gauss-Seidel";
2209  }
2210  else {
2211  os << "INVALID";
2212  }
2213  if(hasBlockCrsMatrix_)
2214  os<<", BlockCrs";
2215 
2216  os << ", " << "sweeps: " << NumSweeps_ << ", "
2217  << "damping factor: " << DampingFactor_ << ", ";
2218 
2219  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2220  os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2221  os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2222  os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2223  os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2224  switch(mtColoringAlgorithm_)
2225  {
2226  case KokkosGraph::COLORING_DEFAULT:
2227  os << "DEFAULT"; break;
2228  case KokkosGraph::COLORING_SERIAL:
2229  os << "SERIAL"; break;
2230  case KokkosGraph::COLORING_VB:
2231  os << "VB"; break;
2232  case KokkosGraph::COLORING_VBBIT:
2233  os << "VBBIT"; break;
2234  case KokkosGraph::COLORING_VBCS:
2235  os << "VBCS"; break;
2236  case KokkosGraph::COLORING_VBD:
2237  os << "VBD"; break;
2238  case KokkosGraph::COLORING_VBDBIT:
2239  os << "VBDBIT"; break;
2240  case KokkosGraph::COLORING_EB:
2241  os << "EB"; break;
2242  default:
2243  os << "*Invalid*";
2244  }
2245  os << ", ";
2246  }
2247 
2248  if (PrecType_ == Ifpack2::Details::GS2 ||
2249  PrecType_ == Ifpack2::Details::SGS2) {
2250  os << "outer sweeps: " << NumOuterSweeps_ << ", "
2251  << "inner sweeps: " << NumInnerSweeps_ << ", "
2252  << "inner damping factor: " << InnerDampingFactor_ << ", ";
2253  }
2254 
2255  if (DoL1Method_) {
2256  os << "use l1: " << DoL1Method_ << ", "
2257  << "l1 eta: " << L1Eta_ << ", ";
2258  }
2259 
2260  if (A_.is_null ()) {
2261  os << "Matrix: null";
2262  }
2263  else {
2264  os << "Global matrix dimensions: ["
2265  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2266  << ", Global nnz: " << A_->getGlobalNumEntries();
2267  }
2268 
2269  os << "}";
2270  return os.str ();
2271 }
2272 
2273 
2274 template<class MatrixType>
2275 void
2277 describe (Teuchos::FancyOStream &out,
2278  const Teuchos::EVerbosityLevel verbLevel) const
2279 {
2280  using Teuchos::OSTab;
2281  using Teuchos::TypeNameTraits;
2282  using Teuchos::VERB_DEFAULT;
2283  using Teuchos::VERB_NONE;
2284  using Teuchos::VERB_LOW;
2285  using Teuchos::VERB_MEDIUM;
2286  using Teuchos::VERB_HIGH;
2287  using Teuchos::VERB_EXTREME;
2288  using std::endl;
2289 
2290  const Teuchos::EVerbosityLevel vl =
2291  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2292 
2293  const int myRank = this->getComm ()->getRank ();
2294 
2295  // none: print nothing
2296  // low: print O(1) info from Proc 0
2297  // medium:
2298  // high:
2299  // extreme:
2300 
2301  if (vl != VERB_NONE && myRank == 0) {
2302  // Describable interface asks each implementation to start with a tab.
2303  OSTab tab1 (out);
2304  // Output is valid YAML; hence the quotes, to protect the colons.
2305  out << "\"Ifpack2::Relaxation\":" << endl;
2306  OSTab tab2 (out);
2307  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2308  << endl;
2309  if (this->getObjectLabel () != "") {
2310  out << "Label: " << this->getObjectLabel () << endl;
2311  }
2312  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2313  << "Computed: " << (isComputed () ? "true" : "false") << endl
2314  << "Parameters: " << endl;
2315  {
2316  OSTab tab3 (out);
2317  out << "\"relaxation: type\": ";
2318  if (PrecType_ == Ifpack2::Details::JACOBI) {
2319  out << "Jacobi";
2320  } else if (PrecType_ == Ifpack2::Details::GS) {
2321  out << "Gauss-Seidel";
2322  } else if (PrecType_ == Ifpack2::Details::SGS) {
2323  out << "Symmetric Gauss-Seidel";
2324  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2325  out << "MT Gauss-Seidel";
2326  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2327  out << "MT Symmetric Gauss-Seidel";
2328  } else if (PrecType_ == Ifpack2::Details::GS2) {
2329  out << "Two-stage Gauss-Seidel";
2330  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2331  out << "Two-stage Symmetric Gauss-Seidel";
2332  } else {
2333  out << "INVALID";
2334  }
2335  // We quote these parameter names because they contain colons.
2336  // YAML uses the colon to distinguish key from value.
2337  out << endl
2338  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2339  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2340  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2341  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2342  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2343  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2344  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2345  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2346  out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2347  out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2348  out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2349  out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2350  switch(mtColoringAlgorithm_)
2351  {
2352  case KokkosGraph::COLORING_DEFAULT:
2353  out << "DEFAULT"; break;
2354  case KokkosGraph::COLORING_SERIAL:
2355  out << "SERIAL"; break;
2356  case KokkosGraph::COLORING_VB:
2357  out << "VB"; break;
2358  case KokkosGraph::COLORING_VBBIT:
2359  out << "VBBIT"; break;
2360  case KokkosGraph::COLORING_VBCS:
2361  out << "VBCS"; break;
2362  case KokkosGraph::COLORING_VBD:
2363  out << "VBD"; break;
2364  case KokkosGraph::COLORING_VBDBIT:
2365  out << "VBDBIT"; break;
2366  case KokkosGraph::COLORING_EB:
2367  out << "EB"; break;
2368  default:
2369  out << "*Invalid*";
2370  }
2371  out << endl;
2372  }
2373  if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2374  out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2375  out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2376  out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2377  }
2378  }
2379  out << "Computed quantities:" << endl;
2380  {
2381  OSTab tab3 (out);
2382  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2383  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2384  }
2385  if (checkDiagEntries_ && isComputed ()) {
2386  out << "Properties of input diagonal entries:" << endl;
2387  {
2388  OSTab tab3 (out);
2389  out << "Magnitude of minimum-magnitude diagonal entry: "
2390  << globalMinMagDiagEntryMag_ << endl
2391  << "Magnitude of maximum-magnitude diagonal entry: "
2392  << globalMaxMagDiagEntryMag_ << endl
2393  << "Number of diagonal entries with small magnitude: "
2394  << globalNumSmallDiagEntries_ << endl
2395  << "Number of zero diagonal entries: "
2396  << globalNumZeroDiagEntries_ << endl
2397  << "Number of diagonal entries with negative real part: "
2398  << globalNumNegDiagEntries_ << endl
2399  << "Abs 2-norm diff between computed and actual inverse "
2400  << "diagonal: " << globalDiagNormDiff_ << endl;
2401  }
2402  }
2403  if (isComputed ()) {
2404  out << "Saved diagonal offsets: "
2405  << (savedDiagOffsets_ ? "true" : "false") << endl;
2406  }
2407  out << "Call counts and total times (in seconds): " << endl;
2408  {
2409  OSTab tab3 (out);
2410  out << "initialize: " << endl;
2411  {
2412  OSTab tab4 (out);
2413  out << "Call count: " << NumInitialize_ << endl;
2414  out << "Total time: " << InitializeTime_ << endl;
2415  }
2416  out << "compute: " << endl;
2417  {
2418  OSTab tab4 (out);
2419  out << "Call count: " << NumCompute_ << endl;
2420  out << "Total time: " << ComputeTime_ << endl;
2421  }
2422  out << "apply: " << endl;
2423  {
2424  OSTab tab4 (out);
2425  out << "Call count: " << NumApply_ << endl;
2426  out << "Total time: " << ApplyTime_ << endl;
2427  }
2428  }
2429  }
2430 }
2431 
2432 
2433 } // namespace Ifpack2
2434 
2435 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2436  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2437 
2438 #endif // IFPACK2_RELAXATION_DEF_HPP
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2179
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:505
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:521
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:551
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:214
void compute()
Compute the preconditioner ("numeric setup");.
Definition: Ifpack2_Relaxation_def.hpp:1007
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:583
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:515
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:533
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:539
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:193
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:492
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:483
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:263
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:462
File for utility functions.
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:570
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:563
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:226
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:260
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:557
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:77
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:545
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:527
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:257
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:472
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:254
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:688
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:237
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2277
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:51
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition: Ifpack2_Relaxation_def.hpp:709
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:273
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:269