Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_RILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 //@HEADER
39 */
40 
41 #ifndef IFPACK2_CRSRILUK_DEF_HPP
42 #define IFPACK2_CRSRILUK_DEF_HPP
43 
44 #include "Ifpack2_LocalFilter.hpp"
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
48 #include "Ifpack2_Details_getParamTryingTypes.hpp"
49 #include "Ifpack2_Details_getCrsMatrix.hpp"
50 #include "Kokkos_Sort.hpp"
51 #include "KokkosSparse_Utils.hpp"
52 #include "KokkosKernels_Sorting.hpp"
53 
54 namespace Ifpack2 {
55 
56 namespace Details {
57 struct IlukImplType {
58  enum Enum {
59  Serial,
60  KSPILUK
61  };
62 
63  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
64  type_strs.resize(2);
65  type_strs[0] = "Serial";
66  type_strs[1] = "KSPILUK";
67  type_enums.resize(2);
68  type_enums[0] = Serial;
69  type_enums[1] = KSPILUK;
70  }
71 };
72 }
73 
74 template<class MatrixType>
75 RILUK<MatrixType>::RILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
76  : A_ (Matrix_in),
77  LevelOfFill_ (0),
78  Overalloc_ (2.),
79  isAllocated_ (false),
80  isInitialized_ (false),
81  isComputed_ (false),
82  numInitialize_ (0),
83  numCompute_ (0),
84  numApply_ (0),
85  initializeTime_ (0.0),
86  computeTime_ (0.0),
87  applyTime_ (0.0),
88  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
89  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
90  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
91  isKokkosKernelsSpiluk_(false)
92 {
93  allocateSolvers();
94 }
95 
96 
97 template<class MatrixType>
98 RILUK<MatrixType>::RILUK (const Teuchos::RCP<const crs_matrix_type>& Matrix_in)
99  : A_ (Matrix_in),
100  LevelOfFill_ (0),
101  Overalloc_ (2.),
102  isAllocated_ (false),
103  isInitialized_ (false),
104  isComputed_ (false),
105  numInitialize_ (0),
106  numCompute_ (0),
107  numApply_ (0),
108  initializeTime_ (0.0),
109  computeTime_ (0.0),
110  applyTime_ (0.0),
111  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
112  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
113  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
114  isKokkosKernelsSpiluk_(false)
115 {
116  allocateSolvers();
117 }
118 
119 
120 template<class MatrixType>
122 {
123  if (Teuchos::nonnull (KernelHandle_))
124  {
125  KernelHandle_->destroy_spiluk_handle();
126  }
127 }
128 
129 template<class MatrixType>
131 {
132  L_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
133  L_solver_->setObjectLabel("lower");
134  U_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
135  U_solver_->setObjectLabel("upper");
136 }
137 
138 template<class MatrixType>
139 void
140 RILUK<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
141 {
142  // It's legal for A to be null; in that case, you may not call
143  // initialize() until calling setMatrix() with a nonnull input.
144  // Regardless, setting the matrix invalidates any previous
145  // factorization.
146  if (A.getRawPtr () != A_.getRawPtr ()) {
147  isAllocated_ = false;
148  isInitialized_ = false;
149  isComputed_ = false;
150  A_local_ = Teuchos::null;
151  Graph_ = Teuchos::null;
152 
153  // The sparse triangular solvers get a triangular factor as their
154  // input matrix. The triangular factors L_ and U_ are getting
155  // reset, so we reset the solvers' matrices to null. Do that
156  // before setting L_ and U_ to null, so that latter step actually
157  // frees the factors.
158  if (! L_solver_.is_null ()) {
159  L_solver_->setMatrix (Teuchos::null);
160  }
161  if (! U_solver_.is_null ()) {
162  U_solver_->setMatrix (Teuchos::null);
163  }
164 
165  L_ = Teuchos::null;
166  U_ = Teuchos::null;
167  D_ = Teuchos::null;
168  A_ = A;
169  }
170 }
171 
172 
173 
174 template<class MatrixType>
177 {
178  TEUCHOS_TEST_FOR_EXCEPTION(
179  L_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
180  "is null. Please call initialize() (and preferably also compute()) "
181  "before calling this method. If the input matrix has not yet been set, "
182  "you must first call setMatrix() with a nonnull input matrix before you "
183  "may call initialize() or compute().");
184  return *L_;
185 }
186 
187 
188 template<class MatrixType>
189 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
194 {
195  TEUCHOS_TEST_FOR_EXCEPTION(
196  D_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
197  "(of diagonal entries) is null. Please call initialize() (and "
198  "preferably also compute()) before calling this method. If the input "
199  "matrix has not yet been set, you must first call setMatrix() with a "
200  "nonnull input matrix before you may call initialize() or compute().");
201  return *D_;
202 }
203 
204 
205 template<class MatrixType>
208 {
209  TEUCHOS_TEST_FOR_EXCEPTION(
210  U_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
211  "is null. Please call initialize() (and preferably also compute()) "
212  "before calling this method. If the input matrix has not yet been set, "
213  "you must first call setMatrix() with a nonnull input matrix before you "
214  "may call initialize() or compute().");
215  return *U_;
216 }
217 
218 
219 template<class MatrixType>
221  TEUCHOS_TEST_FOR_EXCEPTION(
222  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getNodeSmootherComplexity: "
223  "The input matrix A is null. Please call setMatrix() with a nonnull "
224  "input matrix, then call compute(), before calling this method.");
225  // RILUK methods cost roughly one apply + the nnz in the upper+lower triangles
226  if(!L_.is_null() && !U_.is_null())
227  return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
228  else
229  return 0;
230 }
231 
232 
233 template<class MatrixType>
234 Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
236  typename RILUK<MatrixType>::node_type> >
238  TEUCHOS_TEST_FOR_EXCEPTION(
239  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
240  "The matrix is null. Please call setMatrix() with a nonnull input "
241  "before calling this method.");
242 
243  // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
244  TEUCHOS_TEST_FOR_EXCEPTION(
245  Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
246  "The computed graph is null. Please call initialize() before calling "
247  "this method.");
248  return Graph_->getL_Graph ()->getDomainMap ();
249 }
250 
251 
252 template<class MatrixType>
253 Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
255  typename RILUK<MatrixType>::node_type> >
257  TEUCHOS_TEST_FOR_EXCEPTION(
258  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
259  "The matrix is null. Please call setMatrix() with a nonnull input "
260  "before calling this method.");
261 
262  // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
263  TEUCHOS_TEST_FOR_EXCEPTION(
264  Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
265  "The computed graph is null. Please call initialize() before calling "
266  "this method.");
267  return Graph_->getL_Graph ()->getRangeMap ();
268 }
269 
270 
271 template<class MatrixType>
273 {
274  using Teuchos::null;
275  using Teuchos::rcp;
276 
277  if (! isAllocated_) {
278  // Deallocate any existing storage. This avoids storing 2x
279  // memory, since RCP op= does not deallocate until after the
280  // assignment.
281  L_ = null;
282  U_ = null;
283  D_ = null;
284 
285  // Allocate Matrix using ILUK graphs
286  L_ = rcp (new crs_matrix_type (Graph_->getL_Graph ()));
287  U_ = rcp (new crs_matrix_type (Graph_->getU_Graph ()));
288  L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
289  U_->setAllToScalar (STS::zero ());
290 
291  // FIXME (mfh 24 Jan 2014) This assumes domain == range Map for L and U.
292  L_->fillComplete ();
293  U_->fillComplete ();
294  D_ = rcp (new vec_type (Graph_->getL_Graph ()->getRowMap ()));
295  }
296  isAllocated_ = true;
297 }
298 
299 
300 template<class MatrixType>
301 void
302 RILUK<MatrixType>::
303 setParameters (const Teuchos::ParameterList& params)
304 {
305  using Teuchos::RCP;
306  using Teuchos::ParameterList;
307  using Teuchos::Array;
308  using Details::getParamTryingTypes;
309  const char prefix[] = "Ifpack2::RILUK: ";
310 
311  // Default values of the various parameters.
312  int fillLevel = 0;
313  magnitude_type absThresh = STM::zero ();
314  magnitude_type relThresh = STM::one ();
315  magnitude_type relaxValue = STM::zero ();
316  double overalloc = 2.;
317 
318  // "fact: iluk level-of-fill" parsing is more complicated, because
319  // we want to allow as many types as make sense. int is the native
320  // type, but we also want to accept double (for backwards
321  // compatibilty with ILUT). You can't cast arbitrary magnitude_type
322  // (e.g., Sacado::MP::Vector) to int, so we use float instead, to
323  // get coverage of the most common magnitude_type cases. Weirdly,
324  // there's an Ifpack2 test that sets the fill level as a
325  // global_ordinal_type.
326  {
327  const std::string paramName ("fact: iluk level-of-fill");
328  getParamTryingTypes<int, int, global_ordinal_type, double, float>
329  (fillLevel, params, paramName, prefix);
330  }
331  // For the other parameters, we prefer magnitude_type, but allow
332  // double for backwards compatibility.
333  {
334  const std::string paramName ("fact: absolute threshold");
335  getParamTryingTypes<magnitude_type, magnitude_type, double>
336  (absThresh, params, paramName, prefix);
337  }
338  {
339  const std::string paramName ("fact: relative threshold");
340  getParamTryingTypes<magnitude_type, magnitude_type, double>
341  (relThresh, params, paramName, prefix);
342  }
343  {
344  const std::string paramName ("fact: relax value");
345  getParamTryingTypes<magnitude_type, magnitude_type, double>
346  (relaxValue, params, paramName, prefix);
347  }
348  {
349  const std::string paramName ("fact: iluk overalloc");
350  getParamTryingTypes<double, double>
351  (overalloc, params, paramName, prefix);
352  }
353 
354  // Parsing implementation type
355  Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
356  do {
357  static const char typeName[] = "fact: type";
358 
359  if ( ! params.isType<std::string>(typeName)) break;
360 
361  // Map std::string <-> IlukImplType::Enum.
362  Array<std::string> ilukimplTypeStrs;
363  Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
364  Details::IlukImplType::loadPLTypeOption (ilukimplTypeStrs, ilukimplTypeEnums);
365  Teuchos::StringToIntegralParameterEntryValidator<Details::IlukImplType::Enum>
366  s2i(ilukimplTypeStrs (), ilukimplTypeEnums (), typeName, false);
367 
368  ilukimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
369  } while (0);
370 
371  if (ilukimplType == Details::IlukImplType::KSPILUK) {
372  this->isKokkosKernelsSpiluk_ = true;
373  }
374  else {
375  this->isKokkosKernelsSpiluk_ = false;
376  }
377 
378  // Forward to trisolvers.
379  L_solver_->setParameters(params);
380  U_solver_->setParameters(params);
381 
382  // "Commit" the values only after validating all of them. This
383  // ensures that there are no side effects if this routine throws an
384  // exception.
385 
386  LevelOfFill_ = fillLevel;
387  Overalloc_ = overalloc;
388 
389  // mfh 28 Nov 2012: The previous code would not assign Athresh_,
390  // Rthresh_, or RelaxValue_, if the read-in value was -1. I don't
391  // know if keeping this behavior is correct, but I'll keep it just
392  // so as not to change previous behavior.
393 
394  if (absThresh != -STM::one ()) {
395  Athresh_ = absThresh;
396  }
397  if (relThresh != -STM::one ()) {
398  Rthresh_ = relThresh;
399  }
400  if (relaxValue != -STM::one ()) {
401  RelaxValue_ = relaxValue;
402  }
403 }
404 
405 
406 template<class MatrixType>
407 Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
409  return Teuchos::rcp_implicit_cast<const row_matrix_type> (A_);
410 }
411 
412 
413 template<class MatrixType>
414 Teuchos::RCP<const typename RILUK<MatrixType>::crs_matrix_type>
416  return Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
417 }
418 
419 
420 template<class MatrixType>
421 Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
422 RILUK<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A)
423 {
424  using Teuchos::RCP;
425  using Teuchos::rcp;
426  using Teuchos::rcp_dynamic_cast;
427  using Teuchos::rcp_implicit_cast;
428 
429  // If A_'s communicator only has one process, or if its column and
430  // row Maps are the same, then it is already local, so use it
431  // directly.
432  if (A->getRowMap ()->getComm ()->getSize () == 1 ||
433  A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
434  return A;
435  }
436 
437  // If A_ is already a LocalFilter, then use it directly. This
438  // should be the case if RILUK is being used through
439  // AdditiveSchwarz, for example.
440  RCP<const LocalFilter<row_matrix_type> > A_lf_r =
441  rcp_dynamic_cast<const LocalFilter<row_matrix_type> > (A);
442  if (! A_lf_r.is_null ()) {
443  return rcp_implicit_cast<const row_matrix_type> (A_lf_r);
444  }
445  else {
446  // A_'s communicator has more than one process, its row Map and
447  // its column Map differ, and A_ is not a LocalFilter. Thus, we
448  // have to wrap it in a LocalFilter.
449  return rcp (new LocalFilter<row_matrix_type> (A));
450  }
451 }
452 
453 
454 template<class MatrixType>
456 {
457  using Teuchos::RCP;
458  using Teuchos::rcp;
459  using Teuchos::rcp_const_cast;
460  using Teuchos::rcp_dynamic_cast;
461  using Teuchos::rcp_implicit_cast;
462  using Teuchos::Array;
463  using Teuchos::ArrayView;
464  typedef Tpetra::CrsGraph<local_ordinal_type,
466  node_type> crs_graph_type;
467  const char prefix[] = "Ifpack2::RILUK::initialize: ";
468 
469  TEUCHOS_TEST_FOR_EXCEPTION
470  (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
471  "call setMatrix() with a nonnull input before calling this method.");
472  TEUCHOS_TEST_FOR_EXCEPTION
473  (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
474  "fill complete. You may not invoke initialize() or compute() with this "
475  "matrix until the matrix is fill complete. If your matrix is a "
476  "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
477  "range Maps, if appropriate) before calling this method.");
478 
479  Teuchos::Time timer ("RILUK::initialize");
480  double startTime = timer.wallTime();
481  { // Start timing
482  Teuchos::TimeMonitor timeMon (timer);
483 
484  // Calling initialize() means that the user asserts that the graph
485  // of the sparse matrix may have changed. We must not just reuse
486  // the previous graph in that case.
487  //
488  // Regarding setting isAllocated_ to false: Eventually, we may want
489  // some kind of clever memory reuse strategy, but it's always
490  // correct just to blow everything away and start over.
491  isInitialized_ = false;
492  isAllocated_ = false;
493  isComputed_ = false;
494  Graph_ = Teuchos::null;
495 
496  A_local_ = makeLocalFilter (A_);
497  TEUCHOS_TEST_FOR_EXCEPTION(
498  A_local_.is_null (), std::logic_error, "Ifpack2::RILUK::initialize: "
499  "makeLocalFilter returned null; it failed to compute A_local. "
500  "Please report this bug to the Ifpack2 developers.");
501 
502  // FIXME (mfh 24 Jan 2014, 26 Mar 2014) It would be more efficient
503  // to rewrite RILUK so that it works with any RowMatrix input, not
504  // just CrsMatrix. (That would require rewriting IlukGraph to
505  // handle a Tpetra::RowGraph.) However, to make it work for now,
506  // we just copy the input matrix if it's not a CrsMatrix.
507 
508  if (this->isKokkosKernelsSpiluk_) {
509  this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
510  KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
511  A_local_->getLocalNumRows(),
512  2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
513  2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
514  }
515 
516  {
517  RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
518  if(A_local_crs.is_null()) {
519  local_ordinal_type numRows = A_local_->getLocalNumRows();
520  Array<size_t> entriesPerRow(numRows);
521  for(local_ordinal_type i = 0; i < numRows; i++) {
522  entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
523  }
524  RCP<crs_matrix_type> A_local_crs_nc =
525  rcp (new crs_matrix_type (A_local_->getRowMap (),
526  A_local_->getColMap (),
527  entriesPerRow()));
528  // copy entries into A_local_crs
529  nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries());
530  nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries());
531  for(local_ordinal_type i = 0; i < numRows; i++) {
532  size_t numEntries = 0;
533  A_local_->getLocalRowCopy(i, indices, values, numEntries);
534  A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
535  }
536  A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
537  A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
538  }
539  Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (A_local_crs->getCrsGraph (),
540  LevelOfFill_, 0, Overalloc_));
541  }
542 
543  if (this->isKokkosKernelsSpiluk_) Graph_->initialize (KernelHandle_);
544  else Graph_->initialize ();
545 
546  allocate_L_and_U ();
547  checkOrderingConsistency (*A_local_);
548  L_solver_->setMatrix (L_);
549  L_solver_->initialize ();
550  //NOTE (Nov-09-2022):
551  //For Cuda >= 11.3 (using cusparseSpSV), skip trisolve computes here.
552  //Instead, call trisolve computes within RILUK compute
553 #if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
554  L_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute
555 #endif
556  U_solver_->setMatrix (U_);
557  U_solver_->initialize ();
558 #if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
559  U_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute
560 #endif
561 
562  // Do not call initAllValues. compute() always calls initAllValues to
563  // fill L and U with possibly new numbers. initialize() is concerned
564  // only with the nonzero pattern.
565  } // Stop timing
566 
567  isInitialized_ = true;
568  ++numInitialize_;
569  initializeTime_ += (timer.wallTime() - startTime);
570 }
571 
572 template<class MatrixType>
573 void
575 checkOrderingConsistency (const row_matrix_type& A)
576 {
577  // First check that the local row map ordering is the same as the local portion of the column map.
578  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
579  // implicitly assume that this is the case.
580  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
581  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
582  bool gidsAreConsistentlyOrdered=true;
583  global_ordinal_type indexOfInconsistentGID=0;
584  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
585  if (rowGIDs[i] != colGIDs[i]) {
586  gidsAreConsistentlyOrdered=false;
587  indexOfInconsistentGID=i;
588  break;
589  }
590  }
591  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
592  "The ordering of the local GIDs in the row and column maps is not the same"
593  << std::endl << "at index " << indexOfInconsistentGID
594  << ". Consistency is required, as all calculations are done with"
595  << std::endl << "local indexing.");
596 }
597 
598 template<class MatrixType>
599 void
600 RILUK<MatrixType>::
601 initAllValues (const row_matrix_type& A)
602 {
603  using Teuchos::ArrayRCP;
604  using Teuchos::Comm;
605  using Teuchos::ptr;
606  using Teuchos::RCP;
607  using Teuchos::REDUCE_SUM;
608  using Teuchos::reduceAll;
609  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
610 
611  size_t NumIn = 0, NumL = 0, NumU = 0;
612  bool DiagFound = false;
613  size_t NumNonzeroDiags = 0;
614  size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
615 
616  // Allocate temporary space for extracting the strictly
617  // lower and upper parts of the matrix A.
618  nonconst_local_inds_host_view_type InI("InI",MaxNumEntries);
619  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
620  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
621  nonconst_values_host_view_type InV("InV",MaxNumEntries);
622  Teuchos::Array<scalar_type> LV(MaxNumEntries);
623  Teuchos::Array<scalar_type> UV(MaxNumEntries);
624 
625  // Check if values should be inserted or replaced
626  const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
627 
628  L_->resumeFill ();
629  U_->resumeFill ();
630  if (ReplaceValues) {
631  L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
632  U_->setAllToScalar (STS::zero ());
633  }
634 
635  D_->putScalar (STS::zero ()); // Set diagonal values to zero
636  auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
637 
638  RCP<const map_type> rowMap = L_->getRowMap ();
639 
640  // First we copy the user's matrix into L and U, regardless of fill level.
641  // It is important to note that L and U are populated using local indices.
642  // This means that if the row map GIDs are not monotonically increasing
643  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
644  // matrix is not the one that you would get if you based L (U) on GIDs.
645  // This is ok, as the *order* of the GIDs in the rowmap is a better
646  // expression of the user's intent than the GIDs themselves.
647 
648  Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
649  for (size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
650  local_ordinal_type local_row = myRow;
651 
652  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
653  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
654  A.getLocalRowCopy (local_row, InI, InV, NumIn); // Get Values and Indices
655 
656  // Split into L and U (we don't assume that indices are ordered).
657 
658  NumL = 0;
659  NumU = 0;
660  DiagFound = false;
661 
662  for (size_t j = 0; j < NumIn; ++j) {
663  const local_ordinal_type k = InI[j];
664 
665  if (k == local_row) {
666  DiagFound = true;
667  // Store perturbed diagonal in Tpetra::Vector D_
668  DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
669  }
670  else if (k < 0) { // Out of range
671  TEUCHOS_TEST_FOR_EXCEPTION(
672  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
673  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
674  "probably an artifact of the undocumented assumptions of the "
675  "original implementation (likely copied and pasted from Ifpack). "
676  "Nevertheless, the code I found here insisted on this being an error "
677  "state, so I will throw an exception here.");
678  }
679  else if (k < local_row) {
680  LI[NumL] = k;
681  LV[NumL] = InV[j];
682  NumL++;
683  }
684  else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
685  UI[NumU] = k;
686  UV[NumU] = InV[j];
687  NumU++;
688  }
689  }
690 
691  // Check in things for this row of L and U
692 
693  if (DiagFound) {
694  ++NumNonzeroDiags;
695  } else {
696  DV(local_row) = Athresh_;
697  }
698 
699  if (NumL) {
700  if (ReplaceValues) {
701  L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
702  } else {
703  //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
704  //FIXME in this row in the column locations corresponding to UI.
705  L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
706  }
707  }
708 
709  if (NumU) {
710  if (ReplaceValues) {
711  U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
712  } else {
713  //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
714  //FIXME in this row in the column locations corresponding to UI.
715  U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
716  }
717  }
718  }
719 
720  // At this point L and U have the values of A in the structure of L
721  // and U, and diagonal vector D
722 
723  isInitialized_ = true;
724 }
725 
726 
727 template<class MatrixType>
729 {
730  using Teuchos::RCP;
731  using Teuchos::rcp;
732  using Teuchos::rcp_const_cast;
733  using Teuchos::rcp_dynamic_cast;
734  using Teuchos::Array;
735  using Teuchos::ArrayView;
736  const char prefix[] = "Ifpack2::RILUK::compute: ";
737 
738  // initialize() checks this too, but it's easier for users if the
739  // error shows them the name of the method that they actually
740  // called, rather than the name of some internally called method.
741  TEUCHOS_TEST_FOR_EXCEPTION
742  (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
743  "call setMatrix() with a nonnull input before calling this method.");
744  TEUCHOS_TEST_FOR_EXCEPTION
745  (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
746  "fill complete. You may not invoke initialize() or compute() with this "
747  "matrix until the matrix is fill complete. If your matrix is a "
748  "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
749  "range Maps, if appropriate) before calling this method.");
750 
751  if (! isInitialized ()) {
752  initialize (); // Don't count this in the compute() time
753  }
754 
755  Teuchos::Time timer ("RILUK::compute");
756 
757  // Start timing
758  Teuchos::TimeMonitor timeMon (timer);
759  double startTime = timer.wallTime();
760 
761  isComputed_ = false;
762 
763  if (!this->isKokkosKernelsSpiluk_) {
764  // Fill L and U with numbers. This supports nonzero pattern reuse by calling
765  // initialize() once and then compute() multiple times.
766  initAllValues (*A_local_);
767 
768  // MinMachNum should be officially defined, for now pick something a little
769  // bigger than IEEE underflow value
770 
771  const scalar_type MinDiagonalValue = STS::rmin ();
772  const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
773 
774  size_t NumIn, NumL, NumU;
775 
776  // Get Maximum Row length
777  const size_t MaxNumEntries =
778  L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
779 
780  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries); // Allocate temp space
781  Teuchos::Array<scalar_type> InV(MaxNumEntries);
782  size_t num_cols = U_->getColMap()->getLocalNumElements();
783  Teuchos::Array<int> colflag(num_cols);
784 
785  auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
786 
787  // Now start the factorization.
788 
789  for (size_t j = 0; j < num_cols; ++j) {
790  colflag[j] = -1;
791  }
792  using IST = typename row_matrix_type::impl_scalar_type;
793  for (size_t i = 0; i < L_->getLocalNumRows (); ++i) {
794  local_ordinal_type local_row = i;
795  // Need some integer workspace and pointers
796  size_t NumUU;
797  local_inds_host_view_type UUI;
798  values_host_view_type UUV;
799 
800  // Fill InV, InI with current row of L, D and U combined
801 
802  NumIn = MaxNumEntries;
803  nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries);
804  nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()),MaxNumEntries);
805 
806  L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
807 
808  InV[NumL] = DV(i); // Put in diagonal
809  InI[NumL] = local_row;
810 
811  nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
812  nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data())+NumL+1,MaxNumEntries-NumL-1);
813 
814  U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
815  NumIn = NumL+NumU+1;
816 
817  // Set column flags
818  for (size_t j = 0; j < NumIn; ++j) {
819  colflag[InI[j]] = j;
820  }
821 
822  scalar_type diagmod = STS::zero (); // Off-diagonal accumulator
823 
824  for (size_t jj = 0; jj < NumL; ++jj) {
825  local_ordinal_type j = InI[jj];
826  IST multiplier = InV[jj]; // current_mults++;
827 
828  InV[jj] *= static_cast<scalar_type>(DV(j));
829 
830  U_->getLocalRowView(j, UUI, UUV); // View of row above
831  NumUU = UUI.size();
832 
833  if (RelaxValue_ == STM::zero ()) {
834  for (size_t k = 0; k < NumUU; ++k) {
835  const int kk = colflag[UUI[k]];
836  // FIXME (mfh 23 Dec 2013) Wait a second, we just set
837  // colflag above using size_t (which is generally unsigned),
838  // but now we're querying it using int (which is signed).
839  if (kk > -1) {
840  InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
841  }
842  }
843 
844  }
845  else {
846  for (size_t k = 0; k < NumUU; ++k) {
847  // FIXME (mfh 23 Dec 2013) Wait a second, we just set
848  // colflag above using size_t (which is generally unsigned),
849  // but now we're querying it using int (which is signed).
850  const int kk = colflag[UUI[k]];
851  if (kk > -1) {
852  InV[kk] -= static_cast<scalar_type>(multiplier*UUV[k]);
853  }
854  else {
855  diagmod -= static_cast<scalar_type>(multiplier*UUV[k]);
856  }
857  }
858  }
859  }
860 
861  if (NumL) {
862  // Replace current row of L
863  L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
864  }
865 
866  DV(i) = InV[NumL]; // Extract Diagonal value
867 
868  if (RelaxValue_ != STM::zero ()) {
869  DV(i) += RelaxValue_*diagmod; // Add off diagonal modifications
870  }
871 
872  if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
873  if (STS::real (DV(i)) < STM::zero ()) {
874  DV(i) = -MinDiagonalValue;
875  }
876  else {
877  DV(i) = MinDiagonalValue;
878  }
879  }
880  else {
881  DV(i) = static_cast<impl_scalar_type>(STS::one ()) / DV(i); // Invert diagonal value
882  }
883 
884  for (size_t j = 0; j < NumU; ++j) {
885  InV[NumL+1+j] *= static_cast<scalar_type>(DV(i)); // Scale U by inverse of diagonal
886  }
887 
888  if (NumU) {
889  // Replace current row of L and U
890  U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
891  }
892 
893  // Reset column flags
894  for (size_t j = 0; j < NumIn; ++j) {
895  colflag[InI[j]] = -1;
896  }
897  }
898 
899  // The domain of L and the range of U are exactly their own row maps
900  // (there is no communication). The domain of U and the range of L
901  // must be the same as those of the original matrix, However if the
902  // original matrix is a VbrMatrix, these two latter maps are
903  // translation from a block map to a point map.
904  // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is
905  // always one-to-one?
906  L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
907  U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
908 
909  // If L_solver_ or U_solver store modified factors internally, we need to reset those
910  L_solver_->setMatrix (L_);
911  L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
912  U_solver_->setMatrix (U_);
913  U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
914  }
915  else {
916  {//Make sure values in A is picked up even in case of pattern reuse
917  RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
918  if(A_local_crs.is_null()) {
919  local_ordinal_type numRows = A_local_->getLocalNumRows();
920  Array<size_t> entriesPerRow(numRows);
921  for(local_ordinal_type i = 0; i < numRows; i++) {
922  entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
923  }
924  RCP<crs_matrix_type> A_local_crs_nc =
925  rcp (new crs_matrix_type (A_local_->getRowMap (),
926  A_local_->getColMap (),
927  entriesPerRow()));
928  // copy entries into A_local_crs
929  nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries());
930  nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries());
931  for(local_ordinal_type i = 0; i < numRows; i++) {
932  size_t numEntries = 0;
933  A_local_->getLocalRowCopy(i, indices, values, numEntries);
934  A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
935  }
936  A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
937  A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
938  }
939  auto lclMtx = A_local_crs->getLocalMatrixDevice();
940  A_local_rowmap_ = lclMtx.graph.row_map;
941  A_local_entries_ = lclMtx.graph.entries;
942  A_local_values_ = lclMtx.values;
943  }
944 
945  L_->resumeFill ();
946  U_->resumeFill ();
947 
948  if (L_->isStaticGraph () || L_->isLocallyIndexed ()) {
949  L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
950  U_->setAllToScalar (STS::zero ());
951  }
952 
953  using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type;
954 
955  {
956  auto lclL = L_->getLocalMatrixDevice();
957  row_map_type L_rowmap = lclL.graph.row_map;
958  auto L_entries = lclL.graph.entries;
959  auto L_values = lclL.values;
960 
961  auto lclU = U_->getLocalMatrixDevice();
962  row_map_type U_rowmap = lclU.graph.row_map;
963  auto U_entries = lclU.graph.entries;
964  auto U_values = lclU.values;
965 
966  KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
967  A_local_rowmap_, A_local_entries_, A_local_values_,
968  L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
969  }
970 
971  L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
972  U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
973 
974  L_solver_->setMatrix (L_);
975  L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
976  U_solver_->setMatrix (U_);
977  U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
978  }
979 
980  isComputed_ = true;
981  ++numCompute_;
982  computeTime_ += (timer.wallTime() - startTime);
983 }
984 
985 
986 template<class MatrixType>
987 void
989 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
990  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
991  Teuchos::ETransp mode,
992  scalar_type alpha,
993  scalar_type beta) const
994 {
995  using Teuchos::RCP;
996  using Teuchos::rcpFromRef;
997 
998  TEUCHOS_TEST_FOR_EXCEPTION(
999  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::apply: The matrix is "
1000  "null. Please call setMatrix() with a nonnull input, then initialize() "
1001  "and compute(), before calling this method.");
1002  TEUCHOS_TEST_FOR_EXCEPTION(
1003  ! isComputed (), std::runtime_error,
1004  "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1005  "you must call compute() before calling this method.");
1006  TEUCHOS_TEST_FOR_EXCEPTION(
1007  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1008  "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1009  "X.getNumVectors() = " << X.getNumVectors ()
1010  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
1011  TEUCHOS_TEST_FOR_EXCEPTION(
1012  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1013  "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1014  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1015  "fixed. There is a FIXME in this file about this very issue.");
1016 #ifdef HAVE_IFPACK2_DEBUG
1017  {
1018  const magnitude_type D_nrm1 = D_->norm1 ();
1019  TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1020  Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
1021  X.norm1 (norms ());
1022  bool good = true;
1023  for (size_t j = 0; j < X.getNumVectors (); ++j) {
1024  if (STM::isnaninf (norms[j])) {
1025  good = false;
1026  break;
1027  }
1028  }
1029  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1030  }
1031 #endif // HAVE_IFPACK2_DEBUG
1032 
1033  const scalar_type one = STS::one ();
1034  const scalar_type zero = STS::zero ();
1035 
1036  Teuchos::Time timer ("RILUK::apply");
1037  double startTime = timer.wallTime();
1038  { // Start timing
1039  Teuchos::TimeMonitor timeMon (timer);
1040  if (alpha == one && beta == zero) {
1041  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1042 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1043  //NOTE (Nov-15-2022):
1044  //This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1045  //since cusparseSpSV_solve() does not support in-place computation
1046  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1047 
1048  // Start by solving L Y_tmp = X for Y_tmp.
1049  L_solver_->apply (X, Y_tmp, mode);
1050 
1051  if (!this->isKokkosKernelsSpiluk_) {
1052  // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1053  // write "solve D Y = Y for Y."
1054  Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1055  }
1056 
1057  U_solver_->apply (Y_tmp, Y, mode); // Solve U Y = Y_tmp.
1058 #else
1059  // Start by solving L Y = X for Y.
1060  L_solver_->apply (X, Y, mode);
1061 
1062  if (!this->isKokkosKernelsSpiluk_) {
1063  // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1064  // write "solve D Y = Y for Y."
1065  Y.elementWiseMultiply (one, *D_, Y, zero);
1066  }
1067 
1068  U_solver_->apply (Y, Y, mode); // Solve U Y = Y.
1069 #endif
1070  }
1071  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1072 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1073  //NOTE (Nov-15-2022):
1074  //This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1075  //since cusparseSpSV_solve() does not support in-place computation
1076  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1077 
1078  // Start by solving U^P Y_tmp = X for Y_tmp.
1079  U_solver_->apply (X, Y_tmp, mode);
1080 
1081  if (!this->isKokkosKernelsSpiluk_) {
1082  // Solve D^P Y = Y.
1083  //
1084  // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1085  // need to do an elementwise multiply with the conjugate of
1086  // D_, not just with D_ itself.
1087  Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1088  }
1089 
1090  L_solver_->apply (Y_tmp, Y, mode); // Solve L^P Y = Y_tmp.
1091 #else
1092  // Start by solving U^P Y = X for Y.
1093  U_solver_->apply (X, Y, mode);
1094 
1095  if (!this->isKokkosKernelsSpiluk_) {
1096  // Solve D^P Y = Y.
1097  //
1098  // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1099  // need to do an elementwise multiply with the conjugate of
1100  // D_, not just with D_ itself.
1101  Y.elementWiseMultiply (one, *D_, Y, zero);
1102  }
1103 
1104  L_solver_->apply (Y, Y, mode); // Solve L^P Y = Y.
1105 #endif
1106  }
1107  }
1108  else { // alpha != 1 or beta != 0
1109  if (alpha == zero) {
1110  // The special case for beta == 0 ensures that if Y contains Inf
1111  // or NaN values, we replace them with 0 (following BLAS
1112  // convention), rather than multiplying them by 0 to get NaN.
1113  if (beta == zero) {
1114  Y.putScalar (zero);
1115  } else {
1116  Y.scale (beta);
1117  }
1118  } else { // alpha != zero
1119  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1120  apply (X, Y_tmp, mode);
1121  Y.update (alpha, Y_tmp, beta);
1122  }
1123  }
1124  }//end timing
1125 
1126 #ifdef HAVE_IFPACK2_DEBUG
1127  {
1128  Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
1129  Y.norm1 (norms ());
1130  bool good = true;
1131  for (size_t j = 0; j < Y.getNumVectors (); ++j) {
1132  if (STM::isnaninf (norms[j])) {
1133  good = false;
1134  break;
1135  }
1136  }
1137  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1138  }
1139 #endif // HAVE_IFPACK2_DEBUG
1140 
1141  ++numApply_;
1142  applyTime_ += (timer.wallTime() - startTime);
1143 }
1144 
1145 
1146 //VINH comment out since multiply() is not needed anywhere
1147 //template<class MatrixType>
1148 //void RILUK<MatrixType>::
1149 //multiply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1150 // Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1151 // const Teuchos::ETransp mode) const
1152 //{
1153 // const scalar_type zero = STS::zero ();
1154 // const scalar_type one = STS::one ();
1155 //
1156 // if (mode != Teuchos::NO_TRANS) {
1157 // U_->apply (X, Y, mode); //
1158 // Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1159 //
1160 // // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we need
1161 // // to do an elementwise multiply with the conjugate of D_, not
1162 // // just with D_ itself.
1163 // Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1164 //
1165 // MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y
1166 // L_->apply (Y_tmp, Y, mode);
1167 // Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1168 // }
1169 // else {
1170 // L_->apply (X, Y, mode);
1171 // Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1172 // Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1173 // MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y1
1174 // U_->apply (Y_tmp, Y, mode);
1175 // Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1176 // }
1177 //}
1178 
1179 template<class MatrixType>
1181 {
1182  std::ostringstream os;
1183 
1184  // Output is a valid YAML dictionary in flow style. If you don't
1185  // like everything on a single line, you should call describe()
1186  // instead.
1187  os << "\"Ifpack2::RILUK\": {";
1188  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1189  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1190 
1191  os << "Level-of-fill: " << getLevelOfFill() << ", ";
1192 
1193  if (A_.is_null ()) {
1194  os << "Matrix: null";
1195  }
1196  else {
1197  os << "Global matrix dimensions: ["
1198  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
1199  << ", Global nnz: " << A_->getGlobalNumEntries();
1200  }
1201 
1202  if (! L_solver_.is_null ()) os << ", " << L_solver_->description ();
1203  if (! U_solver_.is_null ()) os << ", " << U_solver_->description ();
1204 
1205  os << "}";
1206  return os.str ();
1207 }
1208 
1209 } // namespace Ifpack2
1210 
1211 #define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1212  template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1213 
1214 #endif
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:263
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:281
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:140
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:245
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:284
Ifpack2 implementation details.
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:266
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:260
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:237
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:79
crs_matrix_type::impl_scalar_type impl_scalar_type
Scalar type stored in Kokkos::Views (CrsMatrix and MultiVector)
Definition: Ifpack2_RILUK_decl.hpp:293
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:100
Tpetra::global_size_t getGlobalNumEntries() const
Returns the number of nonzero entries in the global graph.
Definition: Ifpack2_RILUK_decl.hpp:546
Definition: Ifpack2_Container_decl.hpp:576
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:256
Tpetra::RowMatrix< MatrixType::scalar_type, MatrixType::local_ordinal_type, MatrixType::global_ordinal_type, MatrixType::node_type > ::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:257