Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
44 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_Core.hpp"
48 #include "Teuchos_StandardParameterEntryValidators.hpp"
49 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
50 #include "KokkosSparse_trsv.hpp"
51 
52 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
53 # include "shylu_hts.hpp"
54 #endif
55 
56 namespace Ifpack2 {
57 
58 namespace Details {
59 struct TrisolverType {
60  enum Enum {
61  Internal,
62  HTS,
63  KSPTRSV
64  };
65 
66  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
67  type_strs.resize(3);
68  type_strs[0] = "Internal";
69  type_strs[1] = "HTS";
70  type_strs[2] = "KSPTRSV";
71  type_enums.resize(3);
72  type_enums[0] = Internal;
73  type_enums[1] = HTS;
74  type_enums[2] = KSPTRSV;
75  }
76 };
77 }
78 
79 template<class MatrixType>
80 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
81 public:
82  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
83 
84  void reset () {
85 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
86  Timpl_ = Teuchos::null;
87  levelset_block_size_ = 1;
88 #endif
89  }
90 
91  void setParameters (const Teuchos::ParameterList& pl) {
92  (void)pl;
93 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
94  const char* block_size_s = "trisolver: block size";
95  if (pl.isParameter(block_size_s)) {
96  TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
97  "The parameter \"" << block_size_s << "\" must be of type int.");
98  levelset_block_size_ = pl.get<int>(block_size_s);
99  }
100  if (levelset_block_size_ < 1)
101  levelset_block_size_ = 1;
102 #endif
103  }
104 
105  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
106  // call to compute() will trigger the symbolic+numeric phase, and subsequent
107  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
108  // initialize(), essentially nothing happens.
109  void initialize (const crs_matrix_type& /* unused */) {
110 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
111  reset();
112  transpose_ = conjugate_ = false;
113 #endif
114  }
115 
116  void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
117  (void)T_in;
118  (void)out;
119 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
120  using Teuchos::ArrayRCP;
121 
122  auto rowptr = T_in.getLocalRowPtrsHost();
123  auto colidx = T_in.getLocalIndicesHost();
124  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
125  Kokkos::fence();
126 
127  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
128  HTST::make_CrsMatrix(rowptr.size() - 1,
129  rowptr.data(), colidx.data(),
130  // For std/Kokkos::complex.
131  reinterpret_cast<const scalar_type*>(val.data()),
132  transpose_, conjugate_),
133  HtsCrsMatrixDeleter());
134 
135  if (Teuchos::nonnull(Timpl_)) {
136  // Reuse the nonzero pattern.
137  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
138  } else {
139  // Build from scratch.
140  if (T_in.getCrsGraph().is_null()) {
141  if (Teuchos::nonnull(out))
142  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
143  return;
144  }
145  if ( ! T_in.getCrsGraph()->isSorted()) {
146  if (Teuchos::nonnull(out))
147  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
148  return;
149  }
150  if ( ! T_in.isStorageOptimized()) {
151  if (Teuchos::nonnull(out))
152  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
153  return;
154  }
155 
156  typename HTST::PreprocessArgs args;
157  args.T = T_hts.get();
158  args.max_nrhs = 1;
159 #ifdef _OPENMP
160  args.nthreads = omp_get_max_threads();
161 #else
162  args.nthreads = 1;
163 #endif
164  args.save_for_reprocess = true;
165  typename HTST::Options opts;
166  opts.levelset_block_size = levelset_block_size_;
167  args.options = &opts;
168 
169  try {
170  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
171  } catch (const std::exception& e) {
172  if (Teuchos::nonnull(out))
173  *out << "HTS preprocess threw: " << e.what() << "\n";
174  }
175  }
176 #endif
177  }
178 
179  // HTS may not be able to handle a matrix, so query whether compute()
180  // succeeded.
181  bool isComputed () {
182 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
183  return Teuchos::nonnull(Timpl_);
184 #else
185  return false;
186 #endif
187  }
188 
189  // Y := beta * Y + alpha * (M * X)
190  void localApply (const MV& X, MV& Y,
191  const Teuchos::ETransp /* mode */,
192  const scalar_type& alpha, const scalar_type& beta) const {
193  (void)X;
194  (void)Y;
195  (void)alpha;
196  (void)beta;
197 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
198  const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
199  const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
200 
201  // Only does something if #rhs > current capacity.
202  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
203  // Switch alpha and beta because of HTS's opposite convention.
204  HTST::solve_omp(Timpl_.get(),
205  // For std/Kokkos::complex.
206  reinterpret_cast<const scalar_type*>(X_view.data()),
207  X_view.extent(1),
208  // For std/Kokkos::complex.
209  reinterpret_cast<scalar_type*>(Y_view.data()),
210  beta, alpha);
211 #endif
212  }
213 
214 private:
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
217  typedef typename HTST::Impl TImpl;
218  typedef typename HTST::CrsMatrix HtsCrsMatrix;
219 
220  struct TImplDeleter {
221  void free (TImpl* impl) {
222  HTST::delete_Impl(impl);
223  }
224  };
225 
226  struct HtsCrsMatrixDeleter {
227  void free (HtsCrsMatrix* T) {
228  HTST::delete_CrsMatrix(T);
229  }
230  };
231 
232  Teuchos::RCP<TImpl> Timpl_;
233  bool transpose_, conjugate_;
234  int levelset_block_size_;
235 #endif
236 };
237 
238 template<class MatrixType>
240 LocalSparseTriangularSolver (const Teuchos::RCP<const row_matrix_type>& A) :
241  A_ (A)
242 {
243  initializeState();
244 
245  if (! A.is_null ()) {
246  Teuchos::RCP<const crs_matrix_type> A_crs =
247  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
248  TEUCHOS_TEST_FOR_EXCEPTION
249  (A_crs.is_null (), std::invalid_argument,
250  "Ifpack2::LocalSparseTriangularSolver constructor: "
251  "The input matrix A is not a Tpetra::CrsMatrix.");
252  A_crs_ = A_crs;
253  }
254 }
255 
256 template<class MatrixType>
258 LocalSparseTriangularSolver (const Teuchos::RCP<const row_matrix_type>& A,
259  const Teuchos::RCP<Teuchos::FancyOStream>& out) :
260  A_ (A),
261  out_ (out)
262 {
263  initializeState();
264  if (! out_.is_null ()) {
265  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
266  << std::endl;
267  }
268 
269  if (! A.is_null ()) {
270  Teuchos::RCP<const crs_matrix_type> A_crs =
271  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
272  TEUCHOS_TEST_FOR_EXCEPTION
273  (A_crs.is_null (), std::invalid_argument,
274  "Ifpack2::LocalSparseTriangularSolver constructor: "
275  "The input matrix A is not a Tpetra::CrsMatrix.");
276  A_crs_ = A_crs;
277  }
278 }
279 
280 template<class MatrixType>
283 {
284  initializeState();
285 }
286 
287 template<class MatrixType>
289 LocalSparseTriangularSolver (const bool /* unused */, const Teuchos::RCP<Teuchos::FancyOStream>& out) :
290  out_ (out)
291 {
292  initializeState();
293  if (! out_.is_null ()) {
294  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
295  << std::endl;
296  }
297 }
298 
299 template<class MatrixType>
301 {
302  isInitialized_ = false;
303  isComputed_ = false;
304  reverseStorage_ = false;
305  isInternallyChanged_ = false;
306  numInitialize_ = 0;
307  numCompute_ = 0;
308  numApply_ = 0;
309  initializeTime_ = 0.0;
310  computeTime_ = 0.0;
311  applyTime_ = 0.0;
312  isKokkosKernelsSptrsv_ = false;
313  uplo_ = "N";
314  diag_ = "N";
315 }
316 
317 template<class MatrixType>
320 {
321  if (Teuchos::nonnull (kh_))
322  {
323  kh_->destroy_sptrsv_handle();
324  }
325 }
326 
327 template<class MatrixType>
328 void
330 setParameters (const Teuchos::ParameterList& pl)
331 {
332  using Teuchos::RCP;
333  using Teuchos::ParameterList;
334  using Teuchos::Array;
335 
336  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
337  do {
338  static const char typeName[] = "trisolver: type";
339 
340  if ( ! pl.isType<std::string>(typeName)) break;
341 
342  // Map std::string <-> TrisolverType::Enum.
343  Array<std::string> trisolverTypeStrs;
344  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
345  Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
346  Teuchos::StringToIntegralParameterEntryValidator<Details::TrisolverType::Enum>
347  s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
348 
349  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
350  } while (0);
351 
352  if (trisolverType == Details::TrisolverType::HTS) {
353  htsImpl_ = Teuchos::rcp (new HtsImpl ());
354  htsImpl_->setParameters (pl);
355  }
356 
357  if (trisolverType == Details::TrisolverType::KSPTRSV) {
358  kh_ = Teuchos::rcp (new k_handle());
359  }
360 
361  if (pl.isParameter("trisolver: reverse U"))
362  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
363 
364  TEUCHOS_TEST_FOR_EXCEPTION
365  (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
366  std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
367  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
368  "options. See GitHub issue #2647.");
369 }
370 
371 template<class MatrixType>
372 void
375 {
376  using Tpetra::Details::determineLocalTriangularStructure;
377 
378  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
379  using LO = local_ordinal_type;
380 
381  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
382  if (! out_.is_null ()) {
383  *out_ << ">>> DEBUG " << prefix << std::endl;
384  }
385 
386  TEUCHOS_TEST_FOR_EXCEPTION
387  (A_.is_null (), std::runtime_error, prefix << "You must call "
388  "setMatrix() with a nonnull input matrix before you may call "
389  "initialize() or compute().");
390  if (A_crs_.is_null ()) {
391  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
392  TEUCHOS_TEST_FOR_EXCEPTION
393  (A_crs.get () == nullptr, std::invalid_argument,
394  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
395  A_crs_ = A_crs;
396  }
397  auto G = A_crs_->getGraph ();
398  TEUCHOS_TEST_FOR_EXCEPTION
399  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
400  "but A_crs_'s RowGraph G is null. "
401  "Please report this bug to the Ifpack2 developers.");
402  // At this point, the graph MUST be fillComplete. The "initialize"
403  // (symbolic) part of setup only depends on the graph structure, so
404  // the matrix itself need not be fill complete.
405  TEUCHOS_TEST_FOR_EXCEPTION
406  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
407  "the matrix's graph must be fill complete. It is not.");
408 
409  // mfh 30 Apr 2018: See GitHub Issue #2658.
410  constexpr bool ignoreMapsForTriStructure = true;
411  auto lclTriStructure = [&] {
412  auto lclMatrix = A_crs_->getLocalMatrixDevice ();
413  auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
414  auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
415  auto lclTriStruct =
416  determineLocalTriangularStructure (lclMatrix.graph,
417  lclRowMap,
418  lclColMap,
419  ignoreMapsForTriStructure);
420  const LO lclNumRows = lclRowMap.getLocalNumElements ();
421  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
422  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
423  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
424  return lclTriStruct;
425  } ();
426 
427  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
428  htsImpl_.is_null ()) {
429  // Reverse the storage for an upper triangular matrix
430  auto Alocal = A_crs_->getLocalMatrixDevice();
431  auto ptr = Alocal.graph.row_map;
432  auto ind = Alocal.graph.entries;
433  auto val = Alocal.values;
434 
435  auto numRows = Alocal.numRows();
436  auto numCols = Alocal.numCols();
437  auto numNnz = Alocal.nnz();
438 
439  typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
440  typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
441  decltype(val) newval ("val", val.extent (0));
442 
443  // FIXME: The code below assumes UVM
444  typename crs_matrix_type::execution_space().fence();
445  newptr(0) = 0;
446  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
447  auto A_r = Alocal.row(numRows-1 - row);
448 
449  auto numEnt = A_r.length;
450  for (local_ordinal_type k = 0; k < numEnt; ++k) {
451  newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
452  newval(rowStart + k) = A_r.value (numEnt-1 - k);
453  }
454  rowStart += numEnt;
455  newptr(row+1) = rowStart;
456  }
457  typename crs_matrix_type::execution_space().fence();
458 
459  // Reverse maps
460  Teuchos::RCP<map_type> newRowMap, newColMap;
461  {
462  // Reverse row map
463  auto rowMap = A_->getRowMap();
464  auto numElems = rowMap->getLocalNumElements();
465  auto rowElems = rowMap->getLocalElementList();
466 
467  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
468  for (size_t i = 0; i < numElems; i++)
469  newRowElems[i] = rowElems[numElems-1 - i];
470 
471  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
472  }
473  {
474  // Reverse column map
475  auto colMap = A_->getColMap();
476  auto numElems = colMap->getLocalNumElements();
477  auto colElems = colMap->getLocalElementList();
478 
479  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
480  for (size_t i = 0; i < numElems; i++)
481  newColElems[i] = colElems[numElems-1 - i];
482 
483  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
484  }
485 
486  // Construct new matrix
487  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
488 
489  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
490 
491  isInternallyChanged_ = true;
492 
493  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
494  // didn't want to break any invariants, especially considering
495  // that this branch is likely poorly tested.
496  auto newLclTriStructure =
497  determineLocalTriangularStructure (newLocalMatrix.graph,
498  newRowMap->getLocalMap (),
499  newColMap->getLocalMap (),
500  ignoreMapsForTriStructure);
501  const LO newLclNumRows = newRowMap->getLocalNumElements ();
502  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
503  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" :
504  (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
505  }
506 
507  if (Teuchos::nonnull (htsImpl_))
508  {
509  htsImpl_->initialize (*A_crs_);
510  isInternallyChanged_ = true;
511  }
512 
513  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
514  if (Teuchos::nonnull(kh_) && ksptrsv_valid_uplo && this->diag_ != "U")
515  {
516  this->isKokkosKernelsSptrsv_ = true;
517  }
518  else
519  {
520  this->isKokkosKernelsSptrsv_ = false;
521  }
522 
523  isInitialized_ = true;
524  ++numInitialize_;
525 }
526 
527 template<class MatrixType>
528 void
531 {
532  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
533  if (! out_.is_null ()) {
534  *out_ << ">>> DEBUG " << prefix << std::endl;
535  }
536 
537  TEUCHOS_TEST_FOR_EXCEPTION
538  (A_.is_null (), std::runtime_error, prefix << "You must call "
539  "setMatrix() with a nonnull input matrix before you may call "
540  "initialize() or compute().");
541  TEUCHOS_TEST_FOR_EXCEPTION
542  (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
543  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
544  // At this point, the matrix MUST be fillComplete.
545  TEUCHOS_TEST_FOR_EXCEPTION
546  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
547  "method, the matrix must be fill complete. It is not.");
548 
549  if (! isInitialized_) {
550  initialize ();
551  }
552  TEUCHOS_TEST_FOR_EXCEPTION
553  (! isInitialized_, std::logic_error, prefix << "initialize() should have "
554  "been called by this point, but isInitialized_ is false. "
555  "Please report this bug to the Ifpack2 developers.");
556 
557  if (! isComputed_) {//Only compute if not computed before
558  if (Teuchos::nonnull (htsImpl_))
559  htsImpl_->compute (*A_crs_, out_);
560 
561  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_)
562  {
563  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
564  auto Alocal = A_crs->getLocalMatrixDevice();
565  auto ptr = Alocal.graph.row_map;
566  auto ind = Alocal.graph.entries;
567  auto val = Alocal.values;
568 
569  auto numRows = Alocal.numRows();
570  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
571 
572  // Destroy existing handle and recreate in case new matrix provided - requires rerunning symbolic analysis
573  kh_->destroy_sptrsv_handle();
574 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
575  // CuSparse only supports int type ordinals
576  // and scalar types of float, double, float complex and double complex
577  if (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
578  std::is_same<int, local_ordinal_type>::value &&
579  (std::is_same<scalar_type, float>::value ||
580  std::is_same<scalar_type, double>::value ||
581  std::is_same<scalar_type, Kokkos::complex<float>>::value ||
582  std::is_same<scalar_type, Kokkos::complex<double>>::value))
583  {
584  kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows, is_lower_tri);
585  }
586  else
587 #endif
588  {
589  kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows, is_lower_tri);
590  }
591  KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
592  }
593 
594  isComputed_ = true;
595  ++numCompute_;
596  }
597 }
598 
599 template<class MatrixType>
601 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
603  Tpetra::MultiVector<scalar_type, local_ordinal_type,
605  Teuchos::ETransp mode,
606  scalar_type alpha,
607  scalar_type beta) const
608 {
609  using Teuchos::RCP;
610  using Teuchos::rcp;
611  using Teuchos::rcpFromRef;
612  typedef scalar_type ST;
613  typedef Teuchos::ScalarTraits<ST> STS;
614  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
615  if (! out_.is_null ()) {
616  *out_ << ">>> DEBUG " << prefix;
617  if (A_crs_.is_null ()) {
618  *out_ << "A_crs_ is null!" << std::endl;
619  }
620  else {
621  Teuchos::RCP<const crs_matrix_type> A_crs =
622  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
623  const std::string uplo = this->uplo_;
624  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
625  (mode == Teuchos::TRANS ? "T" : "N");
626  const std::string diag = this->diag_;
627  *out_ << "uplo=\"" << uplo
628  << "\", trans=\"" << trans
629  << "\", diag=\"" << diag << "\"" << std::endl;
630  }
631  }
632 
633  TEUCHOS_TEST_FOR_EXCEPTION
634  (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
635  "been called, or if you have changed the matrix via setMatrix(), you must "
636  "call compute() before you may call this method.");
637  // If isComputed() is true, it's impossible for the matrix to be
638  // null, or for it not to be a Tpetra::CrsMatrix.
639  TEUCHOS_TEST_FOR_EXCEPTION
640  (A_.is_null (), std::logic_error, prefix << "A_ is null. "
641  "Please report this bug to the Ifpack2 developers.");
642  TEUCHOS_TEST_FOR_EXCEPTION
643  (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
644  "Please report this bug to the Ifpack2 developers.");
645  // However, it _is_ possible that the user called resumeFill() on
646  // the matrix, after calling compute(). This is NOT allowed.
647  TEUCHOS_TEST_FOR_EXCEPTION
648  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
649  "method, the matrix must be fill complete. It is not. This means that "
650  " you must have called resumeFill() on the matrix before calling apply(). "
651  "This is NOT allowed. Note that this class may use the matrix's data in "
652  "place without copying it. Thus, you cannot change the matrix and expect "
653  "the solver to stay the same. If you have changed the matrix, first call "
654  "fillComplete() on it, then call compute() on this object, before you call"
655  " apply(). You do NOT need to call setMatrix, as long as the matrix "
656  "itself (that is, its address in memory) is the same.");
657 
658  auto G = A_crs_->getGraph ();
659  TEUCHOS_TEST_FOR_EXCEPTION
660  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
661  "but A_crs_'s RowGraph G is null. "
662  "Please report this bug to the Ifpack2 developers.");
663  auto importer = G->getImporter ();
664  auto exporter = G->getExporter ();
665 
666  if (! importer.is_null ()) {
667  if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
668  X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
669  }
670  else {
671  X_colMap_->putScalar (STS::zero ());
672  }
673  // See discussion of Github Issue #672 for why the Import needs to
674  // use the ZERO CombineMode. The case where the Export is
675  // nontrivial is likely never exercised.
676  X_colMap_->doImport (X, *importer, Tpetra::ZERO);
677  }
678  RCP<const MV> X_cur = importer.is_null () ? rcpFromRef (X) :
679  Teuchos::rcp_const_cast<const MV> (X_colMap_);
680 
681  if (! exporter.is_null ()) {
682  if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
683  Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
684  }
685  else {
686  Y_rowMap_->putScalar (STS::zero ());
687  }
688  Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
689  }
690  RCP<MV> Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
691 
692  localApply (*X_cur, *Y_cur, mode, alpha, beta);
693 
694  if (! exporter.is_null ()) {
695  Y.putScalar (STS::zero ());
696  Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
697  }
698 
699  ++numApply_;
700 }
701 
702 template<class MatrixType>
703 void
705 localTriangularSolve (const MV& Y,
706  MV& X,
707  const Teuchos::ETransp mode) const
708 {
709  using Teuchos::CONJ_TRANS;
710  using Teuchos::NO_TRANS;
711  using Teuchos::TRANS;
712  const char tfecfFuncName[] = "localTriangularSolve: ";
713 
714  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
715  (! A_crs_->isFillComplete (), std::runtime_error,
716  "The matrix is not fill complete.");
717  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
718  (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
719  "X and Y must be constant stride.");
720  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
721  ( A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
722  "The matrix is neither upper triangular or lower triangular. "
723  "You may only call this method if the matrix is triangular. "
724  "Remember that this is a local (per MPI process) property, and that "
725  "Tpetra only knows how to do a local (per process) triangular solve.");
726  using STS = Teuchos::ScalarTraits<scalar_type>;
727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
728  (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
729  "not currently support non-conjugated transposed solve (mode == "
730  "Teuchos::TRANS) for complex scalar types.");
731 
732  const std::string uplo = this->uplo_;
733  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
734  (mode == Teuchos::TRANS ? "T" : "N");
735 
736  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N")
737  {
738  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (this->A_);
739  auto A_lclk = A_crs->getLocalMatrixDevice ();
740  auto ptr = A_lclk.graph.row_map;
741  auto ind = A_lclk.graph.entries;
742  auto val = A_lclk.values;
743 
744  const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
745 
746  for (size_t j = 0; j < numVecs; ++j) {
747  auto X_j = X.getVectorNonConst (j);
748  auto Y_j = Y.getVector (j);
749  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
750  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
751  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
752  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
753  KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
754  // TODO is this fence needed...
755  typename k_handle::HandleExecSpace().fence();
756  }
757  }
758  else
759  {
760  const std::string diag = this->diag_;
761  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
762  // sequential, host-only code. See
763  // https://github.com/kokkos/kokkos-kernels/issues/48.
764 
765  auto A_lcl = this->A_crs_->getLocalMatrixHost ();
766 
767  if (X.isConstantStride () && Y.isConstantStride ()) {
768  auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
769  auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
770  KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
771  A_lcl, Y_lcl, X_lcl);
772  }
773  else {
774  const size_t numVecs =
775  std::min (X.getNumVectors (), Y.getNumVectors ());
776  for (size_t j = 0; j < numVecs; ++j) {
777  auto X_j = X.getVectorNonConst (j);
778  auto Y_j = Y.getVector (j);
779  auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
780  auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
781  KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
782  diag.c_str (), A_lcl, Y_lcl, X_lcl);
783  }
784  }
785  }
786 }
787 
788 template<class MatrixType>
789 void
790 LocalSparseTriangularSolver<MatrixType>::
791 localApply (const MV& X,
792  MV& Y,
793  const Teuchos::ETransp mode,
794  const scalar_type& alpha,
795  const scalar_type& beta) const
796 {
797  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
798  htsImpl_->isComputed ()) {
799  htsImpl_->localApply (X, Y, mode, alpha, beta);
800  return;
801  }
802 
803  using Teuchos::RCP;
804  typedef scalar_type ST;
805  typedef Teuchos::ScalarTraits<ST> STS;
806 
807  if (beta == STS::zero ()) {
808  if (alpha == STS::zero ()) {
809  Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
810  }
811  else { // alpha != 0
812  this->localTriangularSolve (X, Y, mode);
813  if (alpha != STS::one ()) {
814  Y.scale (alpha);
815  }
816  }
817  }
818  else { // beta != 0
819  if (alpha == STS::zero ()) {
820  Y.scale (beta); // Y := beta * Y
821  }
822  else { // alpha != 0
823  MV Y_tmp (Y, Teuchos::Copy);
824  this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
825  Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
826  }
827  }
828 }
829 
830 
831 template <class MatrixType>
832 int
835  return numInitialize_;
836 }
837 
838 template <class MatrixType>
839 int
841 getNumCompute () const {
842  return numCompute_;
843 }
844 
845 template <class MatrixType>
846 int
848 getNumApply () const {
849  return numApply_;
850 }
851 
852 template <class MatrixType>
853 double
856  return initializeTime_;
857 }
858 
859 template<class MatrixType>
860 double
862 getComputeTime () const {
863  return computeTime_;
864 }
865 
866 template<class MatrixType>
867 double
869 getApplyTime() const {
870  return applyTime_;
871 }
872 
873 template <class MatrixType>
874 std::string
876 description () const
877 {
878  std::ostringstream os;
879 
880  // Output is a valid YAML dictionary in flow style. If you don't
881  // like everything on a single line, you should call describe()
882  // instead.
883  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
884  if (this->getObjectLabel () != "") {
885  os << "Label: \"" << this->getObjectLabel () << "\", ";
886  }
887  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
888  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
889 
890  if (A_.is_null ()) {
891  os << "Matrix: null";
892  }
893  else {
894  os << "Matrix dimensions: ["
895  << A_->getGlobalNumRows () << ", "
896  << A_->getGlobalNumCols () << "]"
897  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
898  }
899 
900  if (Teuchos::nonnull (htsImpl_))
901  os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
902  os << "}";
903  return os.str ();
904 }
905 
906 template <class MatrixType>
908 describe (Teuchos::FancyOStream& out,
909  const Teuchos::EVerbosityLevel verbLevel) const
910 {
911  using std::endl;
912  // Default verbosity level is VERB_LOW, which prints only on Process
913  // 0 of the matrix's communicator.
914  const Teuchos::EVerbosityLevel vl
915  = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
916 
917  if (vl != Teuchos::VERB_NONE) {
918  // Print only on Process 0 in the matrix's communicator. If the
919  // matrix is null, though, we have to get the communicator from
920  // somewhere, so we ask Tpetra for its default communicator. If
921  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
922  auto comm = A_.is_null () ?
923  Tpetra::getDefaultComm () :
924  A_->getComm ();
925 
926  // Users aren't supposed to do anything with the matrix on
927  // processes where its communicator is null.
928  if (! comm.is_null () && comm->getRank () == 0) {
929  // By convention, describe() should always begin with a tab.
930  Teuchos::OSTab tab0 (out);
931  // Output is in YAML format. We have to escape the class name,
932  // because it has a colon.
933  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
934  Teuchos::OSTab tab1 (out);
935  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
936  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
937  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
938  << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
939  }
940  }
941 }
942 
943 template <class MatrixType>
944 Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
946 getDomainMap () const
947 {
948  TEUCHOS_TEST_FOR_EXCEPTION
949  (A_.is_null (), std::runtime_error,
950  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
951  "The matrix is null. Please call setMatrix() with a nonnull input "
952  "before calling this method.");
953  return A_->getDomainMap ();
954 }
955 
956 template <class MatrixType>
957 Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
959 getRangeMap () const
960 {
961  TEUCHOS_TEST_FOR_EXCEPTION
962  (A_.is_null (), std::runtime_error,
963  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
964  "The matrix is null. Please call setMatrix() with a nonnull input "
965  "before calling this method.");
966  return A_->getRangeMap ();
967 }
968 
969 template<class MatrixType>
971 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
972 {
973  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
974 
975  // If the pointer didn't change, do nothing. This is reasonable
976  // because users are supposed to call this method with the same
977  // object over all participating processes, and pointer identity
978  // implies object identity.
979  if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
980  // Check in serial or one-process mode if the matrix is square.
981  TEUCHOS_TEST_FOR_EXCEPTION
982  (! A.is_null () && A->getComm ()->getSize () == 1 &&
983  A->getLocalNumRows () != A->getLocalNumCols (),
984  std::runtime_error, prefix << "If A's communicator only contains one "
985  "process, then A must be square. Instead, you provided a matrix A with "
986  << A->getLocalNumRows () << " rows and " << A->getLocalNumCols ()
987  << " columns.");
988 
989  // It's legal for A to be null; in that case, you may not call
990  // initialize() until calling setMatrix() with a nonnull input.
991  // Regardless, setting the matrix invalidates the preconditioner.
992  isInitialized_ = false;
993  isComputed_ = false;
994 
995  if (A.is_null ()) {
996  A_crs_ = Teuchos::null;
997  A_ = Teuchos::null;
998  }
999  else { // A is not null
1000  Teuchos::RCP<const crs_matrix_type> A_crs =
1001  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
1002  TEUCHOS_TEST_FOR_EXCEPTION
1003  (A_crs.is_null (), std::invalid_argument, prefix <<
1004  "The input matrix A is not a Tpetra::CrsMatrix.");
1005  A_crs_ = A_crs;
1006  A_ = A;
1007  }
1008 
1009  if (Teuchos::nonnull (htsImpl_))
1010  htsImpl_->reset ();
1011  } // pointers are not the same
1012 
1013  //NOTE (Nov-09-2022):
1014  //For Cuda >= 11.3 (using cusparseSpSV), always call compute before apply,
1015  //even when matrix values are changed with the same sparsity pattern.
1016  //So, force isComputed_ to FALSE here
1017 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1018  isComputed_ = false;
1019 #endif
1020 }
1021 
1022 } // namespace Ifpack2
1023 
1024 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1025  template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1026 
1027 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:601
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:834
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:946
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:959
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:841
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:869
void compute()
"Numeric" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:530
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:908
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:95
Ifpack2 implementation details.
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:97
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:222
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:862
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:848
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:79
void initialize()
"Symbolic" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:374
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:102
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:971
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:855
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:93
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:108
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:330
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:91
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:876
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:319
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:282