Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsMatrixMultiplyOp.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_HPP
43 #define TPETRA_CRSMATRIXMULTIPLYOP_HPP
44 
49 
51 #include "Tpetra_CrsMatrix.hpp"
52 #include "Tpetra_Util.hpp"
55 #include "Tpetra_LocalCrsMatrixOperator.hpp"
56 
57 namespace Tpetra {
58 
95  template <class Scalar,
96  class MatScalar,
97  class LocalOrdinal,
98  class GlobalOrdinal,
99  class Node>
101  public Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>
102  {
103  public:
105  using crs_matrix_type =
109 
110  private:
111  using local_matrix_type =
113 
114  public:
116 
117 
122  CrsMatrixMultiplyOp (const Teuchos::RCP<const crs_matrix_type>& A) :
123  matrix_ (A),
124  localMultiply_ (std::make_shared<local_matrix_type> (A->getLocalMatrix ()))
125  {}
126 
128  ~CrsMatrixMultiplyOp () override = default;
129 
131 
133 
136  void
139  Teuchos::ETransp mode = Teuchos::NO_TRANS,
140  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one (),
141  Scalar beta = Teuchos::ScalarTraits<Scalar>::zero ()) const override
142  {
143  TEUCHOS_TEST_FOR_EXCEPTION
144  (! matrix_->isFillComplete (), std::runtime_error,
145  Teuchos::typeName (*this) << "::apply(): underlying matrix is not fill-complete.");
146  TEUCHOS_TEST_FOR_EXCEPTION
147  (X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
148  Teuchos::typeName (*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
149  TEUCHOS_TEST_FOR_EXCEPTION
150  (Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
151  Teuchos::typeName (*this) << "::apply() does not currently support transposed multiplications for complex scalar types.");
152  if (mode == Teuchos::NO_TRANS) {
153  applyNonTranspose (X, Y, alpha, beta);
154  }
155  else {
156  applyTranspose (X, Y, mode, alpha, beta);
157  }
158  }
159 
199  void
203  const Scalar& dampingFactor,
204  const ESweepDirection direction,
205  const int numSweeps) const
206  {
207  using Teuchos::null;
208  using Teuchos::RCP;
209  using Teuchos::rcp;
210  using Teuchos::rcpFromRef;
211  using Teuchos::rcp_const_cast;
212  typedef Scalar OS;
213  typedef Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
214  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
216 
217  TEUCHOS_TEST_FOR_EXCEPTION
218  (numSweeps < 0, std::invalid_argument,
219  "gaussSeidel: The number of sweeps must be nonnegative, "
220  "but you provided numSweeps = " << numSweeps << " < 0.");
221 
222  // Translate from global to local sweep direction.
223  // While doing this, validate the input.
224  ESweepDirection localDirection;
225  if (direction == Forward) {
226  localDirection = Forward;
227  }
228  else if (direction == Backward) {
229  localDirection = Backward;
230  }
231  else if (direction == Symmetric) {
232  // We'll control local sweep direction manually.
233  localDirection = Forward;
234  }
235  else {
236  TEUCHOS_TEST_FOR_EXCEPTION
237  (true, std::invalid_argument,
238  "gaussSeidel: The 'direction' enum does not have any of its valid "
239  "values: Forward, Backward, or Symmetric.");
240  }
241 
242  if (numSweeps == 0) {
243  return; // Nothing to do.
244  }
245 
246  // We don't need the Export object because this method assumes
247  // that the row, domain, and range Maps are the same. We do need
248  // the Import object, if there is one, though.
249  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
250  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
251  TEUCHOS_TEST_FOR_EXCEPTION
252  (! exporter.is_null (), std::runtime_error,
253  "Tpetra's gaussSeidel implementation requires that the row, domain, "
254  "and range Maps be the same. This cannot be the case, because the "
255  "matrix has a nontrivial Export object.");
256 
257  RCP<const map_type> domainMap = matrix_->getDomainMap ();
258  RCP<const map_type> rangeMap = matrix_->getRangeMap ();
259  RCP<const map_type> rowMap = matrix_->getGraph ()->getRowMap ();
260  RCP<const map_type> colMap = matrix_->getGraph ()->getColMap ();
261 
262  const bool debug = ::Tpetra::Details::Behavior::debug ();
263  if (debug) {
264  // The relation 'isSameAs' is transitive. It's also a
265  // collective, so we don't have to do a "shared" test for
266  // exception (i.e., a global reduction on the test value).
267  TEUCHOS_TEST_FOR_EXCEPTION
268  (! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
269  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
270  "multivector X be in the domain Map of the matrix.");
271  TEUCHOS_TEST_FOR_EXCEPTION
272  (! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
273  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
274  "B be in the range Map of the matrix.");
275  TEUCHOS_TEST_FOR_EXCEPTION
276  (! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
277  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
278  "D be in the row Map of the matrix.");
279  TEUCHOS_TEST_FOR_EXCEPTION
280  (! rowMap->isSameAs (*rangeMap), std::runtime_error,
281  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
282  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
283  TEUCHOS_TEST_FOR_EXCEPTION
284  (! domainMap->isSameAs (*rangeMap), std::runtime_error,
285  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
286  "the range Map of the matrix be the same.");
287  }
288 
289  // If B is not constant stride, copy it into a constant stride
290  // multivector. We'l handle the right-hand side B first and deal
291  // with X right before the sweeps, to improve locality of the
292  // first sweep. (If the problem is small enough, then that will
293  // hopefully keep more of the entries of X in cache. This
294  // optimizes for the typical case of a small number of sweeps.)
295  RCP<const OSMV> B_in;
296  if (B.isConstantStride()) {
297  B_in = rcpFromRef (B);
298  }
299  else {
300  // The range Map and row Map are the same in this case, so we
301  // can use the (possibly cached) row Map multivector to store a
302  // constant stride copy of B. We don't have to copy back, since
303  // Gauss-Seidel won't modify B.
304  RCP<OSMV> B_in_nonconst = getRowMapMultiVector (B, true);
305  deep_copy (*B_in_nonconst, B);
306  B_in = rcp_const_cast<const OSMV> (B_in_nonconst);
307 
309  (! B.isConstantStride (), std::runtime_error,
310  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
311  "requires that X and B both have constant stride. Since B does not "
312  "have constant stride, we had to make a copy. This is a limitation of "
313  "the current implementation and not your fault, but we still report it "
314  "as an efficiency warning for your information.");
315  }
316 
317  // If X is not constant stride, copy it into a constant stride
318  // multivector. Also, make the column Map multivector X_colMap,
319  // and its domain Map view X_domainMap. (X actually must be a
320  // domain Map view of a column Map multivector; exploit this, if X
321  // has constant stride.)
322 
323  RCP<OSMV> X_domainMap;
324  RCP<OSMV> X_colMap;
325  bool copiedInput = false;
326 
327  if (importer.is_null ()) { // Domain and column Maps are the same.
328  if (X.isConstantStride ()) {
329  X_domainMap = rcpFromRef (X);
330  X_colMap = X_domainMap;
331  copiedInput = false;
332  }
333  else {
334  // Get a temporary column Map multivector, make a domain Map
335  // view of it, and copy X into the domain Map view. We have
336  // to copy here because we won't be doing Import operations.
337  X_colMap = getColumnMapMultiVector (X, true);
338  X_domainMap = X_colMap; // Domain and column Maps are the same.
339  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
340  copiedInput = true;
342  (! X.isConstantStride (), std::runtime_error,
343  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
344  "requires that X and B both have constant stride. Since X does not "
345  "have constant stride, we had to make a copy. This is a limitation of "
346  "the current implementation and not your fault, but we still report it "
347  "as an efficiency warning for your information.");
348  }
349  }
350  else { // We will be doing Import operations in the sweeps.
351  if (X.isConstantStride ()) {
352  X_domainMap = rcpFromRef (X);
353  // This kernel assumes that X is a domain Map view of a
354  // column Map multivector. We will only check if this is
355  // valid in debug mode.
356  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
357 
358  // Do the first Import for the first sweep. This simplifies
359  // the logic in the sweeps.
360  X_colMap->doImport (X, *importer, INSERT);
361  copiedInput = false;
362  }
363  else {
364  // Get a temporary column Map multivector X_colMap, and make a
365  // domain Map view X_domainMap of it. Instead of copying, we
366  // do an Import from X into X_domainMap. This saves us a
367  // copy, since the Import has to copy the data anyway.
368  X_colMap = getColumnMapMultiVector (X, true);
369  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
370  X_colMap->doImport (X, *importer, INSERT);
371  copiedInput = true;
373  (! X.isConstantStride (), std::runtime_error,
374  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
375  "requires that X and B both have constant stride. Since X does not "
376  "have constant stride, we had to make a copy. This is a limitation of "
377  "the current implementation and not your fault, but we still report it "
378  "as an efficiency warning for your information.");
379  }
380  }
381 
382  for (int sweep = 0; sweep < numSweeps; ++sweep) {
383  if (! importer.is_null () && sweep > 0) {
384  // We already did the first Import for the zeroth sweep.
385  X_colMap->doImport (*X_domainMap, *importer, INSERT);
386  }
387 
388  // Do local Gauss-Seidel.
389  if (direction != Symmetric) {
390  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
391  dampingFactor,
392  localDirection);
393  }
394  else { // direction == Symmetric
395  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
396  dampingFactor,
397  Forward);
398  // Communicate again before the Backward sweep.
399  if (! importer.is_null ()) {
400  X_colMap->doImport (*X_domainMap, *importer, INSERT);
401  }
402  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
403  dampingFactor,
404  Backward);
405  }
406  }
407 
408  if (copiedInput) {
409  deep_copy (X, *X_domainMap); // Copy back: X_domainMap -> X.
410  }
411  }
412 
437  void
441  const Scalar& dampingFactor,
442  const ESweepDirection direction,
443  const int numSweeps) const
444  {
445  using Teuchos::null;
446  using Teuchos::RCP;
447  using Teuchos::rcp;
448  using Teuchos::rcpFromRef;
449  using Teuchos::rcp_const_cast;
450  typedef Scalar OS;
451  typedef Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
452  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
454 
455  TEUCHOS_TEST_FOR_EXCEPTION
456  (numSweeps < 0, std::invalid_argument,
457  "gaussSeidelCopy: The number of sweeps must be nonnegative, "
458  "but you provided numSweeps = " << numSweeps << " < 0.");
459 
460  // Translate from global to local sweep direction.
461  // While doing this, validate the input.
462  ESweepDirection localDirection;
463  if (direction == Forward) {
464  localDirection = Forward;
465  }
466  else if (direction == Backward) {
467  localDirection = Backward;
468  }
469  else if (direction == Symmetric) {
470  // We'll control local sweep direction manually.
471  localDirection = Forward;
472  }
473  else {
474  TEUCHOS_TEST_FOR_EXCEPTION
475  (true, std::invalid_argument,
476  "gaussSeidelCopy: The 'direction' enum does not have any of its "
477  "valid values: Forward, Backward, or Symmetric.");
478  }
479 
480  if (numSweeps == 0) {
481  return;
482  }
483 
484  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
485  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
486  TEUCHOS_TEST_FOR_EXCEPTION
487  (! exporter.is_null (),
488  std::runtime_error,
489  "Tpetra's gaussSeidelCopy implementation requires that the row, domain, "
490  "and range Maps be the same. This cannot be the case, because the "
491  "matrix has a nontrivial Export object.");
492 
493  RCP<const map_type> domainMap = matrix_->getDomainMap ();
494  RCP<const map_type> rangeMap = matrix_->getRangeMap ();
495  RCP<const map_type> rowMap = matrix_->getGraph ()->getRowMap ();
496  RCP<const map_type> colMap = matrix_->getGraph ()->getColMap ();
497 
498  const bool debug = ::Tpetra::Details::Behavior::debug ();
499  if (debug) {
500  // The relation 'isSameAs' is transitive. It's also a
501  // collective, so we don't have to do a "shared" test for
502  // exception (i.e., a global reduction on the test value).
503  TEUCHOS_TEST_FOR_EXCEPTION
504  (! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
505  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
506  "multivector X be in the domain Map of the matrix.");
507  TEUCHOS_TEST_FOR_EXCEPTION
508  (! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
509  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
510  "B be in the range Map of the matrix.");
511  TEUCHOS_TEST_FOR_EXCEPTION
512  (! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
513  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
514  "D be in the row Map of the matrix.");
515  TEUCHOS_TEST_FOR_EXCEPTION
516  (! rowMap->isSameAs (*rangeMap), std::runtime_error,
517  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
518  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
519  TEUCHOS_TEST_FOR_EXCEPTION
520  (! domainMap->isSameAs (*rangeMap), std::runtime_error,
521  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
522  "the range Map of the matrix be the same.");
523  }
524 
525  // Fetch a (possibly cached) temporary column Map multivector
526  // X_colMap, and a domain Map view X_domainMap of it. Both have
527  // constant stride by construction. We know that the domain Map
528  // must include the column Map, because our Gauss-Seidel kernel
529  // requires that the row Map, domain Map, and range Map are all
530  // the same, and that each process owns all of its own diagonal
531  // entries of the matrix.
532 
533  RCP<OSMV> X_colMap;
534  RCP<OSMV> X_domainMap;
535  bool copyBackOutput = false;
536  if (importer.is_null ()) {
537  if (X.isConstantStride ()) {
538  X_colMap = rcpFromRef (X);
539  X_domainMap = rcpFromRef (X);
540  // No need to copy back to X at end.
541  }
542  else { // We must copy X into a constant stride multivector.
543  // Just use the cached column Map multivector for that.
544  X_colMap = getColumnMapMultiVector (X, true);
545  // X_domainMap is always a domain Map view of the column Map
546  // multivector. In this case, the domain and column Maps are
547  // the same, so X_domainMap _is_ X_colMap.
548  X_domainMap = X_colMap;
549  deep_copy (*X_domainMap, X); // Copy X into constant stride multivector
550  copyBackOutput = true; // Don't forget to copy back at end.
552  (! X.isConstantStride (), std::runtime_error,
553  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
554  "kernel requires that X and B both have constant stride. Since X "
555  "does not have constant stride, we had to make a copy. This is a "
556  "limitation of the current implementation and not your fault, but we "
557  "still report it as an efficiency warning for your information.");
558  }
559  }
560  else { // Column Map and domain Map are _not_ the same.
561  X_colMap = getColumnMapMultiVector (X);
562  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
563 
564  // We could just copy X into X_domainMap. However, that wastes
565  // a copy, because the Import also does a copy (plus
566  // communication). Since the typical use case for Gauss-Seidel
567  // is a small number of sweeps (2 is typical), we don't want to
568  // waste that copy. Thus, we do the Import here, and skip the
569  // first Import in the first sweep. Importing directly from X
570  // effects the copy into X_domainMap (which is a view of
571  // X_colMap).
572  X_colMap->doImport (X, *importer, INSERT);
573 
574  copyBackOutput = true; // Don't forget to copy back at end.
575  }
576 
577  // The Gauss-Seidel / SOR kernel expects multivectors of constant
578  // stride. X_colMap is by construction, but B might not be. If
579  // it's not, we have to make a copy.
580  RCP<const OSMV> B_in;
581  if (B.isConstantStride ()) {
582  B_in = rcpFromRef (B);
583  }
584  else {
585  // Range Map and row Map are the same in this case, so we can
586  // use the cached row Map multivector to store a constant stride
587  // copy of B.
588  RCP<OSMV> B_in_nonconst = getRowMapMultiVector (B, true);
589  *B_in_nonconst = B;
590  B_in = rcp_const_cast<const OSMV> (B_in_nonconst);
591 
593  (! B.isConstantStride (), std::runtime_error,
594  "gaussSeidelCopy: The current implementation requires that B have "
595  "constant stride. Since B does not have constant stride, we had to "
596  "copy it into a separate constant-stride multivector. This is a "
597  "limitation of the current implementation and not your fault, but we "
598  "still report it as an efficiency warning for your information.");
599  }
600 
601  for (int sweep = 0; sweep < numSweeps; ++sweep) {
602  if (! importer.is_null () && sweep > 0) {
603  // We already did the first Import for the zeroth sweep above.
604  X_colMap->doImport (*X_domainMap, *importer, INSERT);
605  }
606 
607  // Do local Gauss-Seidel.
608  if (direction != Symmetric) {
609  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
610  dampingFactor,
611  localDirection);
612  }
613  else { // direction == Symmetric
614  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
615  dampingFactor,
616  Forward);
617  // Communicate again before the Backward sweep, if necessary.
618  if (! importer.is_null ()) {
619  X_colMap->doImport (*X_domainMap, *importer, INSERT);
620  }
621  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
622  dampingFactor,
623  Backward);
624  }
625  }
626 
627  if (copyBackOutput) {
628  deep_copy (X, *X_domainMap); // Copy result back into X.
629  }
630  }
631 
637  bool hasTransposeApply() const override {
638  return true;
639  }
640 
642  Teuchos::RCP<const map_type> getDomainMap () const override {
643  return matrix_->getDomainMap ();
644  }
645 
647  Teuchos::RCP<const map_type> getRangeMap () const override {
648  return matrix_->getRangeMap ();
649  }
650 
652 
653  protected:
655 
657  const Teuchos::RCP<const crs_matrix_type> matrix_;
658 
660  LocalCrsMatrixOperator<Scalar, MatScalar, typename
662 
675  mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > importMV_;
676 
689  mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > exportMV_;
690 
693  void
696  Teuchos::ETransp mode,
697  Scalar alpha,
698  Scalar beta) const
699  {
700  using Teuchos::null;
701  using Teuchos::RCP;
702  using Teuchos::rcp;
703  using export_type = Export<LocalOrdinal, GlobalOrdinal, Node>;
704  using import_type = Import<LocalOrdinal, GlobalOrdinal, Node>;
705  using STS = Teuchos::ScalarTraits<Scalar>;
706 
707  const size_t numVectors = X_in.getNumVectors();
708  // because of Views, it is difficult to determine if X and Y point to the same data.
709  // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
710  // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
711  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
712  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
713 
714  // some parameters for below
715  const bool Y_is_replicated = ! Y_in.isDistributed ();
716  const bool Y_is_overwritten = (beta == STS::zero ());
717  const int myRank = matrix_->getComm ()->getRank ();
718  if (Y_is_replicated && myRank != 0) {
719  beta = STS::zero ();
720  }
721 
722  // access X indirectly, in case we need to create temporary storage
723  RCP<const MV> X;
724  // currently, cannot multiply from multivector of non-constant stride
725  if (! X_in.isConstantStride () && importer == null) {
726  // generate a strided copy of X_in
727  X = Teuchos::rcp (new MV (X_in, Teuchos::Copy));
728  }
729  else {
730  // just temporary, so this non-owning RCP is okay
731  X = Teuchos::rcpFromRef (X_in);
732  }
733 
734  // set up import/export temporary multivectors
735  if (importer != null) {
736  if (importMV_ != null && importMV_->getNumVectors () != numVectors) {
737  importMV_ = null;
738  }
739  if (importMV_ == null) {
740  importMV_ = rcp (new MV (matrix_->getColMap (), numVectors));
741  }
742  }
743  if (exporter != null) {
744  if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) {
745  exportMV_ = null;
746  }
747  if (exportMV_ == null) {
748  exportMV_ = rcp (new MV (matrix_->getRowMap (), numVectors));
749  }
750  }
751 
752  // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
753  if (exporter != null) {
754  exportMV_->doImport(X_in,*exporter,INSERT);
755  X = exportMV_; // multiply out of exportMV_
756  }
757 
758  auto X_lcl = X->getLocalViewDevice ();
759 
760  // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
761  // We will compute solution into the to-be-exported MV; get a view
762  if (importer != null) {
763  auto Y_lcl = importMV_->getLocalViewDevice ();
764  importMV_->modify_device ();
765 
766  localMultiply_.apply (X_lcl, Y_lcl, mode, alpha, STS::zero ());
767  if (Y_is_overwritten) {
768  Y_in.putScalar (STS::zero ());
769  }
770  else {
771  Y_in.scale (beta);
772  }
773  Y_in.doExport (*importMV_, *importer, ADD);
774  }
775  // otherwise, multiply into Y
776  else {
777  // can't multiply in-situ; can't multiply into non-strided multivector
778  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
779  // generate a strided copy of Y
780  MV Y (Y_in, Teuchos::Copy);
781  auto Y_lcl = Y.getLocalViewDevice ();
782  Y.modify_device ();
783 
784  localMultiply_.apply (X_lcl, Y_lcl, mode, alpha, beta);
785  Tpetra::deep_copy (Y_in, Y);
786  }
787  else {
788  auto Y_lcl = Y_in.getLocalViewDevice ();
789  Y_in.modify_device ();
790 
791  localMultiply_.apply (X_lcl, Y_lcl, mode, alpha, beta);
792  }
793  }
794  // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
795  if (Y_is_replicated) {
796  Y_in.reduce();
797  }
798  }
799 
801  void
804  Scalar alpha,
805  Scalar beta) const
806  {
808  using Teuchos::NO_TRANS;
809  using Teuchos::RCP;
810  using Teuchos::rcp;
811  using Teuchos::rcp_const_cast;
812  using Teuchos::rcpFromRef;
813  typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
814  typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
815  typedef Teuchos::ScalarTraits<Scalar> STS;
816 
817  if (alpha == STS::zero ()) {
818  if (beta == STS::zero ()) {
819  Y_in.putScalar (STS::zero ());
820  } else if (beta != STS::one ()) {
821  Y_in.scale (beta);
822  }
823  return;
824  }
825 
826  // It's possible that X is a view of Y or vice versa. We don't
827  // allow this (apply() requires that X and Y not alias one
828  // another), but it's helpful to detect and work around this
829  // case. We don't try to to detect the more subtle cases (e.g.,
830  // one is a subview of the other, but their initial pointers
831  // differ). We only need to do this if this matrix's Import is
832  // trivial; otherwise, we don't actually apply the operator from
833  // X into Y.
834 
835  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
836  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
837 
838  // If beta == 0, then the output MV will be overwritten; none of
839  // its entries should be read. (Sparse BLAS semantics say that we
840  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
841  // This matters if we need to do an Export operation; see below.
842  const bool Y_is_overwritten = (beta == STS::zero());
843 
844  // We treat the case of a replicated MV output specially.
845  const bool Y_is_replicated =
846  (! Y_in.isDistributed () && matrix_->getComm ()->getSize () != 1);
847 
848  // This is part of the special case for replicated MV output.
849  // We'll let each process do its thing, but do an all-reduce at
850  // the end to sum up the results. Setting beta=0 on all
851  // processes but Proc 0 makes the math work out for the
852  // all-reduce. (This assumes that the replicated data is
853  // correctly replicated, so that the data are the same on all
854  // processes.)
855  if (Y_is_replicated && matrix_->getComm ()->getRank () > 0) {
856  beta = STS::zero();
857  }
858 
859  // Temporary MV for Import operation. After the block of code
860  // below, this will be an (Imported if necessary) column Map MV
861  // ready to give to localMultiply_.apply(...).
862  RCP<const MV> X_colMap;
863  if (importer.is_null ()) {
864  if (! X_in.isConstantStride ()) {
865  // Not all sparse mat-vec kernels can handle an input MV with
866  // nonconstant stride correctly, so we have to copy it in that
867  // case into a constant stride MV. To make a constant stride
868  // copy of X_in, we force creation of the column (== domain)
869  // Map MV (if it hasn't already been created, else fetch the
870  // cached copy). This avoids creating a new MV each time.
871  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
872  Tpetra::deep_copy (*X_colMapNonConst, X_in);
873  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
874  }
875  else {
876  // The domain and column Maps are the same, so do the local
877  // multiply using the domain Map input MV X_in.
878  X_colMap = rcpFromRef (X_in);
879  }
880  }
881  else { // need to Import source (multi)vector
882  ProfilingRegion regionImport ("Tpetra::CrsMatrixMultiplyOp::apply: Import");
883 
884  // We're doing an Import anyway, which will copy the relevant
885  // elements of the domain Map MV X_in into a separate column Map
886  // MV. Thus, we don't have to worry whether X_in is constant
887  // stride.
888  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
889 
890  // Import from the domain Map MV to the column Map MV.
891  X_colMapNonConst->doImport (X_in, *importer, INSERT);
892  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
893  }
894 
895  // Temporary MV for doExport (if needed), or for copying a
896  // nonconstant stride output MV into a constant stride MV. This
897  // is null if we don't need the temporary MV, that is, if the
898  // Export is trivial (null).
899  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
900 
901  auto X_lcl = X_colMap->getLocalViewDevice ();
902 
903  // If we have a nontrivial Export object, we must perform an
904  // Export. In that case, the local multiply result will go into
905  // the row Map multivector. We don't have to make a
906  // constant-stride version of Y_in in this case, because we had to
907  // make a constant stride Y_rowMap MV and do an Export anyway.
908  if (! exporter.is_null ()) {
909  auto Y_lcl = Y_rowMap->getLocalViewDevice ();
910  Y_rowMap->modify_device ();
911  localMultiply_.apply (X_lcl, Y_lcl, NO_TRANS,
912  alpha, STS::zero ());
913  {
914  ProfilingRegion regionExport ("Tpetra::CrsMatrixMultiplyOp::apply: Export");
915 
916  // If we're overwriting the output MV Y_in completely (beta
917  // == 0), then make sure that it is filled with zeros before
918  // we do the Export. Otherwise, the ADD combine mode will
919  // use data in Y_in, which is supposed to be zero.
920  if (Y_is_overwritten) {
921  Y_in.putScalar (STS::zero());
922  }
923  else {
924  // Scale the output MV by beta, so that the Export sums in
925  // the mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
926  Y_in.scale (beta);
927  }
928  // Do the Export operation.
929  Y_in.doExport (*Y_rowMap, *exporter, ADD);
930  }
931  }
932  else { // Don't do an Export: row Map and range Map are the same.
933  //
934  // If Y_in does not have constant stride, or if the column Map
935  // MV aliases Y_in, then we can't let the kernel write directly
936  // to Y_in. Instead, we have to use the cached row (== range)
937  // Map MV as temporary storage.
938  //
939  // FIXME (mfh 05 Jun 2014, mfh 07 Dec 2018) This test for
940  // aliasing only tests if the user passed in the same
941  // MultiVector for both X and Y. It won't detect whether one
942  // MultiVector views the other. We should also check the
943  // MultiVectors' raw data pointers.
944  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
945  // Force creating the MV if it hasn't been created already.
946  // This will reuse a previously created cached MV.
947  Y_rowMap = getRowMapMultiVector (Y_in, true);
948 
949  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
950  // since we're overwriting it anyway.
951  if (beta != STS::zero ()) {
952  Tpetra::deep_copy (*Y_rowMap, Y_in);
953  }
954 
955  auto Y_lcl = Y_rowMap->getLocalViewDevice ();
956  Y_rowMap->modify_device ();
957  localMultiply_.apply (X_lcl, Y_lcl, NO_TRANS, alpha, beta);
958  Tpetra::deep_copy (Y_in, *Y_rowMap);
959  }
960  else {
961  auto Y_lcl = Y_in.getLocalViewDevice ();
962  Y_in.modify_device ();
963  localMultiply_.apply (X_lcl, Y_lcl, NO_TRANS, alpha, beta);
964  }
965  }
966 
967  // If the range Map is a locally replicated Map, sum up
968  // contributions from each process. We set beta = 0 on all
969  // processes but Proc 0 initially, so this will handle the scaling
970  // factor beta correctly.
971  if (Y_is_replicated) {
972  ProfilingRegion regionReduce ("Tpetra::CrsMatrixMultiplyOp::apply: Reduce Y");
973  Y_in.reduce ();
974  }
975  }
976 
977  private:
997  Teuchos::RCP<MV>
998  getColumnMapMultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X_domainMap,
999  const bool force = false) const
1000  {
1001  using Teuchos::null;
1002  using Teuchos::RCP;
1003  using Teuchos::rcp;
1004  typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
1005 
1006  const size_t numVecs = X_domainMap.getNumVectors ();
1007  RCP<const import_type> importer = matrix_->getGraph ()->getImporter ();
1008  RCP<const map_type> colMap = matrix_->getColMap ();
1009 
1010  RCP<MV> X_colMap; // null by default
1011 
1012  // If the Import object is trivial (null), then we don't need a
1013  // separate column Map multivector. Just return null in that
1014  // case. The caller is responsible for knowing not to use the
1015  // returned null pointer.
1016  //
1017  // If the Import is nontrivial, then we do need a separate
1018  // column Map multivector for the Import operation. Check in
1019  // that case if we have to (re)create the column Map
1020  // multivector.
1021  if (! importer.is_null () || force) {
1022  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
1023  X_colMap = rcp (new MV (colMap, numVecs));
1024 
1025  // Cache the newly created multivector for later reuse.
1026  importMV_ = X_colMap;
1027  }
1028  else { // Yay, we can reuse the cached multivector!
1029  X_colMap = importMV_;
1030  // mfh 09 Jan 2013: We don't have to fill with zeros first,
1031  // because the Import uses INSERT combine mode, which overwrites
1032  // existing entries.
1033  //
1034  //X_colMap->putScalar (STS::zero ());
1035  }
1036  }
1037  return X_colMap;
1038  }
1039 
1061  Teuchos::RCP<MV>
1062  getRowMapMultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y_rangeMap,
1063  const bool force = false) const
1064  {
1065  using Teuchos::null;
1066  using Teuchos::RCP;
1067  using Teuchos::rcp;
1068  typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
1069 
1070  const size_t numVecs = Y_rangeMap.getNumVectors ();
1071  RCP<const export_type> exporter = matrix_->getGraph ()->getExporter ();
1072  RCP<const map_type> rowMap = matrix_->getRowMap ();
1073 
1074  RCP<MV> Y_rowMap; // null by default
1075 
1076  // If the Export object is trivial (null), then we don't need a
1077  // separate row Map multivector. Just return null in that case.
1078  // The caller is responsible for knowing not to use the returned
1079  // null pointer.
1080  //
1081  // If the Export is nontrivial, then we do need a separate row
1082  // Map multivector for the Export operation. Check in that case
1083  // if we have to (re)create the row Map multivector.
1084  if (! exporter.is_null () || force) {
1085  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
1086  Y_rowMap = rcp (new MV (rowMap, numVecs));
1087  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse
1088  }
1089  else { // Yay, we can reuse the cached multivector!
1090  Y_rowMap = exportMV_;
1091  }
1092  }
1093  return Y_rowMap;
1094  }
1095  };
1096 
1104  template <class OpScalar,
1105  class MatScalar,
1106  class LocalOrdinal,
1107  class GlobalOrdinal,
1108  class Node>
1109  Teuchos::RCP<
1110  CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node> >
1111  createCrsMatrixMultiplyOp (const Teuchos::RCP<
1113  {
1114  typedef CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal,
1115  GlobalOrdinal, Node> op_type;
1116  return Teuchos::rcp (new op_type (A));
1117  }
1118 
1119 } // end of namespace Tpetra
1120 
1121 #endif // TPETRA_CRSMATRIXMULTIPLYOP_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
"Hybrid" Jacobi + (Gauss-Seidel or SOR) on .
Abstract interface for local operators (e.g., matrices and preconditioners).
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this Operator.
void applyTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Apply the transpose or conjugate transpose of the matrix to X_in, producing Y_in. ...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > exportMV_
Row Map MultiVector used in apply().
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Version of gaussSeidel(), with fewer requirements on X.
One or more distributed dense vectors.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this Operator.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute Y = beta*Y + alpha*Op(A)*X, where Op(A) is either A, , or .
bool hasTransposeApply() const override
Whether this Operator&#39;s apply() method can apply the transpose or conjugate transpose.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMV_
Column Map MultiVector used in apply().
~CrsMatrixMultiplyOp() override=default
Destructor (virtual for memory safety of derived classes).
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CrsMatrixMultiplyOp(const Teuchos::RCP< const crs_matrix_type > &A)
Constructor.
LocalCrsMatrixOperator< Scalar, MatScalar, typename crs_matrix_type::device_type > localMultiply_
Implementation of local sparse matrix-vector multiply.
Insert new values that don&#39;t currently exist.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Abstract interface for operators (e.g., matrices and preconditioners).
const Teuchos::RCP< const crs_matrix_type > matrix_
The underlying CrsMatrix object.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
dual_view_type::t_dev getLocalViewDevice() const
A local Kokkos::View of device memory.
size_t getNumVectors() const
Number of columns in the multivector.
Sum new values into existing values.
void modify_device()
Mark data as modified on the device side.
typename Node::device_type device_type
The Kokkos device type.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficency warning.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Forward declaration of Tpetra::CrsMatrixMultiplyOp.
A parallel distribution of indices over processes.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
A class for wrapping a CrsMatrix multiply in a Operator.
bool isDistributed() const
Whether this is a globally distributed object.
Stand-alone utility functions and macros.
void reduce()
Sum values of a locally replicated multivector across all processes.
Teuchos::RCP< CrsMatrixMultiplyOp< OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node > > createCrsMatrixMultiplyOp(const Teuchos::RCP< const CrsMatrix< MatScalar, LocalOrdinal, GlobalOrdinal, Node > > &A)
Non-member function to create a CrsMatrixMultiplyOp.
void applyNonTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Scalar alpha, Scalar beta) const
Apply the matrix (not its transpose) to X_in, producing Y_in.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.