Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_LocalFilter_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_LOCALFILTER_DEF_HPP
44 #define IFPACK2_LOCALFILTER_DEF_HPP
45 
46 #include <Ifpack2_LocalFilter_decl.hpp>
47 #include <Tpetra_Map.hpp>
48 #include <Tpetra_MultiVector.hpp>
49 #include <Tpetra_Vector.hpp>
50 
51 #ifdef HAVE_MPI
52 # include "Teuchos_DefaultMpiComm.hpp"
53 #else
54 # include "Teuchos_DefaultSerialComm.hpp"
55 #endif
56 
57 namespace Ifpack2 {
58 
59 
60 template<class MatrixType>
61 bool
62 LocalFilter<MatrixType>::
63 mapPairsAreFitted (const row_matrix_type& A)
64 {
65  const map_type& rangeMap = * (A.getRangeMap ());
66  const map_type& rowMap = * (A.getRowMap ());
67  const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
68 
69  const map_type& domainMap = * (A.getDomainMap ());
70  const map_type& columnMap = * (A.getColMap ());
71  const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
72 
73  //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
74  //This means that it can return different values on different ranks. This can cause MPI to hang,
75  //even though it's supposed to terminate globally when any single rank does.
76  //
77  //This function doesn't need to be fast since it's debug-only code.
78  int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
79  int globalSuccess;
80 
81  Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
82 
83  return globalSuccess == 1;
84 }
85 
86 
87 template<class MatrixType>
88 bool
89 LocalFilter<MatrixType>::
90 mapPairIsFitted (const map_type& map1, const map_type& map2)
91 {
92  return map1.isLocallyFitted (map2);
93 }
94 
95 
96 template<class MatrixType>
98 LocalFilter (const Teuchos::RCP<const row_matrix_type>& A) :
99  A_ (A),
100  NumNonzeros_ (0),
101  MaxNumEntries_ (0),
102  MaxNumEntriesA_ (0)
103 {
104  using Teuchos::RCP;
105  using Teuchos::rcp;
106 
107 #ifdef HAVE_IFPACK2_DEBUG
108  if(! mapPairsAreFitted (*A))
109  {
110  std::cout << "WARNING: Ifpack2::LocalFilter:\n" <<
111  "A's Map pairs are not fitted to each other on Process "
112  << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
113  "communicator.\n"
114  "This means that LocalFilter may not work with A. "
115  "Please see discussion of Bug 5992.";
116  }
117 #endif // HAVE_IFPACK2_DEBUG
118 
119  // Build the local communicator (containing this process only).
120  RCP<const Teuchos::Comm<int> > localComm;
121 #ifdef HAVE_MPI
122  localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
123 #else
124  localComm = rcp (new Teuchos::SerialComm<int> ());
125 #endif // HAVE_MPI
126 
127 
128  // // FIXME (mfh 21 Nov 2013) Currently, the implementation implicitly
129  // // assumes that the range Map is fitted to the row Map. Otherwise,
130  // // it probably won't work at all.
131  // TEUCHOS_TEST_FOR_EXCEPTION(
132  // mapPairIsFitted (* (A_->getRangeMap ()), * (A_->getRowMap ())),
133  // std::logic_error, "Ifpack2::LocalFilter: Range Map of the input matrix "
134  // "is not fitted to its row Map. LocalFilter does not currently work in "
135  // "this case. See Bug 5992.");
136 
137  // Build the local row, domain, and range Maps. They both use the
138  // local communicator built above. The global indices of each are
139  // different than those of the corresponding original Map; they
140  // actually are the same as the local indices of the original Map.
141  //
142  // It's even OK if signedness of local_ordinal_type and
143  // global_ordinal_type are different. (That would be a BAD IDEA but
144  // some users seem to enjoy making trouble for themselves and us.)
145  //
146  // Both the local row and local range Maps must have the same number
147  // of entries, namely, that of the local number of entries of A's
148  // range Map.
149 
150  const size_t numRows = A_->getRangeMap()->getLocalNumElements ();
151 
152  // using std::cerr;
153  // using std::endl;
154  // cerr << "A_ has " << A_->getLocalNumRows () << " rows." << endl
155  // << "Range Map has " << A_->getRangeMap ()->getLocalNumElements () << " entries." << endl
156  // << "Row Map has " << A_->getRowMap ()->getLocalNumElements () << " entries." << endl;
157 
158  const global_ordinal_type indexBase = static_cast<global_ordinal_type> (0);
159 
160  localRowMap_ =
161  rcp (new map_type (numRows, indexBase, localComm,
162  Tpetra::GloballyDistributed));
163  // If the original matrix's range Map is not fitted to its row Map,
164  // we'll have to do an Export when applying the matrix.
165  localRangeMap_ = localRowMap_;
166 
167  // If the original matrix's domain Map is not fitted to its column
168  // Map, we'll have to do an Import when applying the matrix.
169  if (A_->getRangeMap ().getRawPtr () == A_->getDomainMap ().getRawPtr ()) {
170  // The input matrix's domain and range Maps are identical, so the
171  // locally filtered matrix's domain and range Maps can be also.
172  localDomainMap_ = localRangeMap_;
173  }
174  else {
175  const size_t numCols = A_->getDomainMap()->getLocalNumElements ();
176  localDomainMap_ =
177  rcp (new map_type (numCols, indexBase, localComm,
178  Tpetra::GloballyDistributed));
179  }
180 
181  // NodeNumEntries_ will contain the actual number of nonzeros for
182  // each localized row (that is, without external nodes, and always
183  // with the diagonal entry)
184  NumEntries_.resize (numRows);
185 
186  // tentative value for MaxNumEntries. This is the number of
187  // nonzeros in the local matrix
188  MaxNumEntries_ = A_->getLocalMaxNumRowEntries ();
189  MaxNumEntriesA_ = A_->getLocalMaxNumRowEntries ();
190 
191  // Allocate temporary arrays for getLocalRowCopy().
192  Kokkos::resize(localIndices_,MaxNumEntries_);
193  Kokkos::resize(localIndicesForGlobalCopy_,MaxNumEntries_);
194  Kokkos::resize(Values_,MaxNumEntries_);
195 
196  // now compute:
197  // - the number of nonzero per row
198  // - the total number of nonzeros
199  // - the diagonal entries
200 
201  // compute nonzeros (total and per-row), and store the
202  // diagonal entries (already modified)
203  size_t ActualMaxNumEntries = 0;
204 
205  for (size_t i = 0; i < numRows; ++i) {
206  NumEntries_[i] = 0;
207  size_t Nnz, NewNnz = 0;
208  A_->getLocalRowCopy (i, localIndices_, Values_, Nnz);
209  for (size_t j = 0; j < Nnz; ++j) {
210  // FIXME (mfh 03 Apr 2013) This assumes the following:
211  //
212  // 1. Row Map, range Map, and domain Map are all the same.
213  //
214  // 2. The column Map's list of GIDs on this process is the
215  // domain Map's list of GIDs, followed by remote GIDs. Thus,
216  // for any GID in the domain Map on this process, its LID in
217  // the domain Map (and therefore in the row Map, by (1)) is
218  // the same as its LID in the column Map. (Hence the
219  // less-than test, which if true, means that localIndices_[j]
220  // belongs to the row Map.)
221  if (static_cast<size_t> (localIndices_[j]) < numRows) {
222  ++NewNnz;
223  }
224  }
225 
226  if (NewNnz > ActualMaxNumEntries) {
227  ActualMaxNumEntries = NewNnz;
228  }
229 
230  NumNonzeros_ += NewNnz;
231  NumEntries_[i] = NewNnz;
232  }
233 
234  MaxNumEntries_ = ActualMaxNumEntries;
235 }
236 
237 
238 template<class MatrixType>
240 {}
241 
242 
243 template<class MatrixType>
244 Teuchos::RCP<const Teuchos::Comm<int> >
246 {
247  return localRowMap_->getComm ();
248 }
249 
250 
251 
252 
253 template<class MatrixType>
254 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
255  typename MatrixType::global_ordinal_type,
256  typename MatrixType::node_type> >
258 {
259  return localRowMap_;
260 }
261 
262 
263 template<class MatrixType>
264 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
265  typename MatrixType::global_ordinal_type,
266  typename MatrixType::node_type> >
268 {
269  return localRowMap_; // FIXME (mfh 20 Nov 2013)
270 }
271 
272 
273 template<class MatrixType>
274 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
275  typename MatrixType::global_ordinal_type,
276  typename MatrixType::node_type> >
278 {
279  return localDomainMap_;
280 }
281 
282 
283 template<class MatrixType>
284 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
285  typename MatrixType::global_ordinal_type,
286  typename MatrixType::node_type> >
288 {
289  return localRangeMap_;
290 }
291 
292 
293 template<class MatrixType>
294 Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
295  typename MatrixType::global_ordinal_type,
296  typename MatrixType::node_type> >
298 {
299  // FIXME (mfh 20 Nov 2013) This is not what the documentation says
300  // this method should do! It should return the graph of the locally
301  // filtered matrix, not the original matrix's graph.
302  return A_->getGraph ();
303 }
304 
305 
306 template<class MatrixType>
308 {
309  return static_cast<global_size_t> (localRangeMap_->getLocalNumElements ());
310 }
311 
312 
313 template<class MatrixType>
315 {
316  return static_cast<global_size_t> (localDomainMap_->getLocalNumElements ());
317 }
318 
319 
320 template<class MatrixType>
322 {
323  return static_cast<size_t> (localRangeMap_->getLocalNumElements ());
324 }
325 
326 
327 template<class MatrixType>
329 {
330  return static_cast<size_t> (localDomainMap_->getLocalNumElements ());
331 }
332 
333 
334 template<class MatrixType>
335 typename MatrixType::global_ordinal_type
337 {
338  return A_->getIndexBase ();
339 }
340 
341 
342 template<class MatrixType>
344 {
345  return NumNonzeros_;
346 }
347 
348 
349 template<class MatrixType>
351 {
352  return NumNonzeros_;
353 }
354 
355 
356 template<class MatrixType>
357 size_t
359 getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
360 {
361  const local_ordinal_type localRow = getRowMap ()->getLocalElement (globalRow);
362  if (localRow == Teuchos::OrdinalTraits<local_ordinal_type>::invalid ()) {
363  // NOTE (mfh 26 Mar 2014) We return zero if globalRow is not in
364  // the row Map on this process, since "get the number of entries
365  // in the global row" refers only to what the calling process owns
366  // in that row. In this case, it owns no entries in that row,
367  // since it doesn't own the row.
368  return 0;
369  } else {
370  return NumEntries_[localRow];
371  }
372 }
373 
374 
375 template<class MatrixType>
376 size_t
378 getNumEntriesInLocalRow (local_ordinal_type localRow) const
379 {
380  // FIXME (mfh 07 Jul 2014) Shouldn't localRow be a local row index
381  // in the matrix's row Map, not in the LocalFilter's row Map? The
382  // latter is different; it even has different global indices!
383  // (Maybe _that_'s the bug.)
384 
385  if (getRowMap ()->isNodeLocalElement (localRow)) {
386  return NumEntries_[localRow];
387  } else {
388  // NOTE (mfh 26 Mar 2014) We return zero if localRow is not in the
389  // row Map on this process, since "get the number of entries in
390  // the local row" refers only to what the calling process owns in
391  // that row. In this case, it owns no entries in that row, since
392  // it doesn't own the row.
393  return 0;
394  }
395 }
396 
397 
398 template<class MatrixType>
400 {
401  return MaxNumEntries_;
402 }
403 
404 
405 template<class MatrixType>
407 {
408  return MaxNumEntries_;
409 }
410 
411 
412 template<class MatrixType>
414 {
415  return true;
416 }
417 
418 
419 template<class MatrixType>
421 {
422  return A_->isLocallyIndexed ();
423 }
424 
425 
426 template<class MatrixType>
428 {
429  return A_->isGloballyIndexed();
430 }
431 
432 
433 template<class MatrixType>
435 {
436  return A_->isFillComplete ();
437 }
438 
439 
440 template<class MatrixType>
441 void
443  getGlobalRowCopy (global_ordinal_type globalRow,
444  nonconst_global_inds_host_view_type &globalIndices,
445  nonconst_values_host_view_type &values,
446  size_t& numEntries) const
447 {
448  typedef local_ordinal_type LO;
449  typedef typename Teuchos::Array<LO>::size_type size_type;
450 
451  const LO localRow = this->getRowMap ()->getLocalElement (globalRow);
452  if (localRow == Teuchos::OrdinalTraits<LO>::invalid ()) {
453  // NOTE (mfh 26 Mar 2014) We return no entries if globalRow is not
454  // in the row Map on this process, since "get a copy of the
455  // entries in the global row" refers only to what the calling
456  // process owns in that row. In this case, it owns no entries in
457  // that row, since it doesn't own the row.
458  numEntries = 0;
459  }
460  else {
461  // First get a copy of the current row using local indices. Then,
462  // convert to global indices using the input matrix's column Map.
463  //
464  numEntries = this->getNumEntriesInLocalRow (localRow);
465  // FIXME (mfh 26 Mar 2014) If local_ordinal_type ==
466  // global_ordinal_type, we could just alias the input array
467  // instead of allocating a temporary array.
468 
469  // In this case, getLocalRowCopy *does* use the localIndices_, so we use a second temp array
470  this->getLocalRowCopy (localRow, localIndicesForGlobalCopy_, values, numEntries);
471 
472  const map_type& colMap = * (this->getColMap ());
473 
474  // Don't fill the output array beyond its size.
475  const size_type numEnt =
476  std::min (static_cast<size_type> (numEntries),
477  std::min ((size_type)globalIndices.size (), (size_type)values.size ()));
478  for (size_type k = 0; k < numEnt; ++k) {
479  globalIndices[k] = colMap.getGlobalElement (localIndicesForGlobalCopy_[k]);
480  }
481  }
482 }
483 
484 
485 template<class MatrixType>
486 void
488 getLocalRowCopy (local_ordinal_type LocalRow,
489  nonconst_local_inds_host_view_type &Indices,
490  nonconst_values_host_view_type &Values,
491  size_t& NumEntries) const
492 {
493  typedef local_ordinal_type LO;
494  typedef global_ordinal_type GO;
495 
496  if (! A_->getRowMap ()->isNodeLocalElement (LocalRow)) {
497  // The calling process owns zero entries in the row.
498  NumEntries = 0;
499  return;
500  }
501 
502  if (A_->getRowMap()->getComm()->getSize() == 1) {
503  A_->getLocalRowCopy (LocalRow, Indices, Values, NumEntries);
504  return;
505  }
506 
507 
508  const size_t numEntInLclRow = NumEntries_[LocalRow];
509  if (static_cast<size_t> (Indices.size ()) < numEntInLclRow ||
510  static_cast<size_t> (Values.size ()) < numEntInLclRow) {
511  // FIXME (mfh 07 Jul 2014) Return an error code instead of
512  // throwing. We should really attempt to fill as much space as
513  // we're given, in this case.
514  TEUCHOS_TEST_FOR_EXCEPTION(
515  true, std::runtime_error,
516  "Ifpack2::LocalFilter::getLocalRowCopy: Invalid output array length. "
517  "The output arrays must each have length at least " << numEntInLclRow
518  << " for local row " << LocalRow << " on Process "
519  << localRowMap_->getComm ()->getRank () << ".");
520  }
521  else if (numEntInLclRow == static_cast<size_t> (0)) {
522  // getNumEntriesInLocalRow() returns zero if LocalRow is not owned
523  // by the calling process. In that case, the calling process owns
524  // zero entries in the row.
525  NumEntries = 0;
526  return;
527  }
528 
529  // Always extract using the temporary arrays Values_ and
530  // localIndices_. This is because I may need more space than that
531  // given by the user. The users expects only the local (in the
532  // domain Map) column indices, but I have to extract both local and
533  // remote (not in the domain Map) column indices.
534  //
535  // FIXME (mfh 07 Jul 2014) Use of temporary local storage is not
536  // conducive to thread parallelism. A better way would be to change
537  // the interface so that it only extracts values for the "local"
538  // column indices. CrsMatrix could take a set of column indices,
539  // and return their corresponding values.
540  size_t numEntInMat = 0;
541  A_->getLocalRowCopy (LocalRow, localIndices_, Values_ , numEntInMat);
542 
543  // Fill the user's arrays with the "local" indices and values in
544  // that row. Note that the matrix might have a different column Map
545  // than the local filter.
546  const map_type& matrixDomMap = * (A_->getDomainMap ());
547  const map_type& matrixColMap = * (A_->getColMap ());
548 
549  const size_t capacity = static_cast<size_t> (std::min (Indices.size (),
550  Values.size ()));
551  NumEntries = 0;
552  const size_t numRows = localRowMap_->getLocalNumElements (); // superfluous
553  const bool buggy = true; // mfh 07 Jul 2014: See FIXME below.
554  for (size_t j = 0; j < numEntInMat; ++j) {
555  // The LocalFilter only includes entries in the domain Map on
556  // the calling process. We figure out whether an entry is in
557  // the domain Map by converting the (matrix column Map) index to
558  // a global index, and then asking whether that global index is
559  // in the domain Map.
560  const LO matrixLclCol = localIndices_[j];
561  const GO gblCol = matrixColMap.getGlobalElement (matrixLclCol);
562 
563  // FIXME (mfh 07 Jul 2014) This is the likely center of Bug 5992
564  // and perhaps other bugs, like Bug 6117. If 'buggy' is true,
565  // Ifpack2 tests pass; if 'buggy' is false, the tests don't pass.
566  // This suggests that Ifpack2 classes could be using LocalFilter
567  // incorrectly, perhaps by giving it an incorrect domain Map.
568  if (buggy) {
569  // only local indices
570  if ((size_t) localIndices_[j] < numRows) {
571  Indices[NumEntries] = localIndices_[j];
572  Values[NumEntries] = Values_[j];
573  NumEntries++;
574  }
575  } else {
576  if (matrixDomMap.isNodeGlobalElement (gblCol)) {
577  // Don't fill more space than the user gave us. It's an error
578  // for them not to give us enough space, but we still shouldn't
579  // overwrite memory that doesn't belong to us.
580  if (NumEntries < capacity) {
581  Indices[NumEntries] = matrixLclCol;
582  Values[NumEntries] = Values_[j];
583  }
584  NumEntries++;
585  }
586  }
587  }
588 }
589 
590 
591 template<class MatrixType>
592 void
594 getGlobalRowView (global_ordinal_type /*GlobalRow*/,
595  global_inds_host_view_type &/*indices*/,
596  values_host_view_type &/*values*/) const
597 {
598  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
599  "Ifpack2::LocalFilter does not implement getGlobalRowView.");
600 }
601 
602 
603 template<class MatrixType>
604 void
606 getLocalRowView (local_ordinal_type /*LocalRow*/,
607  local_inds_host_view_type &/*indices*/,
608  values_host_view_type &/*values*/) const
609 {
610  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
611  "Ifpack2::LocalFilter does not implement getLocalRowView.");
612 }
613 
614 
615 template<class MatrixType>
616 void
618 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& diag) const
619 {
620  using Teuchos::RCP;
621  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
622  global_ordinal_type, node_type> vector_type;
623  // This is always correct, and doesn't require a collective check
624  // for sameness of Maps.
625  RCP<vector_type> diagView = diag.offsetViewNonConst (A_->getRowMap (), 0);
626  A_->getLocalDiagCopy (*diagView);
627 }
628 
629 
630 template<class MatrixType>
631 void
633 leftScale (const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
634 {
635  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
636  "Ifpack2::LocalFilter does not implement leftScale.");
637 }
638 
639 
640 template<class MatrixType>
641 void
643 rightScale (const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
644 {
645  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
646  "Ifpack2::LocalFilter does not implement rightScale.");
647 }
648 
649 
650 template<class MatrixType>
651 void
653 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
654  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
655  Teuchos::ETransp mode,
656  scalar_type alpha,
657  scalar_type beta) const
658 {
659  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
660  TEUCHOS_TEST_FOR_EXCEPTION(
661  X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
662  "Ifpack2::LocalFilter::apply: X and Y must have the same number of columns. "
663  "X has " << X.getNumVectors () << " columns, but Y has "
664  << Y.getNumVectors () << " columns.");
665 
666 #ifdef HAVE_IFPACK2_DEBUG
667  {
668  typedef Teuchos::ScalarTraits<magnitude_type> STM;
669  Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
670  X.norm1 (norms ());
671  bool good = true;
672  for (size_t j = 0; j < X.getNumVectors (); ++j) {
673  if (STM::isnaninf (norms[j])) {
674  good = false;
675  break;
676  }
677  }
678  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::LocalFilter::apply: The 1-norm of the input X is NaN or Inf.");
679  }
680 #endif // HAVE_IFPACK2_DEBUG
681 
682  if (&X == &Y) {
683  // FIXME (mfh 23 Apr 2014) We have to do more work to figure out
684  // if X and Y alias one another. For example, we should check
685  // whether one is a noncontiguous view of the other.
686  //
687  // X and Y alias one another, so we have to copy X.
688  MV X_copy (X, Teuchos::Copy);
689  applyNonAliased (X_copy, Y, mode, alpha, beta);
690  } else {
691  applyNonAliased (X, Y, mode, alpha, beta);
692  }
693 
694 #ifdef HAVE_IFPACK2_DEBUG
695  {
696  typedef Teuchos::ScalarTraits<magnitude_type> STM;
697  Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
698  Y.norm1 (norms ());
699  bool good = true;
700  for (size_t j = 0; j < Y.getNumVectors (); ++j) {
701  if (STM::isnaninf (norms[j])) {
702  good = false;
703  break;
704  }
705  }
706  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::LocalFilter::apply: The 1-norm of the output Y is NaN or Inf.");
707  }
708 #endif // HAVE_IFPACK2_DEBUG
709 }
710 
711 template<class MatrixType>
712 void
714 applyNonAliased (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
715  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
716  Teuchos::ETransp mode,
717  scalar_type alpha,
718  scalar_type beta) const
719 {
720  using Teuchos::ArrayView;
721  using Teuchos::ArrayRCP;
722  typedef Teuchos::ScalarTraits<scalar_type> STS;
723 
724  const scalar_type zero = STS::zero ();
725  const scalar_type one = STS::one ();
726 
727  if (beta == zero) {
728  Y.putScalar (zero);
729  }
730  else if (beta != one) {
731  Y.scale (beta);
732  }
733 
734  const size_t NumVectors = Y.getNumVectors ();
735  const size_t numRows = localRowMap_->getLocalNumElements ();
736 
737  // FIXME (mfh 14 Apr 2014) At some point, we would like to
738  // parallelize this using Kokkos. This would require a
739  // Kokkos-friendly version of getLocalRowCopy, perhaps with
740  // thread-local storage.
741 
742  const bool constantStride = X.isConstantStride () && Y.isConstantStride ();
743  if (constantStride) {
744  // Since both X and Y have constant stride, we can work with "1-D"
745  // views of their data.
746  const size_t x_stride = X.getStride();
747  const size_t y_stride = Y.getStride();
748  ArrayRCP<scalar_type> y_rcp = Y.get1dViewNonConst();
749  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
750  ArrayView<scalar_type> y_ptr = y_rcp();
751  ArrayView<const scalar_type> x_ptr = x_rcp();
752  for (size_t i = 0; i < numRows; ++i) {
753  size_t Nnz;
754  // Use this class's getrow to make the below code simpler
755  getLocalRowCopy (i, localIndices_ , Values_ , Nnz);
756  scalar_type* Values = reinterpret_cast<scalar_type*>(Values_.data());
757  if (mode == Teuchos::NO_TRANS) {
758  for (size_t j = 0; j < Nnz; ++j) {
759  const local_ordinal_type col = localIndices_[j];
760  for (size_t k = 0; k < NumVectors; ++k) {
761  y_ptr[i + y_stride*k] +=
762  alpha * Values[j] * x_ptr[col + x_stride*k];
763  }
764  }
765  }
766  else if (mode == Teuchos::TRANS) {
767  for (size_t j = 0; j < Nnz; ++j) {
768  const local_ordinal_type col = localIndices_[j];
769  for (size_t k = 0; k < NumVectors; ++k) {
770  y_ptr[col + y_stride*k] +=
771  alpha * Values[j] * x_ptr[i + x_stride*k];
772  }
773  }
774  }
775  else { //mode==Teuchos::CONJ_TRANS
776  for (size_t j = 0; j < Nnz; ++j) {
777  const local_ordinal_type col = localIndices_[j];
778  for (size_t k = 0; k < NumVectors; ++k) {
779  y_ptr[col + y_stride*k] +=
780  alpha * STS::conjugate (Values[j]) * x_ptr[i + x_stride*k];
781  }
782  }
783  }
784  }
785  }
786  else {
787  // At least one of X or Y does not have constant stride.
788  // This means we must work with "2-D" views of their data.
789  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView();
790  ArrayRCP<ArrayRCP<scalar_type> > y_ptr = Y.get2dViewNonConst();
791 
792  for (size_t i = 0; i < numRows; ++i) {
793  size_t Nnz;
794  // Use this class's getrow to make the below code simpler
795  getLocalRowCopy (i, localIndices_ , Values_ , Nnz);
796  scalar_type* Values = reinterpret_cast<scalar_type*>(Values_.data());
797  if (mode == Teuchos::NO_TRANS) {
798  for (size_t k = 0; k < NumVectors; ++k) {
799  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
800  ArrayView<scalar_type> y_local = (y_ptr())[k]();
801  for (size_t j = 0; j < Nnz; ++j) {
802  y_local[i] += alpha * Values[j] * x_local[localIndices_[j]];
803  }
804  }
805  }
806  else if (mode == Teuchos::TRANS) {
807  for (size_t k = 0; k < NumVectors; ++k) {
808  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
809  ArrayView<scalar_type> y_local = (y_ptr())[k]();
810  for (size_t j = 0; j < Nnz; ++j) {
811  y_local[localIndices_[j]] += alpha * Values[j] * x_local[i];
812  }
813  }
814  }
815  else { //mode==Teuchos::CONJ_TRANS
816  for (size_t k = 0; k < NumVectors; ++k) {
817  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
818  ArrayView<scalar_type> y_local = (y_ptr())[k]();
819  for (size_t j = 0; j < Nnz; ++j) {
820  y_local[localIndices_[j]] +=
821  alpha * STS::conjugate (Values[j]) * x_local[i];
822  }
823  }
824  }
825  }
826  }
827 }
828 
829 template<class MatrixType>
831 {
832  return true;
833 }
834 
835 
836 template<class MatrixType>
838 {
839  return false;
840 }
841 
842 
843 template<class MatrixType>
844 typename
845 LocalFilter<MatrixType>::mag_type
847 {
848  typedef Kokkos::Details::ArithTraits<scalar_type> STS;
849  typedef Kokkos::Details::ArithTraits<mag_type> STM;
850  typedef typename Teuchos::Array<scalar_type>::size_type size_type;
851 
852  const size_type maxNumRowEnt = getLocalMaxNumRowEntries ();
853  nonconst_local_inds_host_view_type ind ("ind",maxNumRowEnt);
854  nonconst_values_host_view_type val ("val",maxNumRowEnt);
855  const size_t numRows = static_cast<size_t> (localRowMap_->getLocalNumElements ());
856 
857  // FIXME (mfh 03 Apr 2013) Scale during sum to avoid overflow.
858  mag_type sumSquared = STM::zero ();
859  for (size_t i = 0; i < numRows; ++i) {
860  size_t numEntries = 0;
861  this->getLocalRowCopy (i, ind, val, numEntries);
862  for (size_type k = 0; k < static_cast<size_type> (numEntries); ++k) {
863  const mag_type v_k_abs = STS::magnitude (val[k]);
864  sumSquared += v_k_abs * v_k_abs;
865  }
866  }
867  return STM::squareroot (sumSquared); // Different for each process; that's OK.
868 }
869 
870 template<class MatrixType>
871 std::string
873 {
874  using Teuchos::TypeNameTraits;
875  std::ostringstream os;
876 
877  os << "Ifpack2::LocalFilter: {";
878  os << "MatrixType: " << TypeNameTraits<MatrixType>::name ();
879  if (this->getObjectLabel () != "") {
880  os << ", Label: \"" << this->getObjectLabel () << "\"";
881  }
882  os << ", Number of rows: " << getGlobalNumRows ()
883  << ", Number of columns: " << getGlobalNumCols ()
884  << "}";
885  return os.str ();
886 }
887 
888 
889 template<class MatrixType>
890 void
892 describe (Teuchos::FancyOStream &out,
893  const Teuchos::EVerbosityLevel verbLevel) const
894 {
895  using Teuchos::OSTab;
896  using Teuchos::TypeNameTraits;
897  using std::endl;
898 
899  const Teuchos::EVerbosityLevel vl =
900  (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
901 
902  if (vl > Teuchos::VERB_NONE) {
903  // describe() starts with a tab, by convention.
904  OSTab tab0 (out);
905 
906  out << "Ifpack2::LocalFilter:" << endl;
907  OSTab tab1 (out);
908  out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
909  if (this->getObjectLabel () != "") {
910  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
911  }
912  out << "Number of rows: " << getGlobalNumRows () << endl
913  << "Number of columns: " << getGlobalNumCols () << endl
914  << "Number of nonzeros: " << NumNonzeros_ << endl;
915 
916  if (vl > Teuchos::VERB_LOW) {
917  out << "Row Map:" << endl;
918  localRowMap_->describe (out, vl);
919  out << "Domain Map:" << endl;
920  localDomainMap_->describe (out, vl);
921  out << "Range Map:" << endl;
922  localRangeMap_->describe (out, vl);
923  }
924  }
925 }
926 
927 template<class MatrixType>
928 Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type,
929  typename MatrixType::local_ordinal_type,
930  typename MatrixType::global_ordinal_type,
931  typename MatrixType::node_type> >
933 {
934  return A_;
935 }
936 
937 
938 } // namespace Ifpack2
939 
940 #define IFPACK2_LOCALFILTER_INSTANT(S,LO,GO,N) \
941  template class Ifpack2::LocalFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
942 
943 #endif
virtual void getGlobalRowCopy(global_ordinal_type GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const
Get the entries in the given row, using global indices.
Definition: Ifpack2_LocalFilter_def.hpp:443
virtual bool isFillComplete() const
Returns true if fillComplete() has been called.
Definition: Ifpack2_LocalFilter_def.hpp:434
virtual void rightScale(const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &x)
Scales the RowMatrix on the right with the Vector x.
Definition: Ifpack2_LocalFilter_def.hpp:643
virtual mag_type getFrobeniusNorm() const
The Frobenius norm of the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:846
virtual Teuchos::RCP< const row_matrix_type > getUnderlyingMatrix() const
Return matrix that LocalFilter was built on.
Definition: Ifpack2_LocalFilter_def.hpp:932
virtual global_size_t getGlobalNumCols() const
The number of global columns in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:314
virtual bool isLocallyIndexed() const
Whether the underlying sparse matrix is locally (opposite of globally) indexed.
Definition: Ifpack2_LocalFilter_def.hpp:420
virtual size_t getLocalNumRows() const
The number of rows owned on the calling process.
Definition: Ifpack2_LocalFilter_def.hpp:321
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object to the given output stream.
Definition: Ifpack2_LocalFilter_def.hpp:892
virtual global_size_t getGlobalNumRows() const
The number of global rows in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:307
virtual Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map that describes the range distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:287
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Type of the Tpetra::Map specialization that this class uses.
Definition: Ifpack2_LocalFilter_decl.hpp:216
virtual bool supportsRowViews() const
Returns true if RowViews are supported.
Definition: Ifpack2_LocalFilter_def.hpp:837
virtual Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:267
virtual void leftScale(const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &x)
Scales the RowMatrix on the left with the Vector x.
Definition: Ifpack2_LocalFilter_def.hpp:633
virtual Teuchos::RCP< const Tpetra::RowGraph< local_ordinal_type, global_ordinal_type, node_type > > getGraph() const
The (locally filtered) matrix&#39;s graph.
Definition: Ifpack2_LocalFilter_def.hpp:297
virtual void getLocalRowCopy(local_ordinal_type LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const
Get the entries in the given row, using local indices.
Definition: Ifpack2_LocalFilter_def.hpp:488
virtual void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Compute Y = beta*Y + alpha*A_local*X.
Definition: Ifpack2_LocalFilter_def.hpp:653
virtual global_ordinal_type getIndexBase() const
Returns the index base for global indices for this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:336
virtual bool hasColMap() const
Whether this matrix has a well-defined column Map.
Definition: Ifpack2_LocalFilter_def.hpp:413
virtual size_t getLocalNumCols() const
The number of columns in the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:328
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all processes.
Definition: Ifpack2_LocalFilter_def.hpp:399
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
Definition: Ifpack2_LocalFilter_def.hpp:245
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalFilter_def.hpp:872
LocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_LocalFilter_def.hpp:98
virtual ~LocalFilter()
Destructor.
Definition: Ifpack2_LocalFilter_def.hpp:239
virtual size_t getLocalNumEntries() const
Returns the local number of entries in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:350
virtual global_size_t getGlobalNumEntries() const
Returns the global number of entries in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:343
virtual Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:257
virtual size_t getLocalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on this process.
Definition: Ifpack2_LocalFilter_def.hpp:406
virtual bool isGloballyIndexed() const
Whether the underlying sparse matrix is globally (opposite of locally) indexed.
Definition: Ifpack2_LocalFilter_def.hpp:427
virtual void getLocalDiagCopy(Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &diag) const
Get the diagonal entries of the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:618
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
virtual Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map that describes the domain distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:277
virtual size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const
The current number of entries on this node in the specified global row.
Definition: Ifpack2_LocalFilter_def.hpp:359
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74
virtual void getLocalRowView(local_ordinal_type LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const
Extract a const, non-persisting view of local indices in a specified row of the matrix.
Definition: Ifpack2_LocalFilter_def.hpp:606
virtual bool hasTransposeApply() const
Whether this operator supports applying the transpose or conjugate transpose.
Definition: Ifpack2_LocalFilter_def.hpp:830
virtual void getGlobalRowView(global_ordinal_type GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const
Extract a const, non-persisting view of global indices in a specified row of the matrix.
Definition: Ifpack2_LocalFilter_def.hpp:594
virtual size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const
The current number of entries on this node in the specified local row.
Definition: Ifpack2_LocalFilter_def.hpp:378