Tpetra parallel linear algebra  Version of the Day
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
41 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
42 
44 #include "Tpetra_BlockView.hpp"
45 #include "Teuchos_OrdinalTraits.hpp"
46 
47 namespace { // anonymous
48 
60  template<class MultiVectorType>
61  struct RawHostPtrFromMultiVector {
62  typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
63 
64  static impl_scalar_type* getRawPtr (MultiVectorType& X) {
65  // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
66  // host as modified. This is on purpose, because we don't want
67  // the BlockMultiVector sync'd to host unnecessarily.
68  // Otherwise, all the MultiVector and BlockMultiVector kernels
69  // would run on host instead of device. See Github Issue #428.
70  auto X_view_host = X.template getLocalView<typename MultiVectorType::dual_view_type::t_host::device_type> ();
71  impl_scalar_type* X_raw = X_view_host.data ();
72  return X_raw;
73  }
74  };
75 
88  template<class S, class LO, class GO, class N>
90  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
92  return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
93  }
94 
95 } // namespace (anonymous)
96 
97 namespace Tpetra {
98 
99 template<class Scalar, class LO, class GO, class Node>
103 {
104  return mv_;
105 }
106 
107 template<class Scalar, class LO, class GO, class Node>
108 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
111 {
113  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
114  TEUCHOS_TEST_FOR_EXCEPTION(
115  src_bmv == nullptr, std::invalid_argument, "Tpetra::"
116  "BlockMultiVector: The source object of an Import or Export to a "
117  "BlockMultiVector, must also be a BlockMultiVector.");
118  return Teuchos::rcp (src_bmv, false); // nonowning RCP
119 }
120 
121 template<class Scalar, class LO, class GO, class Node>
124  const Teuchos::DataAccess copyOrView) :
125  dist_object_type (in),
126  meshMap_ (in.meshMap_),
127  pointMap_ (in.pointMap_),
128  mv_ (in.mv_, copyOrView),
129  mvData_ (getRawHostPtrFromMultiVector (mv_)),
130  blockSize_ (in.blockSize_)
131 {}
132 
133 template<class Scalar, class LO, class GO, class Node>
135 BlockMultiVector (const map_type& meshMap,
136  const LO blockSize,
137  const LO numVecs) :
138  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
139  meshMap_ (meshMap),
140  pointMap_ (makePointMap (meshMap, blockSize)),
141  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
142  mvData_ (getRawHostPtrFromMultiVector (mv_)),
143  blockSize_ (blockSize)
144 {}
145 
146 template<class Scalar, class LO, class GO, class Node>
148 BlockMultiVector (const map_type& meshMap,
149  const map_type& pointMap,
150  const LO blockSize,
151  const LO numVecs) :
152  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
153  meshMap_ (meshMap),
154  pointMap_ (pointMap),
155  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
156  mvData_ (getRawHostPtrFromMultiVector (mv_)),
157  blockSize_ (blockSize)
158 {}
159 
160 template<class Scalar, class LO, class GO, class Node>
163  const map_type& meshMap,
164  const LO blockSize) :
165  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
166  meshMap_ (meshMap),
167  mvData_ (nullptr), // just for now
168  blockSize_ (blockSize)
169 {
170  using Teuchos::RCP;
171 
172  if (X_mv.getCopyOrView () == Teuchos::View) {
173  // The input MultiVector has view semantics, so assignment just
174  // does a shallow copy.
175  mv_ = X_mv;
176  }
177  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
178  // The input MultiVector has copy semantics. We can't change
179  // that, but we can make a view of the input MultiVector and
180  // change the view to have view semantics. (That sounds silly;
181  // shouldn't views always have view semantics? but remember that
182  // "view semantics" just governs the default behavior of the copy
183  // constructor and assignment operator.)
184  RCP<const mv_type> X_view_const;
185  const size_t numCols = X_mv.getNumVectors ();
186  if (numCols == 0) {
187  Teuchos::Array<size_t> cols (0); // view will have zero columns
188  X_view_const = X_mv.subView (cols ());
189  } else { // Range1D is an inclusive range
190  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
191  }
192  TEUCHOS_TEST_FOR_EXCEPTION(
193  X_view_const.is_null (), std::logic_error, "Tpetra::"
194  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
195  "should never happen. Please report this bug to the Tpetra developers.");
196 
197  // It's perfectly OK to cast away const here. Those view methods
198  // should be marked const anyway, because views can't change the
199  // allocation (just the entries themselves).
200  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
201  TEUCHOS_TEST_FOR_EXCEPTION(
202  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
203  "BlockMultiVector constructor: We just set a MultiVector "
204  "to have view semantics, but it claims that it doesn't have view "
205  "semantics. This should never happen. "
206  "Please report this bug to the Tpetra developers.");
207  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
208  }
209 
210  // At this point, mv_ has been assigned, so we can ignore X_mv.
211  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
212  if (! pointMap.is_null ()) {
213  pointMap_ = *pointMap; // Map::operator= also does a shallow copy
214  }
215  mvData_ = getRawHostPtrFromMultiVector (mv_);
216 }
217 
218 template<class Scalar, class LO, class GO, class Node>
221  const map_type& newMeshMap,
222  const map_type& newPointMap,
223  const size_t offset) :
224  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
225  meshMap_ (newMeshMap),
226  pointMap_ (newPointMap),
227  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
228  mvData_ (getRawHostPtrFromMultiVector (mv_)),
229  blockSize_ (X.getBlockSize ())
230 {}
231 
232 template<class Scalar, class LO, class GO, class Node>
235  const map_type& newMeshMap,
236  const size_t offset) :
237  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
238  meshMap_ (newMeshMap),
239  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
240  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
241  mvData_ (getRawHostPtrFromMultiVector (mv_)),
242  blockSize_ (X.getBlockSize ())
243 {}
244 
245 template<class Scalar, class LO, class GO, class Node>
248  dist_object_type (Teuchos::null),
249  mvData_ (nullptr),
250  blockSize_ (0)
251 {}
252 
253 template<class Scalar, class LO, class GO, class Node>
256 makePointMap (const map_type& meshMap, const LO blockSize)
257 {
258  typedef Tpetra::global_size_t GST;
259  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
260 
261  const GST gblNumMeshMapInds =
262  static_cast<GST> (meshMap.getGlobalNumElements ());
263  const size_t lclNumMeshMapIndices =
264  static_cast<size_t> (meshMap.getNodeNumElements ());
265  const GST gblNumPointMapInds =
266  gblNumMeshMapInds * static_cast<GST> (blockSize);
267  const size_t lclNumPointMapInds =
268  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
269  const GO indexBase = meshMap.getIndexBase ();
270 
271  if (meshMap.isContiguous ()) {
272  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
273  meshMap.getComm ());
274  }
275  else {
276  // "Hilbert's Hotel" trick: multiply each process' GIDs by
277  // blockSize, and fill in. That ensures correctness even if the
278  // mesh Map is overlapping.
279  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
280  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
281  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
282  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
283  const GO meshGid = lclMeshGblInds[g];
284  const GO pointGidStart = indexBase +
285  (meshGid - indexBase) * static_cast<GO> (blockSize);
286  const size_type offset = g * static_cast<size_type> (blockSize);
287  for (LO k = 0; k < blockSize; ++k) {
288  const GO pointGid = pointGidStart + static_cast<GO> (k);
289  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
290  }
291  }
292  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
293  meshMap.getComm ());
294  }
295 }
296 
297 
298 template<class Scalar, class LO, class GO, class Node>
299 void
301 replaceLocalValuesImpl (const LO localRowIndex,
302  const LO colIndex,
303  const Scalar vals[]) const
304 {
305  auto X_dst = getLocalBlock (localRowIndex, colIndex);
306  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
307  getBlockSize ());
308  Kokkos::deep_copy (X_dst, X_src);
309 }
310 
311 
312 template<class Scalar, class LO, class GO, class Node>
313 bool
315 replaceLocalValues (const LO localRowIndex,
316  const LO colIndex,
317  const Scalar vals[]) const
318 {
319  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
320  return false;
321  } else {
322  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
323  return true;
324  }
325 }
326 
327 template<class Scalar, class LO, class GO, class Node>
328 bool
330 replaceGlobalValues (const GO globalRowIndex,
331  const LO colIndex,
332  const Scalar vals[]) const
333 {
334  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
335  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
336  return false;
337  } else {
338  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
339  return true;
340  }
341 }
342 
343 template<class Scalar, class LO, class GO, class Node>
344 void
346 sumIntoLocalValuesImpl (const LO localRowIndex,
347  const LO colIndex,
348  const Scalar vals[]) const
349 {
350  auto X_dst = getLocalBlock (localRowIndex, colIndex);
351  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
352  getBlockSize ());
353  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
354 }
355 
356 template<class Scalar, class LO, class GO, class Node>
357 bool
359 sumIntoLocalValues (const LO localRowIndex,
360  const LO colIndex,
361  const Scalar vals[]) const
362 {
363  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
364  return false;
365  } else {
366  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
367  return true;
368  }
369 }
370 
371 template<class Scalar, class LO, class GO, class Node>
372 bool
374 sumIntoGlobalValues (const GO globalRowIndex,
375  const LO colIndex,
376  const Scalar vals[]) const
377 {
378  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
379  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
380  return false;
381  } else {
382  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
383  return true;
384  }
385 }
386 
387 template<class Scalar, class LO, class GO, class Node>
388 bool
390 getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
391 {
392  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
393  return false;
394  } else {
395  auto X_ij = getLocalBlock (localRowIndex, colIndex);
396  vals = reinterpret_cast<Scalar*> (X_ij.data ());
397  return true;
398  }
399 }
400 
401 template<class Scalar, class LO, class GO, class Node>
402 bool
404 getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
405 {
406  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
407  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
408  return false;
409  } else {
410  auto X_ij = getLocalBlock (localRowIndex, colIndex);
411  vals = reinterpret_cast<Scalar*> (X_ij.data ());
412  return true;
413  }
414 }
415 
416 template<class Scalar, class LO, class GO, class Node>
419 getLocalBlock (const LO localRowIndex,
420  const LO colIndex) const
421 {
422  // NOTE (mfh 07 Jul 2016) It should be correct to add the
423  // commented-out test below. However, I've conservatively commented
424  // it out, since users might not realize that they need to have
425  // things sync'd correctly.
426 
427 // #ifdef HAVE_TPETRA_DEBUG
428 // TEUCHOS_TEST_FOR_EXCEPTION
429 // (mv_.need_sync_host (), std::runtime_error,
430 // "Tpetra::BlockMultiVector::getLocalBlock: This method "
431 // "accesses host data, but the object is not in sync on host." );
432 // #endif // HAVE_TPETRA_DEBUG
433 
434  if (! isValidLocalMeshIndex (localRowIndex)) {
435  return typename little_vec_type::HostMirror ();
436  } else {
437  const size_t blockSize = getBlockSize ();
438  const size_t offset = colIndex * this->getStrideY () +
439  localRowIndex * blockSize;
440  impl_scalar_type* blockRaw = this->getRawPtr () + offset;
441  return typename little_vec_type::HostMirror (blockRaw, blockSize);
442  }
443 }
444 
445 template<class Scalar, class LO, class GO, class Node>
446 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
449 {
450  using Teuchos::rcp;
451  using Teuchos::rcpFromRef;
452 
453  // The source object of an Import or Export must be a
454  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
455  // them in that order; one must succeed. Note that the target of
456  // the Import or Export calls checkSizes in DistObject's doTransfer.
457  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
458  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
459  if (srcBlkVec == nullptr) {
460  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
461  if (srcMultiVec == nullptr) {
462  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
463  // currently does a shallow copy by default. This is why we
464  // return by RCP, rather than by value.
465  return rcp (new mv_type ());
466  } else { // src is a MultiVector
467  return rcp (srcMultiVec, false); // nonowning RCP
468  }
469  } else { // src is a BlockMultiVector
470  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
471  }
472 }
473 
474 template<class Scalar, class LO, class GO, class Node>
477 {
478  return ! getMultiVectorFromSrcDistObject (src).is_null ();
479 }
480 
481 template<class Scalar, class LO, class GO, class Node>
484 (const SrcDistObject& src,
485  const size_t numSameIDs,
486  const Kokkos::DualView<const local_ordinal_type*,
487  buffer_device_type>& permuteToLIDs,
488  const Kokkos::DualView<const local_ordinal_type*,
489  buffer_device_type>& permuteFromLIDs)
490 {
491  TEUCHOS_TEST_FOR_EXCEPTION
492  (true, std::logic_error,
493  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
494  "instead, create a point importer using makePointMap function.");
495 }
496 
497 template<class Scalar, class LO, class GO, class Node>
498 void BlockMultiVector<Scalar, LO, GO, Node>::
499 packAndPrepare
500 (const SrcDistObject& src,
501  const Kokkos::DualView<const local_ordinal_type*,
502  buffer_device_type>& exportLIDs,
503  Kokkos::DualView<packet_type*,
504  buffer_device_type>& exports,
505  Kokkos::DualView<size_t*,
506  buffer_device_type> numPacketsPerLID,
507  size_t& constantNumPackets,
508  Distributor& distor)
509 {
510  TEUCHOS_TEST_FOR_EXCEPTION
511  (true, std::logic_error,
512  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
513  "instead, create a point importer using makePointMap function.");
514 }
515 
516 template<class Scalar, class LO, class GO, class Node>
517 void BlockMultiVector<Scalar, LO, GO, Node>::
518 unpackAndCombine
519 (const Kokkos::DualView<const local_ordinal_type*,
520  buffer_device_type>& importLIDs,
521  Kokkos::DualView<packet_type*,
522  buffer_device_type> imports,
523  Kokkos::DualView<size_t*,
524  buffer_device_type> numPacketsPerLID,
525  const size_t constantNumPackets,
526  Distributor& distor,
527  const CombineMode combineMode)
528 {
529  TEUCHOS_TEST_FOR_EXCEPTION
530  (true, std::logic_error,
531  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
532  "instead, create a point importer using makePointMap function.");
533 }
534 
535 template<class Scalar, class LO, class GO, class Node>
537 isValidLocalMeshIndex (const LO meshLocalIndex) const
538 {
539  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
540  meshMap_.isNodeLocalElement (meshLocalIndex);
541 }
542 
543 template<class Scalar, class LO, class GO, class Node>
545 putScalar (const Scalar& val)
546 {
547  mv_.putScalar (val);
548 }
549 
550 template<class Scalar, class LO, class GO, class Node>
552 scale (const Scalar& val)
553 {
554  mv_.scale (val);
555 }
556 
557 template<class Scalar, class LO, class GO, class Node>
559 update (const Scalar& alpha,
561  const Scalar& beta)
562 {
563  mv_.update (alpha, X.mv_, beta);
564 }
565 
566 namespace Impl {
567 // Y := alpha * D * X
568 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
569 struct BlockWiseMultiply {
570  typedef typename ViewD::size_type Size;
571 
572 private:
573  typedef typename ViewD::device_type Device;
574  typedef typename ViewD::non_const_value_type ImplScalar;
575  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
576 
577  template <typename View>
578  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
579  typename View::device_type, Unmanaged>;
580  typedef UnmanagedView<ViewY> UnMViewY;
581  typedef UnmanagedView<ViewD> UnMViewD;
582  typedef UnmanagedView<ViewX> UnMViewX;
583 
584  const Size block_size_;
585  Scalar alpha_;
586  UnMViewY Y_;
587  UnMViewD D_;
588  UnMViewX X_;
589 
590 public:
591  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
592  const ViewY& Y, const ViewD& D, const ViewX& X)
593  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
594  {}
595 
596  KOKKOS_INLINE_FUNCTION
597  void operator() (const Size k) const {
598  const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
599  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
600  const auto num_vecs = X_.extent(1);
601  for (Size i = 0; i < num_vecs; ++i) {
602  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
603  auto X_curBlk = Kokkos::subview(X_, kslice, i);
604  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
605  // Y_curBlk := alpha * D_curBlk * X_curBlk.
606  // Recall that GEMV does an update (+=) of the last argument.
607  Tpetra::FILL(Y_curBlk, zero);
608  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
609  }
610  }
611 };
612 
613 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
614 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
615 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
616  const ViewY& Y, const ViewD& D, const ViewX& X) {
617  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
618 }
619 
620 template <typename ViewY,
621  typename Scalar,
622  typename ViewD,
623  typename ViewZ,
624  typename LO = typename ViewY::size_type>
625 class BlockJacobiUpdate {
626 private:
627  typedef typename ViewD::device_type Device;
628  typedef typename ViewD::non_const_value_type ImplScalar;
629  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
630 
631  template <typename ViewType>
632  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
633  typename ViewType::array_layout,
634  typename ViewType::device_type,
635  Unmanaged>;
636  typedef UnmanagedView<ViewY> UnMViewY;
637  typedef UnmanagedView<ViewD> UnMViewD;
638  typedef UnmanagedView<ViewZ> UnMViewZ;
639 
640  const LO blockSize_;
641  UnMViewY Y_;
642  const Scalar alpha_;
643  UnMViewD D_;
644  UnMViewZ Z_;
645  const Scalar beta_;
646 
647 public:
648  BlockJacobiUpdate (const ViewY& Y,
649  const Scalar& alpha,
650  const ViewD& D,
651  const ViewZ& Z,
652  const Scalar& beta) :
653  blockSize_ (D.extent (1)),
654  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
655  Y_ (Y),
656  alpha_ (alpha),
657  D_ (D),
658  Z_ (Z),
659  beta_ (beta)
660  {
661  static_assert (static_cast<int> (ViewY::rank) == 1,
662  "Y must have rank 1.");
663  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
664  static_assert (static_cast<int> (ViewZ::rank) == 1,
665  "Z must have rank 1.");
666  // static_assert (static_cast<int> (ViewZ::rank) ==
667  // static_cast<int> (ViewY::rank),
668  // "Z must have the same rank as Y.");
669  }
670 
671  KOKKOS_INLINE_FUNCTION void
672  operator() (const LO& k) const
673  {
674  using Kokkos::ALL;
675  using Kokkos::subview;
676  typedef Kokkos::pair<LO, LO> range_type;
677  typedef Kokkos::Details::ArithTraits<Scalar> KAT;
678 
679  // We only have to implement the alpha != 0 case.
680 
681  auto D_curBlk = subview (D_, k, ALL (), ALL ());
682  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
683 
684  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
685 
686  auto Z_curBlk = subview (Z_, kslice);
687  auto Y_curBlk = subview (Y_, kslice);
688  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
689  if (beta_ == KAT::zero ()) {
690  Tpetra::FILL (Y_curBlk, KAT::zero ());
691  }
692  else if (beta_ != KAT::one ()) {
693  Tpetra::SCAL (beta_, Y_curBlk);
694  }
695  Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
696  }
697 };
698 
699 template<class ViewY,
700  class Scalar,
701  class ViewD,
702  class ViewZ,
703  class LO = typename ViewD::size_type>
704 void
705 blockJacobiUpdate (const ViewY& Y,
706  const Scalar& alpha,
707  const ViewD& D,
708  const ViewZ& Z,
709  const Scalar& beta)
710 {
711  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
712  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
713  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
714  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
715  "Y and Z must have the same rank.");
716  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
717 
718  const auto lclNumMeshRows = D.extent (0);
719 
720 #ifdef HAVE_TPETRA_DEBUG
721  // D.extent(0) is the (local) number of mesh rows.
722  // D.extent(1) is the block size. Thus, their product should be
723  // the local number of point rows, that is, the number of rows in Y.
724  const auto blkSize = D.extent (1);
725  const auto lclNumPtRows = lclNumMeshRows * blkSize;
726  TEUCHOS_TEST_FOR_EXCEPTION
727  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
728  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
729  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
730  << " = " << lclNumPtRows << ".");
731  TEUCHOS_TEST_FOR_EXCEPTION
732  (Y.extent (0) != Z.extent (0), std::invalid_argument,
733  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
734  "Z.extent(0) = " << Z.extent (0) << ".");
735  TEUCHOS_TEST_FOR_EXCEPTION
736  (Y.extent (1) != Z.extent (1), std::invalid_argument,
737  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
738  "Z.extent(1) = " << Z.extent (1) << ".");
739 #endif // HAVE_TPETRA_DEBUG
740 
741  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
742  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
743  // lclNumMeshRows must fit in LO, else the Map would not be correct.
744  range_type range (0, static_cast<LO> (lclNumMeshRows));
745  Kokkos::parallel_for (range, functor);
746 }
747 
748 } // namespace Impl
749 
750 template<class Scalar, class LO, class GO, class Node>
752 blockWiseMultiply (const Scalar& alpha,
753  const Kokkos::View<const impl_scalar_type***,
754  device_type, Kokkos::MemoryUnmanaged>& D,
756 {
757  using Kokkos::ALL;
758  typedef typename device_type::execution_space execution_space;
759  typedef typename device_type::memory_space memory_space;
760  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
761 
762  if (alpha == STS::zero ()) {
763  this->putScalar (STS::zero ());
764  }
765  else { // alpha != 0
766  const LO blockSize = this->getBlockSize ();
767  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
768  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
769  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
770  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
771 
772  // Use an explicit RangePolicy with the desired execution space.
773  // Otherwise, if you just use a number, it runs on the default
774  // execution space. For example, if the default execution space
775  // is Cuda but the current execution space is Serial, using just a
776  // number would incorrectly run with Cuda.
777  Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
778  Kokkos::parallel_for (range, bwm);
779  }
780 }
781 
782 template<class Scalar, class LO, class GO, class Node>
784 blockJacobiUpdate (const Scalar& alpha,
785  const Kokkos::View<const impl_scalar_type***,
786  device_type, Kokkos::MemoryUnmanaged>& D,
789  const Scalar& beta)
790 {
791  using Kokkos::ALL;
792  using Kokkos::subview;
793  typedef typename device_type::memory_space memory_space;
794  typedef impl_scalar_type IST;
795 
796  const IST alphaImpl = static_cast<IST> (alpha);
797  const IST betaImpl = static_cast<IST> (beta);
798  const LO numVecs = mv_.getNumVectors ();
799 
800  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
801  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
802  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();
803 
804  if (alpha == STS::zero ()) { // Y := beta * Y
805  this->scale (beta);
806  }
807  else { // alpha != 0
808  Z.update (STS::one (), X, -STS::one ());
809  for (LO j = 0; j < numVecs; ++j) {
810  auto X_lcl_j = subview (X_lcl, ALL (), j);
811  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
812  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
813  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
814  }
815  }
816 }
817 
818 } // namespace Tpetra
819 
820 //
821 // Explicit instantiation macro
822 //
823 // Must be expanded from within the Tpetra namespace!
824 //
825 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
826  template class BlockMultiVector< S, LO, GO, NODE >;
827 
828 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using local row and column indices.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector&#39;s data.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
One or more distributed dense vectors.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a global index.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Linear algebra kernels for small dense matrices and vectors.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
MultiVector for multiple degrees of freedom per mesh point.
bool getGlobalRowView(const GO globalRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a global index.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
global_ordinal_type getIndexBase() const
The index base for this Map.
typename mv_type::device_type device_type
The Kokkos Device type.
CombineMode
Rule for combining data in an Import or Export.
size_t getNumVectors() const
Number of columns in the multivector.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using a global index.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
bool getLocalRowView(const LO localRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a local index. ...
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a local index.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.