40 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
41 #define TPETRA_MULTIVECTOR_DEF_HPP
52 #include "Tpetra_Vector.hpp"
55 #include "Tpetra_Details_checkView.hpp"
64 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
65 # include "Teuchos_SerialDenseMatrix.hpp"
67 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
68 #include "KokkosCompat_View.hpp"
69 #include "KokkosBlas.hpp"
70 #include "KokkosKernels_Utils.hpp"
71 #include "Kokkos_Random.hpp"
72 #include "Kokkos_ArithTraits.hpp"
76 #ifdef HAVE_TPETRA_INST_FLOAT128
79 template<
class Generator>
80 struct rand<Generator, __float128> {
81 static KOKKOS_INLINE_FUNCTION __float128 max ()
83 return static_cast<__float128
> (1.0);
85 static KOKKOS_INLINE_FUNCTION __float128
90 const __float128 scalingFactor =
91 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
92 static_cast<__float128
> (2.0);
93 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
94 const __float128 lowerOrderTerm =
95 static_cast<__float128
> (gen.drand ()) * scalingFactor;
96 return higherOrderTerm + lowerOrderTerm;
98 static KOKKOS_INLINE_FUNCTION __float128
99 draw (Generator& gen,
const __float128& range)
102 const __float128 scalingFactor =
103 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
104 static_cast<__float128
> (2.0);
105 const __float128 higherOrderTerm =
106 static_cast<__float128
> (gen.drand (range));
107 const __float128 lowerOrderTerm =
108 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
109 return higherOrderTerm + lowerOrderTerm;
111 static KOKKOS_INLINE_FUNCTION __float128
112 draw (Generator& gen,
const __float128& start,
const __float128& end)
115 const __float128 scalingFactor =
116 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
117 static_cast<__float128
> (2.0);
118 const __float128 higherOrderTerm =
119 static_cast<__float128
> (gen.drand (start, end));
120 const __float128 lowerOrderTerm =
121 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
122 return higherOrderTerm + lowerOrderTerm;
144 template<
class ST,
class LO,
class GO,
class NT>
146 allocDualView (
const size_t lclNumRows,
147 const size_t numCols,
148 const bool zeroOut =
true,
149 const bool allowPadding =
false)
151 using ::Tpetra::Details::Behavior;
152 using Kokkos::AllowPadding;
153 using Kokkos::view_alloc;
154 using Kokkos::WithoutInitializing;
156 typedef typename dual_view_type::t_dev dev_view_type;
161 const std::string label (
"MV::DualView");
162 const bool debug = Behavior::debug ();
172 dev_view_type d_view;
175 d_view = dev_view_type (view_alloc (label, AllowPadding),
176 lclNumRows, numCols);
179 d_view = dev_view_type (view_alloc (label),
180 lclNumRows, numCols);
185 d_view = dev_view_type (view_alloc (label,
188 lclNumRows, numCols);
191 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
192 lclNumRows, numCols);
203 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
204 KokkosBlas::fill (d_view, nan);
208 TEUCHOS_TEST_FOR_EXCEPTION
209 (
static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
210 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
211 "allocDualView: d_view's dimensions actual dimensions do not match "
212 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
213 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
214 <<
". Please report this bug to the Tpetra developers.");
217 dual_view_type dv (d_view, Kokkos::create_mirror_view (d_view));
231 template<
class T,
class ExecSpace>
232 struct MakeUnmanagedView {
245 typedef typename Kokkos::Impl::if_c<
246 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
247 typename ExecSpace::memory_space,
248 Kokkos::HostSpace>::value,
249 typename ExecSpace::device_type,
250 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
251 typedef Kokkos::LayoutLeft array_layout;
252 typedef Kokkos::View<T*, array_layout, host_exec_space,
253 Kokkos::MemoryUnmanaged> view_type;
255 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
257 const size_t numEnt =
static_cast<size_t> (x_in.size ());
261 return view_type (x_in.getRawPtr (), numEnt);
269 template<
class DualViewType>
271 takeSubview (
const DualViewType& X,
272 const Kokkos::Impl::ALL_t&,
273 const std::pair<size_t, size_t>& colRng)
275 if (X.extent (0) == 0 && X.extent (1) != 0) {
276 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
279 return subview (X, Kokkos::ALL (), colRng);
286 template<
class DualViewType>
288 takeSubview (
const DualViewType& X,
289 const std::pair<size_t, size_t>& rowRng,
290 const std::pair<size_t, size_t>& colRng)
292 if (X.extent (0) == 0 && X.extent (1) != 0) {
293 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
296 return subview (X, rowRng, colRng);
300 template<
class DualViewType>
302 getDualViewStride (
const DualViewType& dv)
306 const size_t LDA = dv.d_view.stride (1);
307 const size_t numRows = dv.extent (0);
310 return (numRows == 0) ? size_t (1) : numRows;
317 template<
class ViewType>
319 getViewStride (
const ViewType& view)
321 const size_t LDA = view.stride (1);
322 const size_t numRows = view.extent (0);
325 return (numRows == 0) ? size_t (1) : numRows;
332 template <
class impl_scalar_type,
class buffer_device_type>
334 runKernelOnHost ( Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports )
336 if (! imports.need_sync_device ()) {
341 return imports.extent(0) <= localLengthThreshold;
346 template <
class SC,
class LO,
class GO,
class NT>
348 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
350 if (! X.need_sync_device ()) {
354 constexpr
size_t localLengthThreshold = 10000;
355 return X.getLocalLength () <= localLengthThreshold;
359 template <
class SC,
class LO,
class GO,
class NT>
361 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
367 using val_type =
typename MV::impl_scalar_type;
368 using mag_type =
typename MV::mag_type;
369 using dual_view_type =
typename MV::dual_view_type;
372 auto comm = map.is_null () ? nullptr : map->getComm ().getRawPtr ();
373 auto whichVecs = getMultiVectorWhichVectors (X);
377 const bool runOnHost = runKernelOnHost (X);
379 using view_type =
typename dual_view_type::t_host;
380 using array_layout =
typename view_type::array_layout;
381 using device_type =
typename view_type::device_type;
387 normImpl<val_type, array_layout, device_type,
388 mag_type> (norms, X_lcl, whichNorm, whichVecs,
389 isConstantStride, isDistributed, comm);
392 using view_type =
typename dual_view_type::t_dev;
393 using array_layout =
typename view_type::array_layout;
394 using device_type =
typename view_type::device_type;
400 normImpl<val_type, array_layout, device_type,
401 mag_type> (norms, X_lcl, whichNorm, whichVecs,
402 isConstantStride, isDistributed, comm);
410 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
412 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
413 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
414 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
417 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
423 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
425 MultiVector (
const Teuchos::RCP<const map_type>& map,
426 const size_t numVecs,
427 const bool zeroOut) :
433 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
437 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
440 const Teuchos::DataAccess copyOrView) :
442 view_ (source.view_),
443 origView_ (source.origView_),
444 whichVectors_ (source.whichVectors_)
447 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
448 "const Teuchos::DataAccess): ";
452 if (copyOrView == Teuchos::Copy) {
456 this->
view_ = cpy.view_;
460 else if (copyOrView == Teuchos::View) {
463 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
464 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
465 "invalid value " << copyOrView <<
". Valid values include "
466 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
467 << Teuchos::View <<
".");
471 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
473 MultiVector (
const Teuchos::RCP<const map_type>& map,
479 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
480 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
481 map->getNodeNumElements ();
482 const size_t lclNumRows_view = view.extent (0);
483 const size_t LDA = getDualViewStride (view);
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
486 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
487 std::invalid_argument,
"Kokkos::DualView does not match Map. "
488 "map->getNodeNumElements() = " << lclNumRows_map
489 <<
", view.extent(0) = " << lclNumRows_view
490 <<
", and getStride() = " << LDA <<
".");
492 using ::Tpetra::Details::Behavior;
493 const bool debug = Behavior::debug ();
495 using ::Tpetra::Details::checkGlobalDualViewValidity;
496 std::ostringstream gblErrStrm;
497 const bool verbose = Behavior::verbose ();
498 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
499 const bool gblValid =
500 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503 (! gblValid, std::runtime_error, gblErrStrm.str ());
508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
510 MultiVector (
const Teuchos::RCP<const map_type>& map,
511 const typename dual_view_type::t_dev& d_view) :
514 using Teuchos::ArrayRCP;
516 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
520 const size_t LDA = getViewStride (d_view);
521 const size_t lclNumRows = map->getNodeNumElements ();
522 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
523 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
524 "Kokkos::View. map->getNodeNumElements() = " << lclNumRows
525 <<
", View's column stride = " << LDA
526 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
528 auto h_view = Kokkos::create_mirror_view (d_view);
531 using ::Tpetra::Details::Behavior;
532 const bool debug = Behavior::debug ();
534 using ::Tpetra::Details::checkGlobalDualViewValidity;
535 std::ostringstream gblErrStrm;
536 const bool verbose = Behavior::verbose ();
537 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
538 const bool gblValid =
539 checkGlobalDualViewValidity (&gblErrStrm,
view_, verbose,
541 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
542 (! gblValid, std::runtime_error, gblErrStrm.str ());
551 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
553 MultiVector (
const Teuchos::RCP<const map_type>& map,
560 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
562 const size_t LDA = getDualViewStride (origView);
564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
565 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
566 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
567 <<
". This may also mean that the input origView's first dimension (number "
568 "of rows = " << origView.extent (0) <<
") does not not match the number "
569 "of entries in the Map on the calling process.");
571 using ::Tpetra::Details::Behavior;
572 const bool debug = Behavior::debug ();
574 using ::Tpetra::Details::checkGlobalDualViewValidity;
575 std::ostringstream gblErrStrm;
576 const bool verbose = Behavior::verbose ();
577 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
578 const bool gblValid_0 =
579 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
581 const bool gblValid_1 =
582 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
584 const bool gblValid = gblValid_0 && gblValid_1;
585 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
586 (! gblValid, std::runtime_error, gblErrStrm.str ());
591 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
593 MultiVector (
const Teuchos::RCP<const map_type>& map,
595 const Teuchos::ArrayView<const size_t>& whichVectors) :
599 whichVectors_ (whichVectors.begin (), whichVectors.end ())
602 using Kokkos::subview;
603 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
605 using ::Tpetra::Details::Behavior;
606 const bool debug = Behavior::debug ();
608 using ::Tpetra::Details::checkGlobalDualViewValidity;
609 std::ostringstream gblErrStrm;
610 const bool verbose = Behavior::verbose ();
611 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
612 const bool gblValid =
613 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
615 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
616 (! gblValid, std::runtime_error, gblErrStrm.str ());
619 const size_t lclNumRows = map.is_null () ? size_t (0) :
620 map->getNodeNumElements ();
627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
628 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
629 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
630 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
631 if (whichVectors.size () != 0) {
632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
633 view.extent (1) != 0 && view.extent (1) == 0,
634 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
635 " = " << whichVectors.size () <<
" > 0.");
636 size_t maxColInd = 0;
637 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
638 for (size_type k = 0; k < whichVectors.size (); ++k) {
639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
640 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
641 std::invalid_argument,
"whichVectors[" << k <<
"] = "
642 "Teuchos::OrdinalTraits<size_t>::invalid().");
643 maxColInd = std::max (maxColInd, whichVectors[k]);
645 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
646 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
647 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
648 <<
" <= max(whichVectors) = " << maxColInd <<
".");
653 const size_t LDA = getDualViewStride (view);
654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
655 (LDA < lclNumRows, std::invalid_argument,
656 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
658 if (whichVectors.size () == 1) {
667 const std::pair<size_t, size_t> colRng (whichVectors[0],
668 whichVectors[0] + 1);
676 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
678 MultiVector (
const Teuchos::RCP<const map_type>& map,
681 const Teuchos::ArrayView<const size_t>& whichVectors) :
684 origView_ (origView),
685 whichVectors_ (whichVectors.begin (), whichVectors.end ())
688 using Kokkos::subview;
689 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
691 using ::Tpetra::Details::Behavior;
692 const bool debug = Behavior::debug ();
694 using ::Tpetra::Details::checkGlobalDualViewValidity;
695 std::ostringstream gblErrStrm;
696 const bool verbose = Behavior::verbose ();
697 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
698 const bool gblValid_0 =
699 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
701 const bool gblValid_1 =
702 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
704 const bool gblValid = gblValid_0 && gblValid_1;
705 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
706 (! gblValid, std::runtime_error, gblErrStrm.str ());
709 const size_t lclNumRows = this->getLocalLength ();
716 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
717 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
718 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
719 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
720 if (whichVectors.size () != 0) {
721 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
722 view.extent (1) != 0 && view.extent (1) == 0,
723 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
724 " = " << whichVectors.size () <<
" > 0.");
725 size_t maxColInd = 0;
726 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
727 for (size_type k = 0; k < whichVectors.size (); ++k) {
728 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
729 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
730 std::invalid_argument,
"whichVectors[" << k <<
"] = "
731 "Teuchos::OrdinalTraits<size_t>::invalid().");
732 maxColInd = std::max (maxColInd, whichVectors[k]);
734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
735 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
736 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
737 <<
" <= max(whichVectors) = " << maxColInd <<
".");
742 const size_t LDA = getDualViewStride (origView);
743 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
744 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
745 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
746 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
747 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
749 if (whichVectors.size () == 1) {
758 const std::pair<size_t, size_t> colRng (whichVectors[0],
759 whichVectors[0] + 1);
760 view_ = takeSubview (view_, ALL (), colRng);
761 origView_ = takeSubview (origView_, ALL (), colRng);
763 whichVectors_.clear ();
767 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
769 MultiVector (
const Teuchos::RCP<const map_type>& map,
770 const Teuchos::ArrayView<const Scalar>& data,
772 const size_t numVecs) :
775 typedef LocalOrdinal LO;
776 typedef GlobalOrdinal GO;
777 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
783 const size_t lclNumRows =
784 map.is_null () ? size_t (0) : map->getNodeNumElements ();
785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
786 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
787 "map->getNodeNumElements() = " << lclNumRows <<
".");
789 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
790 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
791 (
static_cast<size_t> (data.size ()) < minNumEntries,
792 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
793 "entries, given the input Map and number of vectors in the MultiVector."
794 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
795 "map->getNodeNumElements () = " << minNumEntries <<
".");
798 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
811 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
812 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
813 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
818 const size_t outStride =
819 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
820 if (LDA == outStride) {
826 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
828 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
830 for (
size_t j = 0; j < numVecs; ++j) {
831 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
832 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
838 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
840 MultiVector (
const Teuchos::RCP<const map_type>& map,
841 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
842 const size_t numVecs) :
846 typedef LocalOrdinal LO;
847 typedef GlobalOrdinal GO;
848 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
851 const size_t lclNumRows =
852 map.is_null () ? size_t (0) : map->getNodeNumElements ();
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
854 (numVecs < 1 || numVecs !=
static_cast<size_t> (ArrayOfPtrs.size ()),
855 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
856 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
857 for (
size_t j = 0; j < numVecs; ++j) {
858 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
860 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
861 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
862 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
866 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
867 this->modify_device ();
868 auto X_out = this->getLocalViewDevice ();
872 using array_layout =
typename decltype (X_out)::array_layout;
873 using input_col_view_type =
typename Kokkos::View<
const IST*,
876 Kokkos::MemoryUnmanaged>;
878 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
879 for (
size_t j = 0; j < numVecs; ++j) {
880 Teuchos::ArrayView<const IST> X_j_av =
881 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
882 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
883 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
889 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
892 return whichVectors_.empty ();
895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
900 if (this->getMap ().is_null ()) {
901 return static_cast<size_t> (0);
903 return this->getMap ()->getNodeNumElements ();
907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
912 if (this->getMap ().is_null ()) {
913 return static_cast<size_t> (0);
915 return this->getMap ()->getGlobalNumElements ();
919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
924 return isConstantStride () ? getDualViewStride (origView_) : size_t (0);
927 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
937 if (src ==
nullptr) {
947 return src->getNumVectors () == this->getNumVectors ();
951 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
955 return this->getNumVectors ();
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
963 const size_t numSameIDs,
964 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
965 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs)
967 using ::Tpetra::Details::Behavior;
969 using ::Tpetra::Details::ProfilingRegion;
971 using KokkosRefactor::Details::permute_array_multi_column;
972 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
973 using Kokkos::Compat::create_const_view;
975 const char tfecfFuncName[] =
"copyAndPermute: ";
976 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
978 const bool verbose = Behavior::verbose ();
979 std::unique_ptr<std::string> prefix;
981 auto map = this->getMap ();
982 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
983 const int myRank = comm.is_null () ? -1 : comm->getRank ();
984 std::ostringstream os;
985 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
986 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
987 os <<
"Start" << endl;
988 std::cerr << os.str ();
991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
992 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
993 std::logic_error,
"permuteToLIDs.extent(0) = "
994 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
995 << permuteFromLIDs.extent (0) <<
".");
998 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
999 const size_t numCols = this->getNumVectors ();
1003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1004 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1005 std::logic_error,
"Input MultiVector needs sync to both host "
1007 const bool copyOnHost = runKernelOnHost(sourceMV);
1009 std::ostringstream os;
1010 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1011 std::cerr << os.str ();
1015 sourceMV.sync_host();
1017 this->modify_host ();
1020 sourceMV.sync_device();
1021 if (this->need_sync_device ()) {
1022 this->sync_device ();
1024 this->modify_device ();
1028 std::ostringstream os;
1029 os << *prefix <<
"Copy" << endl;
1030 std::cerr << os.str ();
1055 if (numSameIDs > 0) {
1056 const std::pair<size_t, size_t> rows (0, numSameIDs);
1058 auto tgt_h = this->getLocalViewHost ();
1059 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1061 for (
size_t j = 0; j < numCols; ++j) {
1062 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1063 const size_t srcCol =
1064 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1066 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1067 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1072 auto tgt_d = this->getLocalViewDevice ();
1073 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1075 for (
size_t j = 0; j < numCols; ++j) {
1076 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1077 const size_t srcCol =
1078 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1080 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1081 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1097 if (permuteFromLIDs.extent (0) == 0 ||
1098 permuteToLIDs.extent (0) == 0) {
1100 std::ostringstream os;
1101 os << *prefix <<
"No permutations. Done!" << endl;
1102 std::cerr << os.str ();
1108 std::ostringstream os;
1109 os << *prefix <<
"Permute" << endl;
1110 std::cerr << os.str ();
1115 const bool nonConstStride =
1116 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1119 std::ostringstream os;
1120 os << *prefix <<
"nonConstStride="
1121 << (nonConstStride ?
"true" :
"false") << endl;
1122 std::cerr << os.str ();
1129 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1130 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1131 if (nonConstStride) {
1132 if (this->whichVectors_.size () == 0) {
1133 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1134 tmpTgt.modify_host ();
1135 for (
size_t j = 0; j < numCols; ++j) {
1136 tmpTgt.h_view(j) = j;
1139 tmpTgt.sync_device ();
1141 tgtWhichVecs = tmpTgt;
1144 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1146 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1150 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1151 (
static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1152 this->getNumVectors (),
1153 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1154 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1155 this->getNumVectors () <<
".");
1157 if (sourceMV.whichVectors_.size () == 0) {
1158 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1159 tmpSrc.modify_host ();
1160 for (
size_t j = 0; j < numCols; ++j) {
1161 tmpSrc.h_view(j) = j;
1164 tmpSrc.sync_device ();
1166 srcWhichVecs = tmpSrc;
1169 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1170 sourceMV.whichVectors_ ();
1172 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1176 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177 (
static_cast<size_t> (srcWhichVecs.extent (0)) !=
1178 sourceMV.getNumVectors (), std::logic_error,
1179 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1180 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1186 std::ostringstream os;
1187 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1188 std::cerr << os.str ();
1190 auto tgt_h = this->getLocalViewHost ();
1191 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1193 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1194 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1195 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1196 auto permuteFromLIDs_h =
1197 create_const_view (permuteFromLIDs.view_host ());
1200 std::ostringstream os;
1201 os << *prefix <<
"Permute on host" << endl;
1202 std::cerr << os.str ();
1204 if (nonConstStride) {
1207 auto tgtWhichVecs_h =
1208 create_const_view (tgtWhichVecs.view_host ());
1209 auto srcWhichVecs_h =
1210 create_const_view (srcWhichVecs.view_host ());
1211 permute_array_multi_column_variable_stride (tgt_h, src_h,
1215 srcWhichVecs_h, numCols);
1218 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1219 permuteFromLIDs_h, numCols);
1224 std::ostringstream os;
1225 os << *prefix <<
"Get permute LIDs on device" << endl;
1226 std::cerr << os.str ();
1228 auto tgt_d = this->getLocalViewDevice ();
1229 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1231 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1232 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1233 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1234 auto permuteFromLIDs_d =
1235 create_const_view (permuteFromLIDs.view_device ());
1238 std::ostringstream os;
1239 os << *prefix <<
"Permute on device" << endl;
1240 std::cerr << os.str ();
1242 if (nonConstStride) {
1245 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1246 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1247 permute_array_multi_column_variable_stride (tgt_d, src_d,
1251 srcWhichVecs_d, numCols);
1254 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1255 permuteFromLIDs_d, numCols);
1260 std::ostringstream os;
1261 os << *prefix <<
"Done!" << endl;
1262 std::cerr << os.str ();
1266 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1271 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1272 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1273 Kokkos::DualView<size_t*, buffer_device_type> ,
1274 size_t& constantNumPackets,
1277 using ::Tpetra::Details::Behavior;
1278 using ::Tpetra::Details::ProfilingRegion;
1280 using Kokkos::Compat::create_const_view;
1281 using Kokkos::Compat::getKokkosViewDeepCopy;
1284 const char tfecfFuncName[] =
"packAndPrepare: ";
1285 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1293 const bool debugCheckIndices = Behavior::debug ();
1298 const bool printDebugOutput = Behavior::verbose ();
1300 std::unique_ptr<std::string> prefix;
1301 if (printDebugOutput) {
1302 auto map = this->getMap ();
1303 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1304 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1305 std::ostringstream os;
1306 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1307 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1308 os <<
"Start" << endl;
1309 std::cerr << os.str ();
1313 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1315 const size_t numCols = sourceMV.getNumVectors ();
1320 constantNumPackets = numCols;
1324 if (exportLIDs.extent (0) == 0) {
1325 if (printDebugOutput) {
1326 std::ostringstream os;
1327 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1328 std::cerr << os.str ();
1348 const size_t numExportLIDs = exportLIDs.extent (0);
1349 const size_t newExportsSize = numCols * numExportLIDs;
1350 if (printDebugOutput) {
1351 std::ostringstream os;
1352 os << *prefix <<
"realloc: "
1353 <<
"numExportLIDs: " << numExportLIDs
1354 <<
", exports.extent(0): " << exports.extent (0)
1355 <<
", newExportsSize: " << newExportsSize << std::endl;
1356 std::cerr << os.str ();
1362 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1363 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1364 std::logic_error,
"Input MultiVector needs sync to both host "
1366 const bool packOnHost = runKernelOnHost(sourceMV);
1367 auto src_dev = sourceMV.getLocalViewHost ();
1368 auto src_host = sourceMV.getLocalViewDevice ();
1369 if (printDebugOutput) {
1370 std::ostringstream os;
1371 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1372 std::cerr << os.str ();
1384 exports.clear_sync_state ();
1385 exports.modify_host ();
1386 sourceMV.sync_host();
1394 exports.clear_sync_state ();
1395 exports.modify_device ();
1396 sourceMV.sync_device();
1412 if (sourceMV.isConstantStride ()) {
1413 using KokkosRefactor::Details::pack_array_single_column;
1414 if (printDebugOutput) {
1415 std::ostringstream os;
1416 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1417 std::cerr << os.str ();
1420 pack_array_single_column (exports.view_host (),
1421 create_const_view (src_host),
1422 exportLIDs.view_host (),
1427 pack_array_single_column (exports.view_device (),
1428 create_const_view (src_dev),
1429 exportLIDs.view_device (),
1435 using KokkosRefactor::Details::pack_array_single_column;
1436 if (printDebugOutput) {
1437 std::ostringstream os;
1438 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1439 std::cerr << os.str ();
1442 pack_array_single_column (exports.view_host (),
1443 create_const_view (src_host),
1444 exportLIDs.view_host (),
1445 sourceMV.whichVectors_[0],
1449 pack_array_single_column (exports.view_device (),
1450 create_const_view (src_dev),
1451 exportLIDs.view_device (),
1452 sourceMV.whichVectors_[0],
1458 if (sourceMV.isConstantStride ()) {
1459 using KokkosRefactor::Details::pack_array_multi_column;
1460 if (printDebugOutput) {
1461 std::ostringstream os;
1462 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1463 std::cerr << os.str ();
1466 pack_array_multi_column (exports.view_host (),
1467 create_const_view (src_host),
1468 exportLIDs.view_host (),
1473 pack_array_multi_column (exports.view_device (),
1474 create_const_view (src_dev),
1475 exportLIDs.view_device (),
1481 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1482 if (printDebugOutput) {
1483 std::ostringstream os;
1484 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1486 std::cerr << os.str ();
1491 using IST = impl_scalar_type;
1492 using DV = Kokkos::DualView<IST*, device_type>;
1493 using HES =
typename DV::t_host::execution_space;
1494 using DES =
typename DV::t_dev::execution_space;
1495 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1497 pack_array_multi_column_variable_stride
1498 (exports.view_host (),
1499 create_const_view (src_host),
1500 exportLIDs.view_host (),
1501 getKokkosViewDeepCopy<HES> (whichVecs),
1506 pack_array_multi_column_variable_stride
1507 (exports.view_device (),
1508 create_const_view (src_dev),
1509 exportLIDs.view_device (),
1510 getKokkosViewDeepCopy<DES> (whichVecs),
1517 if (printDebugOutput) {
1518 std::ostringstream os;
1519 os << *prefix <<
"Done!" << endl;
1520 std::cerr << os.str ();
1525 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1527 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1529 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1530 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1531 Kokkos::DualView<size_t*, buffer_device_type> ,
1532 const size_t constantNumPackets,
1536 using ::Tpetra::Details::Behavior;
1537 using ::Tpetra::Details::ProfilingRegion;
1538 using KokkosRefactor::Details::unpack_array_multi_column;
1539 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1540 using Kokkos::Compat::getKokkosViewDeepCopy;
1543 const char longFuncName[] =
"Tpetra::MultiVector::unpackAndCombine";
1544 const char tfecfFuncName[] =
"unpackAndCombine: ";
1545 ProfilingRegion regionUAC (longFuncName);
1553 const bool debugCheckIndices = Behavior::debug ();
1555 const bool printDebugOutput = Behavior::verbose ();
1556 std::unique_ptr<std::string> prefix;
1557 if (printDebugOutput) {
1558 auto map = this->getMap ();
1559 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1560 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1561 std::ostringstream os;
1562 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1563 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1564 os <<
"Start" << endl;
1565 std::cerr << os.str ();
1569 if (importLIDs.extent (0) == 0) {
1570 if (printDebugOutput) {
1571 std::ostringstream os;
1572 os << *prefix <<
"No imports. Done!" << endl;
1577 const size_t numVecs = getNumVectors ();
1578 if (debugCheckIndices) {
1579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1580 (
static_cast<size_t> (imports.extent (0)) !=
1581 numVecs * importLIDs.extent (0),
1583 "imports.extent(0) = " << imports.extent (0)
1584 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1585 <<
" * " << importLIDs.extent (0) <<
" = "
1586 << numVecs * importLIDs.extent (0) <<
".");
1588 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1589 (constantNumPackets ==
static_cast<size_t> (0), std::runtime_error,
1590 "constantNumPackets input argument must be nonzero.");
1592 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1593 (
static_cast<size_t> (numVecs) !=
1594 static_cast<size_t> (constantNumPackets),
1595 std::runtime_error,
"constantNumPackets must equal numVecs.");
1601 const bool unpackOnHost = runKernelOnHost(imports);
1603 if (printDebugOutput) {
1604 std::ostringstream os;
1605 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1607 std::cerr << os.str ();
1613 imports.sync_host();
1615 this->modify_host ();
1618 imports.sync_device();
1619 this->sync_device ();
1620 this->modify_device ();
1622 auto X_d = this->getLocalViewDevice ();
1623 auto X_h = this->getLocalViewHost ();
1624 auto imports_d = imports.view_device ();
1625 auto imports_h = imports.view_host ();
1626 auto importLIDs_d = importLIDs.view_device ();
1627 auto importLIDs_h = importLIDs.view_host ();
1629 Kokkos::DualView<size_t*, device_type> whichVecs;
1630 if (! isConstantStride ()) {
1631 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1632 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1634 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1636 whichVecs.modify_host ();
1640 whichVecs.modify_device ();
1644 auto whichVecs_d = whichVecs.view_device ();
1645 auto whichVecs_h = whichVecs.view_host ();
1655 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1656 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1657 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1660 const bool use_atomic_updates = unpackOnHost ?
1661 host_exec_space::concurrency () != 1 :
1662 dev_exec_space::concurrency () != 1;
1664 if (printDebugOutput) {
1665 std::ostringstream os;
1667 std::cerr << os.str ();
1674 using op_type = KokkosRefactor::Details::InsertOp<IST>;
1675 if (isConstantStride ()) {
1677 unpack_array_multi_column (host_exec_space (),
1678 X_h, imports_h, importLIDs_h,
1679 op_type (), numVecs,
1685 unpack_array_multi_column (dev_exec_space (),
1686 X_d, imports_d, importLIDs_d,
1687 op_type (), numVecs,
1694 unpack_array_multi_column_variable_stride (host_exec_space (),
1704 unpack_array_multi_column_variable_stride (dev_exec_space (),
1715 else if (CM ==
ADD) {
1716 using op_type = KokkosRefactor::Details::AddOp<IST>;
1717 if (isConstantStride ()) {
1719 unpack_array_multi_column (host_exec_space (),
1720 X_h, imports_h, importLIDs_h,
1721 op_type (), numVecs,
1726 unpack_array_multi_column (dev_exec_space (),
1727 X_d, imports_d, importLIDs_d,
1728 op_type (), numVecs,
1735 unpack_array_multi_column_variable_stride (host_exec_space (),
1745 unpack_array_multi_column_variable_stride (dev_exec_space (),
1757 using op_type = KokkosRefactor::Details::AbsMaxOp<IST>;
1758 if (isConstantStride ()) {
1760 unpack_array_multi_column (host_exec_space (),
1761 X_h, imports_h, importLIDs_h,
1762 op_type (), numVecs,
1767 unpack_array_multi_column (dev_exec_space (),
1768 X_d, imports_d, importLIDs_d,
1769 op_type (), numVecs,
1776 unpack_array_multi_column_variable_stride (host_exec_space (),
1786 unpack_array_multi_column_variable_stride (dev_exec_space (),
1798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1799 (
true, std::logic_error,
"Invalid CombineMode");
1803 if (printDebugOutput) {
1804 std::ostringstream os;
1805 os << *prefix <<
"Nothing to unpack" << endl;
1806 std::cerr << os.str ();
1810 if (printDebugOutput) {
1811 std::ostringstream os;
1812 os << *prefix <<
"Done!" << endl;
1813 std::cerr << os.str ();
1817 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1822 if (isConstantStride ()) {
1823 return static_cast<size_t> (view_.extent (1));
1825 return static_cast<size_t> (whichVectors_.size ());
1833 gblDotImpl (
const RV& dotsOut,
1834 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1835 const bool distributed)
1837 using Teuchos::REDUCE_MAX;
1838 using Teuchos::REDUCE_SUM;
1839 using Teuchos::reduceAll;
1840 typedef typename RV::non_const_value_type dot_type;
1842 const size_t numVecs = dotsOut.extent (0);
1857 if (distributed && ! comm.is_null ()) {
1860 const int nv =
static_cast<int> (numVecs);
1861 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
1863 if (commIsInterComm) {
1867 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1869 const dot_type*
const lclSum = lclDots.data ();
1870 dot_type*
const gblSum = dotsOut.data ();
1871 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1874 dot_type*
const inout = dotsOut.data ();
1875 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
1881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1885 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
1887 using ::Tpetra::Details::Behavior;
1888 using Kokkos::subview;
1889 using Teuchos::Comm;
1890 using Teuchos::null;
1893 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
1895 typedef typename dual_view_type::t_dev XMV;
1896 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1900 const size_t numVecs = this->getNumVectors ();
1904 const size_t lclNumRows = this->getLocalLength ();
1905 const size_t numDots =
static_cast<size_t> (dots.extent (0));
1906 const bool debug = Behavior::debug ();
1909 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1911 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
1912 "compatible with the input MultiVector A. We only test for this "
1923 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1925 "MultiVectors do not have the same local length. "
1926 "this->getLocalLength() = " << lclNumRows <<
" != "
1928 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1930 "MultiVectors must have the same number of columns (vectors). "
1931 "this->getNumVectors() = " << numVecs <<
" != "
1933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1934 numDots != numVecs, std::runtime_error,
1935 "The output array 'dots' must have the same number of entries as the "
1936 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
1937 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1939 const std::pair<size_t, size_t> colRng (0, numVecs);
1940 RV dotsOut = subview (dots, colRng);
1941 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1942 this->getMap ()->getComm ();
1945 if (this->need_sync_device ()) {
1946 const_cast<MV&
>(*this).sync_device ();
1949 const_cast<MV&
>(A).sync_device ();
1952 auto thisView = this->getLocalViewDevice ();
1955 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1956 this->whichVectors_.getRawPtr (),
1959 gblDotImpl (dotsOut, comm, this->isDistributed ());
1963 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1965 multiVectorSingleColumnDot (MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& x,
1966 const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& y)
1968 using ::Tpetra::Details::ProfilingRegion;
1970 using dot_type =
typename MV::dot_type;
1971 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
1973 auto map = x.getMap ();
1974 Teuchos::RCP<const Teuchos::Comm<int> > comm =
1975 map.is_null () ? Teuchos::null : map->getComm ();
1976 if (comm.is_null ()) {
1977 return Kokkos::ArithTraits<dot_type>::zero ();
1980 using LO = LocalOrdinal;
1984 const LO lclNumRows =
static_cast<LO
> (std::min (x.getLocalLength (),
1985 y.getLocalLength ()));
1986 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
1987 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
1988 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
1991 if (x.need_sync_device ()) {
1994 if (y.need_sync_device ()) {
1995 const_cast<MV&
>(y).sync_device ();
1999 auto x_2d = x.getLocalViewDevice ();
2000 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2001 auto y_2d = y.getLocalViewDevice ();
2002 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2003 lclDot = KokkosBlas::dot (x_1d, y_1d);
2005 if (x.isDistributed ()) {
2006 using Teuchos::outArg;
2007 using Teuchos::REDUCE_SUM;
2008 using Teuchos::reduceAll;
2009 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2021 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2025 const Teuchos::ArrayView<dot_type>& dots)
const
2028 const char tfecfFuncName[] =
"dot: ";
2031 const size_t numVecs = this->getNumVectors ();
2032 const size_t lclNumRows = this->getLocalLength ();
2033 const size_t numDots =
static_cast<size_t> (dots.size ());
2043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2045 "MultiVectors do not have the same local length. "
2046 "this->getLocalLength() = " << lclNumRows <<
" != "
2048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2050 "MultiVectors must have the same number of columns (vectors). "
2051 "this->getNumVectors() = " << numVecs <<
" != "
2053 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2054 (numDots != numVecs, std::runtime_error,
2055 "The output array 'dots' must have the same number of entries as the "
2056 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2057 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2060 const dot_type gblDot = multiVectorSingleColumnDot (
const_cast<MV&
> (*
this), A);
2064 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2068 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2071 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2074 using ::Tpetra::Details::NORM_TWO;
2075 using ::Tpetra::Details::ProfilingRegion;
2076 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2079 MV& X =
const_cast<MV&
> (*this);
2080 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2083 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2086 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2088 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2089 this->norm2 (norms_av);
2093 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2096 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2099 using ::Tpetra::Details::NORM_ONE;
2100 using ::Tpetra::Details::ProfilingRegion;
2101 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2104 MV& X =
const_cast<MV&
> (*this);
2105 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2108 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2111 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2113 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2114 this->norm1 (norms_av);
2117 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2120 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2123 using ::Tpetra::Details::NORM_INF;
2124 using ::Tpetra::Details::ProfilingRegion;
2125 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2128 MV& X =
const_cast<MV&
> (*this);
2129 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2132 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2135 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2137 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2138 this->normInf (norms_av);
2141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2144 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2148 using Kokkos::subview;
2149 using Teuchos::Comm;
2151 using Teuchos::reduceAll;
2152 using Teuchos::REDUCE_SUM;
2153 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2155 const size_t lclNumRows = this->getLocalLength ();
2156 const size_t numVecs = this->getNumVectors ();
2157 const size_t numMeans =
static_cast<size_t> (means.size ());
2159 TEUCHOS_TEST_FOR_EXCEPTION(
2160 numMeans != numVecs, std::runtime_error,
2161 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2162 <<
" != this->getNumVectors() = " << numVecs <<
".");
2164 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2165 const std::pair<size_t, size_t> colRng (0, numVecs);
2170 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2172 typename local_view_type::HostMirror::array_layout,
2174 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2175 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2177 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2178 this->getMap ()->getComm ();
2181 const bool useHostVersion = this->need_sync_device ();
2182 if (useHostVersion) {
2184 auto X_lcl = subview (getLocalViewHost (),
2185 rowRng, Kokkos::ALL ());
2187 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2188 if (isConstantStride ()) {
2189 KokkosBlas::sum (lclSums, X_lcl);
2192 for (
size_t j = 0; j < numVecs; ++j) {
2193 const size_t col = whichVectors_[j];
2194 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2201 if (! comm.is_null () && this->isDistributed ()) {
2202 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2203 lclSums.data (), meansOut.data ());
2211 auto X_lcl = subview (this->getLocalViewDevice (),
2212 rowRng, Kokkos::ALL ());
2215 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2216 if (isConstantStride ()) {
2217 KokkosBlas::sum (lclSums, X_lcl);
2220 for (
size_t j = 0; j < numVecs; ++j) {
2221 const size_t col = whichVectors_[j];
2222 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2230 if (! comm.is_null () && this->isDistributed ()) {
2231 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2232 lclSums.data (), meansOut.data ());
2242 const impl_scalar_type OneOverN =
2243 ATS::one () /
static_cast<mag_type
> (this->getGlobalLength ());
2244 for (
size_t k = 0; k < numMeans; ++k) {
2245 meansOut(k) = meansOut(k) * OneOverN;
2250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2256 typedef Kokkos::Details::ArithTraits<IST> ATS;
2257 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2258 typedef typename pool_type::generator_type generator_type;
2260 const IST max = Kokkos::rand<generator_type, IST>::max ();
2261 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2263 this->randomize (
static_cast<Scalar
> (min),
static_cast<Scalar
> (max));
2267 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2270 randomize (
const Scalar& minVal,
const Scalar& maxVal)
2273 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2284 const uint64_t myRank =
2285 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2286 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2287 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2289 pool_type rand_pool (seed);
2290 const IST max =
static_cast<IST
> (maxVal);
2291 const IST min =
static_cast<IST
> (minVal);
2296 this->view_.clear_sync_state();
2298 this->modify_device ();
2299 auto thisView = this->getLocalViewDevice ();
2301 if (isConstantStride ()) {
2302 Kokkos::fill_random (thisView, rand_pool, min, max);
2305 const size_t numVecs = getNumVectors ();
2306 for (
size_t k = 0; k < numVecs; ++k) {
2307 const size_t col = whichVectors_[k];
2308 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2309 Kokkos::fill_random (X_k, rand_pool, min, max);
2314 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2319 using ::Tpetra::Details::ProfilingRegion;
2320 using ::Tpetra::Details::Blas::fill;
2321 using DES =
typename dual_view_type::t_dev::execution_space;
2322 using HES =
typename dual_view_type::t_host::execution_space;
2323 using LO = LocalOrdinal;
2324 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2329 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2330 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2336 const bool runOnHost = runKernelOnHost(*
this);
2338 this->clear_sync_state();
2340 this->modify_device ();
2341 auto X = this->getLocalViewDevice ();
2342 if (this->isConstantStride ()) {
2343 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2346 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2347 this->whichVectors_.getRawPtr ());
2351 this->modify_host ();
2352 auto X = this->getLocalViewHost ();
2353 if (this->isConstantStride ()) {
2354 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2357 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2358 this->whichVectors_.getRawPtr ());
2364 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2367 replaceMap (
const Teuchos::RCP<const map_type>& newMap)
2369 using Teuchos::ArrayRCP;
2370 using Teuchos::Comm;
2373 using LO = LocalOrdinal;
2374 using GO = GlobalOrdinal;
2380 TEUCHOS_TEST_FOR_EXCEPTION(
2381 ! this->isConstantStride (), std::logic_error,
2382 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2383 "if the MultiVector is a column view of another MultiVector (that is, if "
2384 "isConstantStride() == false).");
2414 #ifdef HAVE_TEUCHOS_DEBUG
2430 if (this->getMap ().is_null ()) {
2435 TEUCHOS_TEST_FOR_EXCEPTION(
2436 newMap.is_null (), std::invalid_argument,
2437 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2438 "This probably means that the input Map is incorrect.");
2442 const size_t newNumRows = newMap->getNodeNumElements ();
2443 const size_t origNumRows = view_.extent (0);
2444 const size_t numCols = this->getNumVectors ();
2446 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2447 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2450 else if (newMap.is_null ()) {
2453 const size_t newNumRows =
static_cast<size_t> (0);
2454 const size_t numCols = this->getNumVectors ();
2455 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2458 this->map_ = newMap;
2461 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2464 scale (
const Scalar& alpha)
2469 const IST theAlpha =
static_cast<IST
> (alpha);
2470 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2473 const size_t lclNumRows = getLocalLength ();
2474 const size_t numVecs = getNumVectors ();
2475 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2476 const std::pair<size_t, size_t> colRng (0, numVecs);
2484 const bool useHostVersion = need_sync_device ();
2485 if (useHostVersion) {
2486 auto Y_lcl = Kokkos::subview (getLocalViewHost (), rowRng, ALL ());
2487 if (isConstantStride ()) {
2488 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2491 for (
size_t k = 0; k < numVecs; ++k) {
2492 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2493 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2494 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2499 auto Y_lcl = Kokkos::subview (getLocalViewDevice (), rowRng, ALL ());
2500 if (isConstantStride ()) {
2501 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2504 for (
size_t k = 0; k < numVecs; ++k) {
2505 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2506 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2507 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2517 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2519 const size_t numVecs = this->getNumVectors ();
2520 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2521 TEUCHOS_TEST_FOR_EXCEPTION(
2522 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2523 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2527 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2528 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2529 k_alphas.modify_host ();
2530 for (
size_t i = 0; i < numAlphas; ++i) {
2533 k_alphas.sync_device ();
2535 this->scale (k_alphas.view_device ());
2538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2541 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2544 using Kokkos::subview;
2546 const size_t lclNumRows = this->getLocalLength ();
2547 const size_t numVecs = this->getNumVectors ();
2548 TEUCHOS_TEST_FOR_EXCEPTION(
2549 static_cast<size_t> (alphas.extent (0)) != numVecs,
2550 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2551 "alphas.extent(0) = " << alphas.extent (0)
2552 <<
" != this->getNumVectors () = " << numVecs <<
".");
2553 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2554 const std::pair<size_t, size_t> colRng (0, numVecs);
2564 const bool useHostVersion = this->need_sync_device ();
2565 if (useHostVersion) {
2568 auto alphas_h = Kokkos::create_mirror_view (alphas);
2571 auto Y_lcl = subview (this->getLocalViewHost (), rowRng, ALL ());
2572 if (isConstantStride ()) {
2573 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2576 for (
size_t k = 0; k < numVecs; ++k) {
2577 const size_t Y_col = this->isConstantStride () ? k :
2578 this->whichVectors_[k];
2579 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2582 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2587 auto Y_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2588 if (isConstantStride ()) {
2589 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2596 auto alphas_h = Kokkos::create_mirror_view (alphas);
2599 for (
size_t k = 0; k < numVecs; ++k) {
2600 const size_t Y_col = this->isConstantStride () ? k :
2601 this->whichVectors_[k];
2602 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2603 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2609 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2612 scale (
const Scalar& alpha,
2616 using Kokkos::subview;
2618 const char tfecfFuncName[] =
"scale: ";
2620 const size_t lclNumRows = getLocalLength ();
2621 const size_t numVecs = getNumVectors ();
2623 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2625 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2629 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2633 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2634 const std::pair<size_t, size_t> colRng (0, numVecs);
2637 if (this->need_sync_device ()) {
2638 this->sync_device ();
2641 const_cast<MV&
>(A).sync_device ();
2644 this->modify_device ();
2645 auto Y_lcl_orig = this->getLocalViewDevice ();
2647 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2648 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
2651 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2655 for (
size_t k = 0; k < numVecs; ++k) {
2656 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2658 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2659 auto X_k = subview (X_lcl, ALL (), X_col);
2661 KokkosBlas::scal (Y_k, theAlpha, X_k);
2668 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2674 const char tfecfFuncName[] =
"reciprocal: ";
2676 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2678 "MultiVectors do not have the same local length. "
2679 "this->getLocalLength() = " << getLocalLength ()
2681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2682 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2683 ": MultiVectors do not have the same number of columns (vectors). "
2684 "this->getNumVectors() = " << getNumVectors ()
2685 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2687 const size_t numVecs = getNumVectors ();
2690 if (this->need_sync_device ()) {
2691 this->sync_device ();
2694 const_cast<MV&
>(A).sync_device ();
2696 this->modify_device ();
2698 auto this_view_dev = this->getLocalViewDevice ();
2702 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2706 using Kokkos::subview;
2707 for (
size_t k = 0; k < numVecs; ++k) {
2708 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2709 auto vector_k = subview (this_view_dev, ALL (), this_col);
2710 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2711 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2712 KokkosBlas::reciprocal (vector_k, vector_Ak);
2717 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2723 const char tfecfFuncName[] =
"abs";
2725 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2727 ": MultiVectors do not have the same local length. "
2728 "this->getLocalLength() = " << getLocalLength ()
2730 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2731 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2732 ": MultiVectors do not have the same number of columns (vectors). "
2733 "this->getNumVectors() = " << getNumVectors ()
2734 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2735 const size_t numVecs = getNumVectors ();
2738 if (this->need_sync_device ()) {
2739 this->sync_device ();
2742 const_cast<MV&
>(A).sync_device ();
2744 this->modify_device ();
2746 auto this_view_dev = this->getLocalViewDevice ();
2750 KokkosBlas::abs (this_view_dev, A_view_dev);
2754 using Kokkos::subview;
2756 for (
size_t k=0; k < numVecs; ++k) {
2757 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2758 auto vector_k = subview (this_view_dev, ALL (), this_col);
2759 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2760 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2761 KokkosBlas::abs (vector_k, vector_Ak);
2766 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2769 update (
const Scalar& alpha,
2773 const char tfecfFuncName[] =
"update: ";
2774 using Kokkos::subview;
2780 const size_t lclNumRows = getLocalLength ();
2781 const size_t numVecs = getNumVectors ();
2783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2785 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2789 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2793 if (this->need_sync_device ()) {
2794 this->sync_device ();
2797 const_cast<MV&
>(A).sync_device ();
2802 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2803 const std::pair<size_t, size_t> colRng (0, numVecs);
2805 auto Y_lcl_orig = this->getLocalViewDevice ();
2806 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2808 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2811 this->modify_device ();
2813 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2817 for (
size_t k = 0; k < numVecs; ++k) {
2818 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2820 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2821 auto X_k = subview (X_lcl, ALL (), X_col);
2823 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2831 update (
const Scalar& alpha,
2835 const Scalar& gamma)
2838 using Kokkos::subview;
2841 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
2845 const size_t lclNumRows = this->getLocalLength ();
2846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2848 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
2849 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2853 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
2854 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2856 const size_t numVecs = getNumVectors ();
2857 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2859 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
2860 "but this MultiVector has " << numVecs <<
" column(s).");
2861 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2863 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
2864 "but this MultiVector has " << numVecs <<
" column(s).");
2871 if (this->need_sync_device ()) this->sync_device ();
2876 this->modify_device ();
2878 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2879 const std::pair<size_t, size_t> colRng (0, numVecs);
2884 auto C_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2889 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
2894 for (
size_t k = 0; k < numVecs; ++k) {
2895 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2898 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
2899 theBeta, subview (B_lcl, rowRng, B_col),
2900 theGamma, subview (C_lcl, rowRng, this_col));
2905 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2906 Teuchos::ArrayRCP<const Scalar>
2913 const char tfecfFuncName[] =
"getData: ";
2921 auto hostView = getLocalViewHost ();
2922 const size_t col = isConstantStride () ? j : whichVectors_[j];
2923 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
2924 Teuchos::ArrayRCP<const IST> dataAsArcp =
2925 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
2927 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2928 (
static_cast<size_t> (hostView_j.extent (0)) <
2929 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
2930 "hostView_j.extent(0) = " << hostView_j.extent (0)
2931 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
2932 "Please report this bug to the Tpetra developers.");
2934 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
2937 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2938 Teuchos::ArrayRCP<Scalar>
2943 using Kokkos::subview;
2946 const char tfecfFuncName[] =
"getDataNonConst: ";
2952 const_cast<MV*
> (
this)->sync_host ();
2957 auto hostView = getLocalViewHost ();
2958 const size_t col = isConstantStride () ? j : whichVectors_[j];
2959 auto hostView_j = subview (hostView, ALL (), col);
2960 Teuchos::ArrayRCP<IST> dataAsArcp =
2961 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
2963 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2964 (
static_cast<size_t> (hostView_j.extent (0)) <
2965 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
2966 "hostView_j.extent(0) = " << hostView_j.extent (0)
2967 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
2968 "Please report this bug to the Tpetra developers.");
2970 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
2973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2974 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
2976 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
2983 bool contiguous =
true;
2984 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
2985 for (
size_t j = 1; j < numCopyVecs; ++j) {
2986 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
2991 if (contiguous && numCopyVecs > 0) {
2992 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
2995 RCP<const MV> X_sub = this->subView (cols);
2996 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3002 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3003 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3005 subCopy (
const Teuchos::Range1D &colRng)
const
3010 RCP<const MV> X_sub = this->subView (colRng);
3011 RCP<MV> Y (
new MV (this->getMap (),
static_cast<size_t> (colRng.size ()),
false));
3016 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3020 return origView_.extent (0);
3023 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3027 return origView_.extent (1);
3030 template <
class Scalar,
class LO,
class GO,
class Node>
3033 const Teuchos::RCP<const map_type>& subMap,
3034 const local_ordinal_type rowOffset) :
3038 using Kokkos::subview;
3039 using Teuchos::outArg;
3042 using Teuchos::reduceAll;
3043 using Teuchos::REDUCE_MIN;
3046 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3047 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3050 std::unique_ptr<std::ostringstream> errStrm;
3057 const auto comm = subMap->getComm ();
3058 TEUCHOS_ASSERT( ! comm.is_null () );
3059 const int myRank = comm->getRank ();
3061 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3063 const LO newNumRows =
static_cast<LO
> (subMap->getNodeNumElements ());
3065 std::ostringstream os;
3066 os <<
"Proc " << myRank <<
": " << prefix
3067 <<
"X: {lclNumRows: " << lclNumRowsBefore
3069 <<
", numCols: " << numCols <<
"}, "
3070 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3071 std::cerr << os.str ();
3076 const bool tooManyElts =
3079 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3080 *errStrm <<
" Proc " << myRank <<
": subMap->getNodeNumElements() (="
3081 << newNumRows <<
") + rowOffset (=" << rowOffset
3085 TEUCHOS_TEST_FOR_EXCEPTION
3086 (! debug && tooManyElts, std::invalid_argument,
3087 prefix << errStrm->str () << suffix);
3091 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3093 std::ostringstream gblErrStrm;
3094 const std::string myErrStr =
3095 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3096 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3097 TEUCHOS_TEST_FOR_EXCEPTION
3098 (
true, std::invalid_argument, gblErrStrm.str ());
3102 using range_type = std::pair<LO, LO>;
3103 const range_type origRowRng
3104 (rowOffset,
static_cast<LO
> (X.
origView_.extent (0)));
3105 const range_type rowRng
3106 (rowOffset, rowOffset + newNumRows);
3120 subview (rowOffset == 0 ? X.
origView_ : X.
view_, rowRng, ALL ());
3127 if (newOrigView.extent (0) == 0 &&
3128 newOrigView.extent (1) != X.
origView_.extent (1)) {
3130 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3132 if (newView.extent (0) == 0 &&
3133 newView.extent (1) != X.
view_.extent (1)) {
3135 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3139 MV (subMap, newView, newOrigView) :
3143 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3144 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3145 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3147 if (errStrm.get () ==
nullptr) {
3148 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3150 *errStrm <<
" Proc " << myRank <<
3151 ": subMap.getNodeNumElements(): " << newNumRows <<
3152 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3153 ", X.getNumVectors(): " << numCols <<
3154 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3156 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3158 std::ostringstream gblErrStrm;
3160 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3161 "dimensions on one or more processes:" << endl;
3163 const std::string myErrStr =
3164 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3165 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3166 gblErrStrm << suffix << endl;
3167 TEUCHOS_TEST_FOR_EXCEPTION
3168 (
true, std::invalid_argument, gblErrStrm.str ());
3173 std::ostringstream os;
3174 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3175 std::cerr << os.str ();
3181 std::ostringstream os;
3182 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3183 std::cerr << os.str ();
3187 template <
class Scalar,
class LO,
class GO,
class Node>
3189 MultiVector (
const MultiVector<Scalar, LO, GO, Node>& X,
3190 const map_type& subMap,
3191 const size_t rowOffset) :
3192 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3196 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3197 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3199 offsetView (
const Teuchos::RCP<const map_type>& subMap,
3200 const size_t offset)
const
3203 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3206 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3207 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3210 const size_t offset)
3213 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3216 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3217 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3219 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3221 using Teuchos::Array;
3225 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3226 TEUCHOS_TEST_FOR_EXCEPTION(
3227 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3228 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3229 "contain at least one entry, but cols.size() = " << cols.size ()
3234 bool contiguous =
true;
3235 for (
size_t j = 1; j < numViewCols; ++j) {
3236 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3242 if (numViewCols == 0) {
3244 return rcp (
new MV (this->getMap (), numViewCols));
3247 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3251 if (isConstantStride ()) {
3252 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3255 Array<size_t> newcols (cols.size ());
3256 for (
size_t j = 0; j < numViewCols; ++j) {
3257 newcols[j] = whichVectors_[cols[j]];
3259 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3264 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3265 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3267 subView (
const Teuchos::Range1D& colRng)
const
3269 using ::Tpetra::Details::Behavior;
3271 using Kokkos::subview;
3272 using Teuchos::Array;
3276 const char tfecfFuncName[] =
"subView(Range1D): ";
3278 const size_t lclNumRows = this->getLocalLength ();
3279 const size_t numVecs = this->getNumVectors ();
3283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3284 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3285 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3287 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3288 numVecs != 0 && colRng.size () != 0 &&
3289 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3290 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3291 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3292 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3293 "[0, " << numVecs <<
"].");
3295 RCP<const MV> X_ret;
3305 const std::pair<size_t, size_t> rows (0, lclNumRows);
3306 if (colRng.size () == 0) {
3307 const std::pair<size_t, size_t> cols (0, 0);
3309 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3313 if (isConstantStride ()) {
3314 const std::pair<size_t, size_t> cols (colRng.lbound (),
3315 colRng.ubound () + 1);
3317 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3320 if (
static_cast<size_t> (colRng.size ()) ==
static_cast<size_t> (1)) {
3323 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3324 whichVectors_[0] + colRng.ubound () + 1);
3326 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3329 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3330 whichVectors_.begin () + colRng.ubound () + 1);
3331 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3336 const bool debug = Behavior::debug ();
3338 using Teuchos::Comm;
3339 using Teuchos::outArg;
3340 using Teuchos::REDUCE_MIN;
3341 using Teuchos::reduceAll;
3343 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3344 Teuchos::null : this->getMap ()->getComm ();
3345 if (! comm.is_null ()) {
3349 if (X_ret.is_null ()) {
3352 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3354 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3355 "MultiVector; the return value of this method) is null on some MPI "
3356 "process in this MultiVector's communicator. This should never "
3357 "happen. Please report this bug to the Tpetra developers.");
3358 if (! X_ret.is_null () &&
3359 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3362 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3363 outArg (gblSuccess));
3364 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3365 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3366 "colRng.size(), on at least one MPI process in this MultiVector's "
3367 "communicator. This should never happen. "
3368 "Please report this bug to the Tpetra developers.");
3375 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3376 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3381 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3385 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3386 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3391 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3395 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3401 using Kokkos::subview;
3402 typedef std::pair<size_t, size_t> range_type;
3403 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3406 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3407 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3408 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3410 static_cast<size_t> (j) :
3412 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3424 const size_t newSize = X.
imports_.extent (0) / numCols;
3425 const size_t offset = jj*newSize;
3427 newImports.d_view = subview (X.
imports_.d_view,
3428 range_type (offset, offset+newSize));
3429 newImports.h_view = subview (X.
imports_.h_view,
3430 range_type (offset, offset+newSize));
3434 const size_t newSize = X.
exports_.extent (0) / numCols;
3435 const size_t offset = jj*newSize;
3437 newExports.d_view = subview (X.
exports_.d_view,
3438 range_type (offset, offset+newSize));
3439 newExports.h_view = subview (X.
exports_.h_view,
3440 range_type (offset, offset+newSize));
3451 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3452 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3457 return Teuchos::rcp (
new V (*
this, j));
3461 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3462 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3467 return Teuchos::rcp (
new V (*
this, j));
3471 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3474 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3476 using dev_view_type =
typename dual_view_type::t_dev;
3477 using host_view_type =
typename dual_view_type::t_host;
3479 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3481 Kokkos::MemoryUnmanaged>;
3482 const char tfecfFuncName[] =
"get1dCopy: ";
3484 const size_t numRows = this->getLocalLength ();
3485 const size_t numCols = this->getNumVectors ();
3487 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3488 (LDA < numRows, std::runtime_error,
3489 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3491 (numRows >
size_t (0) && numCols >
size_t (0) &&
3492 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3494 "A.size() = " << A.size () <<
", but its size must be at least "
3495 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3497 const std::pair<size_t, size_t> rowRange (0, numRows);
3498 const std::pair<size_t, size_t> colRange (0, numCols);
3500 input_view_type A_view_orig (
reinterpret_cast<IST*
> (A.getRawPtr ()),
3502 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3509 const bool useHostVersion = this->need_sync_device ();
3511 dev_view_type srcView_dev;
3512 host_view_type srcView_host;
3513 if (useHostVersion) {
3514 srcView_host = this->getLocalViewHost ();
3517 srcView_dev = this->getLocalViewDevice ();
3520 if (this->isConstantStride ()) {
3521 if (useHostVersion) {
3529 for (
size_t j = 0; j < numCols; ++j) {
3530 const size_t srcCol = this->whichVectors_[j];
3531 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3533 if (useHostVersion) {
3534 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3538 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3546 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3549 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3552 const char tfecfFuncName[] =
"get2dCopy: ";
3553 const size_t numRows = this->getLocalLength ();
3554 const size_t numCols = this->getNumVectors ();
3556 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3557 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3558 std::runtime_error,
"Input array of pointers must contain as many "
3559 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3560 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3562 if (numRows != 0 && numCols != 0) {
3564 for (
size_t j = 0; j < numCols; ++j) {
3565 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3567 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3568 "the input array of arrays is not long enough to fit all entries in "
3569 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3570 <<
" < getLocalLength() = " << numRows <<
".");
3574 for (
size_t j = 0; j < numCols; ++j) {
3575 Teuchos::RCP<const V> X_j = this->getVector (j);
3576 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3577 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3583 template <
class SC,
class LO,
class GO,
class NT>
3586 const bool markModified)
3603 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3604 Teuchos::ArrayRCP<const Scalar>
3608 if (getLocalLength () == 0 || getNumVectors () == 0) {
3609 return Teuchos::null;
3611 TEUCHOS_TEST_FOR_EXCEPTION(
3612 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3613 "get1dView: This MultiVector does not have constant stride, so it is "
3614 "not possible to view its data as a single array. You may check "
3615 "whether a MultiVector has constant stride by calling "
3616 "isConstantStride().");
3620 constexpr
bool markModified =
false;
3622 auto X_lcl = syncMVToHostIfNeededAndGetHostView (
const_cast<MV&
> (*
this),
3624 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3625 Kokkos::Compat::persistingView (X_lcl);
3626 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3630 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3631 Teuchos::ArrayRCP<Scalar>
3635 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3636 return Teuchos::null;
3639 TEUCHOS_TEST_FOR_EXCEPTION
3640 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3641 "get1dViewNonConst: This MultiVector does not have constant stride, "
3642 "so it is not possible to view its data as a single array. You may "
3643 "check whether a MultiVector has constant stride by calling "
3644 "isConstantStride().");
3645 constexpr
bool markModified =
true;
3646 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3647 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3648 Kokkos::Compat::persistingView (X_lcl);
3649 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3653 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3654 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3658 constexpr
bool markModified =
true;
3659 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3665 const size_t myNumRows = this->getLocalLength ();
3666 const size_t numCols = this->getNumVectors ();
3667 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3669 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3670 for (
size_t j = 0; j < numCols; ++j) {
3671 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3672 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3673 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3674 Kokkos::Compat::persistingView (X_lcl_j);
3675 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3680 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3681 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3688 constexpr
bool markModified =
false;
3690 auto X_lcl = syncMVToHostIfNeededAndGetHostView (
const_cast<MV&
> (*
this),
3696 const size_t myNumRows = this->getLocalLength ();
3697 const size_t numCols = this->getNumVectors ();
3698 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3700 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3701 for (
size_t j = 0; j < numCols; ++j) {
3702 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3703 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3704 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3705 Kokkos::Compat::persistingView (X_lcl_j);
3706 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
3711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3715 Teuchos::ETransp transB,
3716 const Scalar& alpha,
3721 using ::Tpetra::Details::ProfilingRegion;
3722 using Teuchos::CONJ_TRANS;
3723 using Teuchos::NO_TRANS;
3724 using Teuchos::TRANS;
3727 using Teuchos::rcpFromRef;
3729 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3731 using STS = Teuchos::ScalarTraits<Scalar>;
3733 const char tfecfFuncName[] =
"multiply: ";
3734 ProfilingRegion region (
"Tpetra::MV::multiply");
3766 const bool C_is_local = ! this->isDistributed ();
3776 auto myMap = this->getMap ();
3777 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
3778 using Teuchos::REDUCE_MIN;
3779 using Teuchos::reduceAll;
3780 using Teuchos::outArg;
3782 auto comm = myMap->getComm ();
3783 const size_t A_nrows =
3785 const size_t A_ncols =
3787 const size_t B_nrows =
3789 const size_t B_ncols =
3792 const bool lclBad = this->getLocalLength () != A_nrows ||
3793 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
3795 const int myRank = comm->getRank ();
3796 std::ostringstream errStrm;
3797 if (this->getLocalLength () != A_nrows) {
3798 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
3799 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
3800 <<
"." << std::endl;
3802 if (this->getNumVectors () != B_ncols) {
3803 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
3804 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
3805 <<
"." << std::endl;
3807 if (A_ncols != B_nrows) {
3808 errStrm <<
"Proc " << myRank <<
": A_ncols="
3809 << A_ncols <<
" != B_nrows=" << B_nrows
3810 <<
"." << std::endl;
3813 const int lclGood = lclBad ? 0 : 1;
3815 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3817 std::ostringstream os;
3818 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
3820 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3821 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
3822 "least one process in this object's communicator." << std::endl
3824 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
3826 << (transA == Teuchos::TRANS ?
"^T" :
3827 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3828 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
3830 << (transA == Teuchos::TRANS ?
"^T" :
3831 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3832 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
3833 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
3843 const bool Case1 = C_is_local && A_is_local && B_is_local;
3845 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
3846 transA != NO_TRANS &&
3849 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
3853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3854 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
3855 "Multiplication of op(A) and op(B) into *this is not a "
3856 "supported use case.");
3858 if (beta != STS::zero () && Case2) {
3864 const int myRank = this->getMap ()->getComm ()->getRank ();
3866 beta_local = ATS::zero ();
3875 if (! isConstantStride ()) {
3876 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
3878 C_tmp = rcp (
this,
false);
3881 RCP<const MV> A_tmp;
3883 A_tmp = rcp (
new MV (A, Teuchos::Copy));
3885 A_tmp = rcpFromRef (A);
3888 RCP<const MV> B_tmp;
3890 B_tmp = rcp (
new MV (B, Teuchos::Copy));
3892 B_tmp = rcpFromRef (B);
3895 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3896 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
3897 ! A_tmp->isConstantStride (), std::logic_error,
3898 "Failed to make temporary constant-stride copies of MultiVectors.");
3901 if (A_tmp->need_sync_device ()) {
3902 const_cast<MV&
> (*A_tmp).sync_device ();
3904 const LO A_lclNumRows = A_tmp->getLocalLength ();
3905 const LO A_numVecs = A_tmp->getNumVectors ();
3906 auto A_lcl = A_tmp->getLocalViewDevice ();
3907 auto A_sub = Kokkos::subview (A_lcl,
3908 std::make_pair (LO (0), A_lclNumRows),
3909 std::make_pair (LO (0), A_numVecs));
3911 if (B_tmp->need_sync_device ()) {
3912 const_cast<MV&
> (*B_tmp).sync_device ();
3914 const LO B_lclNumRows = B_tmp->getLocalLength ();
3915 const LO B_numVecs = B_tmp->getNumVectors ();
3916 auto B_lcl = B_tmp->getLocalViewDevice ();
3917 auto B_sub = Kokkos::subview (B_lcl,
3918 std::make_pair (LO (0), B_lclNumRows),
3919 std::make_pair (LO (0), B_numVecs));
3921 if (C_tmp->need_sync_device ()) {
3922 const_cast<MV&
> (*C_tmp).sync_device ();
3924 const LO C_lclNumRows = C_tmp->getLocalLength ();
3925 const LO C_numVecs = C_tmp->getNumVectors ();
3926 auto C_lcl = C_tmp->getLocalViewDevice ();
3927 auto C_sub = Kokkos::subview (C_lcl,
3928 std::make_pair (LO (0), C_lclNumRows),
3929 std::make_pair (LO (0), C_numVecs));
3930 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
3931 (transA == Teuchos::TRANS ?
'T' :
'C'));
3932 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
3933 (transB == Teuchos::TRANS ?
'T' :
'C'));
3936 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
3938 this->modify_device ();
3940 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
3944 if (! isConstantStride ()) {
3949 A_tmp = Teuchos::null;
3950 B_tmp = Teuchos::null;
3959 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3968 using Kokkos::subview;
3971 const char tfecfFuncName[] =
"elementWiseMultiply: ";
3973 const size_t lclNumRows = this->getLocalLength ();
3974 const size_t numVecs = this->getNumVectors ();
3976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3978 std::runtime_error,
"MultiVectors do not have the same local length.");
3979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3980 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
3981 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
3985 if (this->need_sync_device ()) {
3986 this->sync_device ();
3989 const_cast<V&
>(A).sync_device ();
3992 const_cast<MV&
>(B).sync_device ();
3994 this->modify_device ();
3996 auto this_view = this->getLocalViewDevice ();
4006 KokkosBlas::mult (scalarThis,
4009 subview (A_view, ALL (), 0),
4013 for (
size_t j = 0; j < numVecs; ++j) {
4014 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4016 KokkosBlas::mult (scalarThis,
4017 subview (this_view, ALL (), C_col),
4019 subview (A_view, ALL (), 0),
4020 subview (B_view, ALL (), B_col));
4025 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4031 using ::Tpetra::Details::ProfilingRegion;
4032 ProfilingRegion region (
"Tpetra::MV::reduce");
4034 const auto map = this->getMap ();
4035 if (map.get () ==
nullptr) {
4038 const auto comm = map->getComm ();
4039 if (comm.get () ==
nullptr) {
4045 const bool changed_on_device = this->need_sync_host ();
4046 if (changed_on_device) {
4050 this->modify_device ();
4051 auto X_lcl = this->getLocalViewDevice ();
4055 this->modify_host ();
4056 auto X_lcl = this->getLocalViewHost ();
4061 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4068 #ifdef HAVE_TPETRA_DEBUG
4069 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4070 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4071 TEUCHOS_TEST_FOR_EXCEPTION(
4072 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4074 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4075 <<
" is invalid. The range of valid row indices on this process "
4076 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4077 <<
", " << maxLocalIndex <<
"].");
4078 TEUCHOS_TEST_FOR_EXCEPTION(
4079 vectorIndexOutOfRange(col),
4081 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4082 <<
" of the multivector is invalid.");
4084 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4085 view_.h_view (lclRow, colInd) = ScalarValue;
4089 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4095 const bool atomic)
const
4097 #ifdef HAVE_TPETRA_DEBUG
4098 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4099 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4100 TEUCHOS_TEST_FOR_EXCEPTION(
4101 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4103 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4104 <<
" is invalid. The range of valid row indices on this process "
4105 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4106 <<
", " << maxLocalIndex <<
"].");
4107 TEUCHOS_TEST_FOR_EXCEPTION(
4108 vectorIndexOutOfRange(col),
4110 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4111 <<
" of the multivector is invalid.");
4113 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4115 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4118 view_.h_view (lclRow, colInd) += value;
4123 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4132 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4133 #ifdef HAVE_TPETRA_DEBUG
4134 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4135 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4136 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4138 "Global row index " << gblRow <<
" is not present on this process "
4139 << this->getMap ()->getComm ()->getRank () <<
".");
4140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4141 (this->vectorIndexOutOfRange (col), std::runtime_error,
4142 "Vector index " << col <<
" of the MultiVector is invalid.");
4144 this->replaceLocalValue (lclRow, col, ScalarValue);
4147 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4153 const bool atomic)
const
4157 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4158 #ifdef HAVE_TEUCHOS_DEBUG
4159 TEUCHOS_TEST_FOR_EXCEPTION(
4160 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4162 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4163 <<
" is not present on this process "
4164 << this->getMap ()->getComm ()->getRank () <<
".");
4165 TEUCHOS_TEST_FOR_EXCEPTION(
4166 vectorIndexOutOfRange(col),
4168 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4169 <<
" of the multivector is invalid.");
4171 this->sumIntoLocalValue (lclRow, col, value, atomic);
4174 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4176 Teuchos::ArrayRCP<T>
4182 typename dual_view_type::array_layout,
4184 const size_t col = isConstantStride () ? j : whichVectors_[j];
4185 col_dual_view_type X_col =
4186 Kokkos::subview (view_, Kokkos::ALL (), col);
4187 return Kokkos::Compat::persistingView (X_col.d_view);
4191 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4195 view_.clear_sync_state ();
4198 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4216 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4220 view_.sync_device ();
4223 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4227 return view_.need_sync_host ();
4230 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4234 return view_.need_sync_device ();
4237 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4241 view_.modify_device ();
4244 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4248 view_.modify_host ();
4251 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4255 return view_.view_device ();
4258 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4262 return view_.view_host ();
4265 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4270 using Teuchos::TypeNameTraits;
4272 std::ostringstream out;
4273 out <<
"\"" << className <<
"\": {";
4274 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4275 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4276 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4277 <<
", Node" << Node::name ()
4279 if (this->getObjectLabel () !=
"") {
4280 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4282 out <<
", numRows: " << this->getGlobalLength ();
4283 if (className !=
"Tpetra::Vector") {
4284 out <<
", numCols: " << this->getNumVectors ()
4285 <<
", isConstantStride: " << this->isConstantStride ();
4287 if (this->isConstantStride ()) {
4288 out <<
", columnStride: " << this->getStride ();
4295 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 return this->descriptionImpl (
"Tpetra::MultiVector");
4303 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4308 typedef LocalOrdinal LO;
4311 if (vl <= Teuchos::VERB_LOW) {
4312 return std::string ();
4314 auto map = this->getMap ();
4315 if (map.is_null ()) {
4316 return std::string ();
4318 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4319 auto outp = Teuchos::getFancyOStream (outStringP);
4320 Teuchos::FancyOStream& out = *outp;
4321 auto comm = map->getComm ();
4322 const int myRank = comm->getRank ();
4323 const int numProcs = comm->getSize ();
4325 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4326 Teuchos::OSTab tab1 (out);
4329 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4330 out <<
"Local number of rows: " << lclNumRows << endl;
4332 if (vl > Teuchos::VERB_MEDIUM) {
4335 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4336 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4338 if (this->isConstantStride ()) {
4339 out <<
"Column stride: " << this->getStride () << endl;
4342 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4346 typename dual_view_type::t_host X_host;
4347 if (need_sync_host ()) {
4353 auto X_dev = getLocalViewDevice ();
4354 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4356 X_host = X_host_copy;
4362 X_host = getLocalViewHost ();
4366 out <<
"Values: " << endl
4368 const LO numCols =
static_cast<LO
> (this->getNumVectors ());
4370 for (LO i = 0; i < lclNumRows; ++i) {
4372 if (i + 1 < lclNumRows) {
4378 for (LO i = 0; i < lclNumRows; ++i) {
4379 for (LO j = 0; j < numCols; ++j) {
4381 if (j + 1 < numCols) {
4385 if (i + 1 < lclNumRows) {
4395 return outStringP->str ();
4398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4402 const std::string& className,
4403 const Teuchos::EVerbosityLevel verbLevel)
const
4405 using Teuchos::TypeNameTraits;
4406 using Teuchos::VERB_DEFAULT;
4407 using Teuchos::VERB_NONE;
4408 using Teuchos::VERB_LOW;
4410 const Teuchos::EVerbosityLevel vl =
4411 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4413 if (vl == VERB_NONE) {
4420 auto map = this->getMap ();
4421 if (map.is_null ()) {
4424 auto comm = map->getComm ();
4425 if (comm.is_null ()) {
4429 const int myRank = comm->getRank ();
4438 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4442 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4443 out <<
"\"" << className <<
"\":" << endl;
4444 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4446 out <<
"Template parameters:" << endl;
4447 Teuchos::OSTab tab2 (out);
4448 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4449 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4450 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4451 <<
"Node: " << Node::name () << endl;
4453 if (this->getObjectLabel () !=
"") {
4454 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4456 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4457 if (className !=
"Tpetra::Vector") {
4458 out <<
"Number of columns: " << this->getNumVectors () << endl;
4465 if (vl > VERB_LOW) {
4466 const std::string lclStr = this->localDescribeToString (vl);
4467 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4471 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4474 describe (Teuchos::FancyOStream &out,
4475 const Teuchos::EVerbosityLevel verbLevel)
const
4477 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4480 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4485 replaceMap (newMap);
4488 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4493 using ::Tpetra::Details::localDeepCopy;
4494 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4496 TEUCHOS_TEST_FOR_EXCEPTION
4498 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4499 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4500 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4501 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4502 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4504 TEUCHOS_TEST_FOR_EXCEPTION
4505 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4506 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4507 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4508 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4514 this->clear_sync_state();
4515 this->modify_device ();
4520 if (src_last_updated_on_host) {
4521 localDeepCopy (this->getLocalViewDevice (),
4523 this->isConstantStride (),
4525 this->whichVectors_ (),
4529 localDeepCopy (this->getLocalViewDevice (),
4531 this->isConstantStride (),
4533 this->whichVectors_ (),
4538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4543 using ::Tpetra::Details::PackTraits;
4546 const size_t l1 = this->getLocalLength();
4548 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4555 auto v1 = this->getLocalViewHost ();
4557 if (PackTraits<ST>::packValueCount (v1(0,0)) !=
4558 PackTraits<ST>::packValueCount (v2(0,0))) {
4565 template <
class ST,
class LO,
class GO,
class NT>
4568 std::swap(mv.
map_, this->map_);
4569 std::swap(mv.
view_, this->view_);
4570 std::swap(mv.
origView_, this->origView_);
4574 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4575 template <
class ST,
class LO,
class GO,
class NT>
4578 const Teuchos::SerialDenseMatrix<int, ST>& src)
4580 using ::Tpetra::Details::localDeepCopy;
4582 using IST =
typename MV::impl_scalar_type;
4583 using input_view_type =
4584 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4585 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4586 using pair_type = std::pair<LO, LO>;
4588 const LO numRows =
static_cast<LO
> (src.numRows ());
4589 const LO numCols =
static_cast<LO
> (src.numCols ());
4591 TEUCHOS_TEST_FOR_EXCEPTION
4594 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4595 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4597 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4599 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4600 input_view_type src_orig (src_raw, src.stride (), numCols);
4601 input_view_type src_view =
4602 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4606 constexpr
bool src_isConstantStride =
true;
4607 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4611 src_isConstantStride,
4612 getMultiVectorWhichVectors (dst),
4616 template <
class ST,
class LO,
class GO,
class NT>
4618 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4619 const MultiVector<ST, LO, GO, NT>& src)
4621 using ::Tpetra::Details::localDeepCopy;
4622 using MV = MultiVector<ST, LO, GO, NT>;
4623 using IST =
typename MV::impl_scalar_type;
4624 using output_view_type =
4625 Kokkos::View<IST**, Kokkos::LayoutLeft,
4626 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4627 using pair_type = std::pair<LO, LO>;
4629 const LO numRows =
static_cast<LO
> (dst.numRows ());
4630 const LO numCols =
static_cast<LO
> (dst.numCols ());
4632 TEUCHOS_TEST_FOR_EXCEPTION
4633 (numRows !=
static_cast<LO
> (src.getLocalLength ()) ||
4634 numCols !=
static_cast<LO
> (src.getNumVectors ()),
4635 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4636 << src.getMap ()->getComm ()->getRank () <<
", src is "
4637 << src.getLocalLength () <<
" x " << src.getNumVectors ()
4638 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4640 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4641 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4643 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4645 constexpr
bool dst_isConstantStride =
true;
4646 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4649 if (src.need_sync_host ()) {
4650 localDeepCopy (dst_view,
4651 src.getLocalViewDevice (),
4652 dst_isConstantStride,
4653 src.isConstantStride (),
4655 getMultiVectorWhichVectors (src));
4659 src.getLocalViewHost (),
4660 dst_isConstantStride,
4661 src.isConstantStride (),
4663 getMultiVectorWhichVectors (src));
4668 template <
class Scalar,
class LO,
class GO,
class NT>
4669 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4673 typedef MultiVector<Scalar, LO, GO, NT> MV;
4674 return Teuchos::rcp (
new MV (map, numVectors));
4677 template <
class ST,
class LO,
class GO,
class NT>
4678 MultiVector<ST, LO, GO, NT>
4695 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4696 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4697 template class MultiVector< SCALAR , LO , GO , NODE >; \
4698 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4699 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4700 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4701 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4704 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4705 template class MultiVector< SCALAR , LO , GO , NODE >; \
4706 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
void fill(const ExecutionSpace &execSpace, const ViewType &X, const ValueType &alpha, const IndexType numRows, const IndexType numCols)
Fill the entries of the given 1-D or 2-D Kokkos::View with the given scalar value alpha.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::isInterComm.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Stand-alone utility functions and macros.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
Base class for distributed Tpetra objects that support data redistribution.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
Sets up and executes a communication plan for a Tpetra DistObject.
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
dual_view_type origView_
The "original view" of the MultiVector's data.
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
size_t getStride() const
Stride between columns in the multivector.
virtual size_t constantNumberOfPackets() const
Number of packets to send per LID.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
virtual std::string description() const
A simple one-line description of this object.
void reduce()
Sum values of a locally replicated multivector across all processes.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
void randomize()
Set all values in the multivector to pseudorandom numbers.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Map and their communicator.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value) const
Replace value in host memory, using local (row) index.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
void clear_sync_state()
Clear "modified" flags on both host and device sides.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
size_t getLocalLength() const
Local number of rows on the calling process.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
void modify_device()
Mark data as modified on the device side.
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault) const
Update (+=) a value in host memory, using global row index.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs)
Perform copies and permutations that are local to the calling (MPI) process.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault) const
Update (+=) a value in host memory, using local row index.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with the given verbosity level to a FancyOStream.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value) const
Replace value in host memory, using global row index.
size_t getNumVectors() const
Number of columns in the multivector.
void sync_host()
Synchronize to Host.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
virtual bool checkSizes(const SrcDistObject &sourceObj)
Whether data redistribution between sourceObj and this object is legal.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
void sync_device()
Synchronize to Device.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
void modify_host()
Mark data as modified on the host side.
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
dual_view_type::t_dev getLocalViewDevice() const
A local Kokkos::View of device memory.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
int local_ordinal_type
Default value of Scalar template parameter.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
EWhichNorm
Input argument for normImpl() (which see).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
void localDeepCopy(const DstViewType &dst, const SrcViewType &src, const bool dstConstStride, const bool srcConstStride, const DstWhichVecsType &dstWhichVecs, const SrcWhichVecsType &srcWhichVecs)
Implementation of Tpetra::MultiVector deep copy of local data.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ADD
Sum new values into existing values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.