41#ifndef TPETRA_MULTIVECTOR_DEF_HPP
42#define TPETRA_MULTIVECTOR_DEF_HPP
54#include "Tpetra_Vector.hpp"
66#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
67# include "Teuchos_SerialDenseMatrix.hpp"
69#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
70#include "KokkosCompat_View.hpp"
71#include "KokkosBlas.hpp"
72#include "KokkosKernels_Utils.hpp"
73#include "Kokkos_Random.hpp"
74#include "Kokkos_ArithTraits.hpp"
78#ifdef HAVE_TPETRA_INST_FLOAT128
81 template<
class Generator>
82 struct rand<Generator, __float128> {
83 static KOKKOS_INLINE_FUNCTION __float128 max ()
85 return static_cast<__float128
> (1.0);
87 static KOKKOS_INLINE_FUNCTION __float128
92 const __float128 scalingFactor =
93 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
94 static_cast<__float128
> (2.0);
95 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
96 const __float128 lowerOrderTerm =
97 static_cast<__float128
> (gen.drand ()) * scalingFactor;
98 return higherOrderTerm + lowerOrderTerm;
100 static KOKKOS_INLINE_FUNCTION __float128
101 draw (Generator& gen,
const __float128& range)
104 const __float128 scalingFactor =
105 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
106 static_cast<__float128
> (2.0);
107 const __float128 higherOrderTerm =
108 static_cast<__float128
> (gen.drand (range));
109 const __float128 lowerOrderTerm =
110 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
111 return higherOrderTerm + lowerOrderTerm;
113 static KOKKOS_INLINE_FUNCTION __float128
114 draw (Generator& gen,
const __float128& start,
const __float128& end)
117 const __float128 scalingFactor =
118 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
119 static_cast<__float128
> (2.0);
120 const __float128 higherOrderTerm =
121 static_cast<__float128
> (gen.drand (start, end));
122 const __float128 lowerOrderTerm =
123 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
124 return higherOrderTerm + lowerOrderTerm;
146 template<
class ST,
class LO,
class GO,
class NT>
147 typename Tpetra::MultiVector<ST, LO, GO, NT>::wrapped_dual_view_type
148 allocDualView (
const size_t lclNumRows,
149 const size_t numCols,
150 const bool zeroOut =
true,
151 const bool allowPadding =
false)
154 using Kokkos::AllowPadding;
155 using Kokkos::view_alloc;
156 using Kokkos::WithoutInitializing;
158 typedef typename Tpetra::MultiVector<ST, LO, GO, NT>::wrapped_dual_view_type wrapped_dual_view_type;
159 typedef typename dual_view_type::t_dev dev_view_type;
165 const std::string label (
"MV::DualView");
166 const bool debug = Behavior::debug ();
176 dev_view_type d_view;
179 d_view = dev_view_type (view_alloc (label, AllowPadding),
180 lclNumRows, numCols);
183 d_view = dev_view_type (view_alloc (label),
184 lclNumRows, numCols);
189 d_view = dev_view_type (view_alloc (label,
192 lclNumRows, numCols);
195 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
196 lclNumRows, numCols);
207 const ST nan = Kokkos::ArithTraits<ST>::nan ();
208 KokkosBlas::fill (d_view, nan);
212 TEUCHOS_TEST_FOR_EXCEPTION
213 (
static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
214 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
215 "allocDualView: d_view's dimensions actual dimensions do not match "
216 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
217 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
218 <<
". Please report this bug to the Tpetra developers.");
221 return wrapped_dual_view_type(d_view);
228 template<
class T,
class ExecSpace>
229 struct MakeUnmanagedView {
239 typedef typename std::conditional<
240 Kokkos::SpaceAccessibility<
241 typename ExecSpace::memory_space,
242 Kokkos::HostSpace>::accessible,
243 typename ExecSpace::device_type,
244 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
245 typedef Kokkos::LayoutLeft array_layout;
246 typedef Kokkos::View<T*, array_layout, host_exec_space,
247 Kokkos::MemoryUnmanaged> view_type;
249 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
251 const size_t numEnt =
static_cast<size_t> (x_in.size ());
255 return view_type (x_in.getRawPtr (), numEnt);
261 template<
class WrappedDualViewType>
263 takeSubview (
const WrappedDualViewType& X,
264 const std::pair<size_t, size_t>& rowRng,
265 const Kokkos::ALL_t& colRng)
269 return WrappedDualViewType(X,rowRng,colRng);
272 template<
class WrappedDualViewType>
274 takeSubview (
const WrappedDualViewType& X,
275 const Kokkos::ALL_t& rowRng,
276 const std::pair<size_t, size_t>& colRng)
278 using DualViewType =
typename WrappedDualViewType::DVT;
281 if (X.extent (0) == 0 && X.extent (1) != 0) {
282 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
285 return WrappedDualViewType(X,rowRng,colRng);
289 template<
class WrappedDualViewType>
291 takeSubview (
const WrappedDualViewType& X,
292 const std::pair<size_t, size_t>& rowRng,
293 const std::pair<size_t, size_t>& colRng)
295 using DualViewType =
typename WrappedDualViewType::DVT;
305 if (X.extent (0) == 0 && X.extent (1) != 0) {
306 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
309 return WrappedDualViewType(X,rowRng,colRng);
313 template<
class WrappedOrNotDualViewType>
315 getDualViewStride (
const WrappedOrNotDualViewType& dv)
321 size_t strides[WrappedOrNotDualViewType::t_dev::rank+1];
323 const size_t LDA = strides[1];
324 const size_t numRows = dv.extent (0);
327 return (numRows == 0) ? size_t (1) : numRows;
334 template<
class ViewType>
336 getViewStride (
const ViewType& view)
338 const size_t LDA = view.stride (1);
339 const size_t numRows = view.extent (0);
342 return (numRows == 0) ? size_t (1) : numRows;
349 template <
class impl_scalar_type,
class buffer_device_type>
352 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
355 if (! imports.need_sync_device ()) {
360 size_t localLengthThreshold =
362 return imports.extent(0) <= localLengthThreshold;
367 template <
class SC,
class LO,
class GO,
class NT>
369 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
371 if (! X.need_sync_device ()) {
376 size_t localLengthThreshold =
378 return X.getLocalLength () <= localLengthThreshold;
382 template <
class SC,
class LO,
class GO,
class NT>
384 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
386 const ::Tpetra::Details::EWhichNorm whichNorm)
391 using val_type =
typename MV::impl_scalar_type;
392 using mag_type =
typename MV::mag_type;
393 using dual_view_type =
typename MV::dual_view_type;
396 auto comm = map.is_null () ? nullptr : map->getComm ().getRawPtr ();
397 auto whichVecs = getMultiVectorWhichVectors (X);
401 const bool runOnHost = runKernelOnHost (X);
403 using view_type =
typename dual_view_type::t_host;
404 using array_layout =
typename view_type::array_layout;
405 using device_type =
typename view_type::device_type;
409 mag_type> (norms, X_lcl, whichNorm, whichVecs,
410 isConstantStride, isDistributed, comm);
413 using view_type =
typename dual_view_type::t_dev;
414 using array_layout =
typename view_type::array_layout;
415 using device_type =
typename view_type::device_type;
419 mag_type> (norms, X_lcl, whichNorm, whichVecs,
420 isConstantStride, isDistributed, comm);
429 template <
typename DstView,
typename SrcView>
430 struct AddAssignFunctor {
433 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
435 KOKKOS_INLINE_FUNCTION
void
436 operator () (
const size_t k)
const { tgt(k) += src(k); }
443 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
447 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
453 base_type (Teuchos::rcp (new
map_type ()))
456 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
458 MultiVector (
const Teuchos::RCP<const map_type>& map,
459 const size_t numVecs,
460 const bool zeroOut) :
466 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
469 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
472 const Teuchos::DataAccess copyOrView) :
478 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
479 "const Teuchos::DataAccess): ";
483 if (copyOrView == Teuchos::Copy) {
487 this->
view_ = cpy.view_;
490 else if (copyOrView == Teuchos::View) {
493 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
494 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
495 "invalid value " << copyOrView <<
". Valid values include "
496 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
497 << Teuchos::View <<
".");
501 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
503 MultiVector (
const Teuchos::RCP<const map_type>& map,
506 view_ (wrapped_dual_view_type(view))
508 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
509 const size_t lclNumRows_map = map.is_null () ?
size_t (0) :
510 map->getLocalNumElements ();
511 const size_t lclNumRows_view = view.extent (0);
512 const size_t LDA = getDualViewStride (
view_);
514 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
515 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
516 std::invalid_argument,
"Kokkos::DualView does not match Map. "
517 "map->getLocalNumElements() = " << lclNumRows_map
518 <<
", view.extent(0) = " << lclNumRows_view
519 <<
", and getStride() = " << LDA <<
".");
522 const bool debug = Behavior::debug ();
524 using ::Tpetra::Details::checkGlobalDualViewValidity;
525 std::ostringstream gblErrStrm;
526 const bool verbose = Behavior::verbose ();
527 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
528 const bool gblValid =
529 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
531 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
532 (! gblValid, std::runtime_error, gblErrStrm.str ());
537 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
539 MultiVector (
const Teuchos::RCP<const map_type>& map,
540 const wrapped_dual_view_type& view) :
544 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
545 const size_t lclNumRows_map = map.is_null () ?
size_t (0) :
546 map->getLocalNumElements ();
547 const size_t lclNumRows_view = view.extent (0);
548 const size_t LDA = getDualViewStride (view);
550 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
551 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
552 std::invalid_argument,
"Kokkos::DualView does not match Map. "
553 "map->getLocalNumElements() = " << lclNumRows_map
554 <<
", view.extent(0) = " << lclNumRows_view
555 <<
", and getStride() = " << LDA <<
".");
558 const bool debug = Behavior::debug ();
560 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
561 std::ostringstream gblErrStrm;
562 const bool verbose = Behavior::verbose ();
563 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
564 const bool gblValid =
565 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
568 (! gblValid, std::runtime_error, gblErrStrm.str ());
574 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
576 MultiVector (
const Teuchos::RCP<const map_type>& map,
577 const typename dual_view_type::t_dev& d_view) :
580 using Teuchos::ArrayRCP;
582 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
586 const size_t LDA = getViewStride (d_view);
587 const size_t lclNumRows = map->getLocalNumElements ();
588 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
589 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
590 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
591 <<
", View's column stride = " << LDA
592 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
594 auto h_view = Kokkos::create_mirror_view (d_view);
596 view_ = wrapped_dual_view_type(dual_view);
599 const bool debug = Behavior::debug ();
601 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
602 std::ostringstream gblErrStrm;
603 const bool verbose = Behavior::verbose ();
604 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
605 const bool gblValid =
606 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
608 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
609 (! gblValid, std::runtime_error, gblErrStrm.str ());
614 dual_view.modify_device ();
617 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
619 MultiVector (
const Teuchos::RCP<const map_type>& map,
623 view_ (wrapped_dual_view_type(view,origView))
625 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
627 const size_t LDA = getDualViewStride (origView);
629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
630 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
631 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
632 <<
". This may also mean that the input origView's first dimension (number "
633 "of rows = " << origView.extent (0) <<
") does not not match the number "
634 "of entries in the Map on the calling process.");
637 const bool debug = Behavior::debug ();
639 using ::Tpetra::Details::checkGlobalDualViewValidity;
640 std::ostringstream gblErrStrm;
641 const bool verbose = Behavior::verbose ();
642 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
643 const bool gblValid_0 =
644 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
646 const bool gblValid_1 =
647 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
649 const bool gblValid = gblValid_0 && gblValid_1;
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
651 (! gblValid, std::runtime_error, gblErrStrm.str ());
656 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
658 MultiVector (
const Teuchos::RCP<const map_type>& map,
660 const Teuchos::ArrayView<const size_t>& whichVectors) :
666 using Kokkos::subview;
667 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
670 const bool debug = Behavior::debug ();
672 using ::Tpetra::Details::checkGlobalDualViewValidity;
673 std::ostringstream gblErrStrm;
674 const bool verbose = Behavior::verbose ();
675 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
676 const bool gblValid =
677 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
679 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
680 (! gblValid, std::runtime_error, gblErrStrm.str ());
683 const size_t lclNumRows = map.is_null () ? size_t (0) :
684 map->getLocalNumElements ();
691 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
692 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
693 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
694 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
695 if (whichVectors.size () != 0) {
696 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
697 view.extent (1) != 0 && view.extent (1) == 0,
698 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
699 " = " << whichVectors.size () <<
" > 0.");
700 size_t maxColInd = 0;
701 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
702 for (size_type k = 0; k < whichVectors.size (); ++k) {
703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
704 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
705 std::invalid_argument,
"whichVectors[" << k <<
"] = "
706 "Teuchos::OrdinalTraits<size_t>::invalid().");
707 maxColInd = std::max (maxColInd, whichVectors[k]);
709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
710 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
711 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
712 <<
" <= max(whichVectors) = " << maxColInd <<
".");
717 const size_t LDA = getDualViewStride (view);
718 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
719 (LDA < lclNumRows, std::invalid_argument,
720 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
722 if (whichVectors.size () == 1) {
732 const std::pair<size_t, size_t> colRng (whichVectors[0],
733 whichVectors[0] + 1);
740 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
742 MultiVector (
const Teuchos::RCP<const map_type>& map,
743 const wrapped_dual_view_type& view,
744 const Teuchos::ArrayView<const size_t>& whichVectors) :
750 using Kokkos::subview;
751 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
754 const bool debug = Behavior::debug ();
756 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
757 std::ostringstream gblErrStrm;
758 const bool verbose = Behavior::verbose ();
759 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
760 const bool gblValid =
761 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
763 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
764 (! gblValid, std::runtime_error, gblErrStrm.str ());
767 const size_t lclNumRows = map.is_null () ? size_t (0) :
768 map->getLocalNumElements ();
775 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
776 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
777 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
778 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
779 if (whichVectors.size () != 0) {
780 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
781 view.extent (1) != 0 && view.extent (1) == 0,
782 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
783 " = " << whichVectors.size () <<
" > 0.");
784 size_t maxColInd = 0;
785 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
786 for (size_type k = 0; k < whichVectors.size (); ++k) {
787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
788 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
789 std::invalid_argument,
"whichVectors[" << k <<
"] = "
790 "Teuchos::OrdinalTraits<size_t>::invalid().");
791 maxColInd = std::max (maxColInd, whichVectors[k]);
793 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
794 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
795 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
796 <<
" <= max(whichVectors) = " << maxColInd <<
".");
801 const size_t LDA = getDualViewStride (view);
802 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
803 (LDA < lclNumRows, std::invalid_argument,
804 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
806 if (whichVectors.size () == 1) {
816 const std::pair<size_t, size_t> colRng (whichVectors[0],
817 whichVectors[0] + 1);
825 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
827 MultiVector (
const Teuchos::RCP<const map_type>& map,
830 const Teuchos::ArrayView<const size_t>& whichVectors) :
832 view_ (wrapped_dual_view_type(view,origView)),
836 using Kokkos::subview;
837 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
840 const bool debug = Behavior::debug ();
842 using ::Tpetra::Details::checkGlobalDualViewValidity;
843 std::ostringstream gblErrStrm;
844 const bool verbose = Behavior::verbose ();
845 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
846 const bool gblValid_0 =
847 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
849 const bool gblValid_1 =
850 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
852 const bool gblValid = gblValid_0 && gblValid_1;
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
854 (! gblValid, std::runtime_error, gblErrStrm.str ());
864 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
865 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
866 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
867 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
868 if (whichVectors.size () != 0) {
869 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
870 view.extent (1) != 0 && view.extent (1) == 0,
871 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
872 " = " << whichVectors.size () <<
" > 0.");
873 size_t maxColInd = 0;
874 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
875 for (size_type k = 0; k < whichVectors.size (); ++k) {
876 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
877 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
878 std::invalid_argument,
"whichVectors[" << k <<
"] = "
879 "Teuchos::OrdinalTraits<size_t>::invalid().");
880 maxColInd = std::max (maxColInd, whichVectors[k]);
882 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
883 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
884 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
885 <<
" <= max(whichVectors) = " << maxColInd <<
".");
890 const size_t LDA = getDualViewStride (origView);
891 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
892 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
893 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
894 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
895 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
897 if (whichVectors.size () == 1) {
906 const std::pair<size_t, size_t> colRng (whichVectors[0],
907 whichVectors[0] + 1);
908 view_ = takeSubview (view_, ALL (), colRng);
911 whichVectors_.clear ();
915 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
917 MultiVector (
const Teuchos::RCP<const map_type>& map,
918 const Teuchos::ArrayView<const Scalar>& data,
920 const size_t numVecs) :
923 typedef LocalOrdinal LO;
924 typedef GlobalOrdinal GO;
925 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
931 const size_t lclNumRows =
932 map.is_null () ? size_t (0) : map->getLocalNumElements ();
933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
934 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
935 "map->getLocalNumElements() = " << lclNumRows <<
".");
937 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
938 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
939 (
static_cast<size_t> (data.size ()) < minNumEntries,
940 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
941 "entries, given the input Map and number of vectors in the MultiVector."
942 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
943 "map->getLocalNumElements () = " << minNumEntries <<
".");
946 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
958 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
959 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
960 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
965 const size_t outStride =
966 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
967 if (LDA == outStride) {
974 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
976 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
978 for (
size_t j = 0; j < numVecs; ++j) {
979 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
980 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
987 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
989 MultiVector (
const Teuchos::RCP<const map_type>& map,
990 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
991 const size_t numVecs) :
995 typedef LocalOrdinal LO;
996 typedef GlobalOrdinal GO;
997 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
1000 const size_t lclNumRows =
1001 map.is_null () ? size_t (0) : map->getLocalNumElements ();
1002 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1003 (numVecs < 1 || numVecs !=
static_cast<size_t> (ArrayOfPtrs.size ()),
1004 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
1005 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
1006 for (
size_t j = 0; j < numVecs; ++j) {
1007 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
1008 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1009 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
1010 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
1011 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
1015 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
1020 using array_layout =
typename decltype (X_out)::array_layout;
1021 using input_col_view_type =
typename Kokkos::View<
const IST*,
1024 Kokkos::MemoryUnmanaged>;
1026 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
1027 for (
size_t j = 0; j < numVecs; ++j) {
1028 Teuchos::ArrayView<const IST> X_j_av =
1029 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
1030 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1031 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1037 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1048 if (this->
getMap ().is_null ()) {
1049 return static_cast<size_t> (0);
1051 return this->
getMap ()->getLocalNumElements ();
1055 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1060 if (this->
getMap ().is_null ()) {
1061 return static_cast<size_t> (0);
1063 return this->
getMap ()->getGlobalNumElements ();
1067 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1075 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1081 auto thisData =
view_.getDualView().h_view.data();
1082 auto otherData = other.
view_.getDualView().h_view.data();
1083 size_t thisSpan =
view_.getDualView().h_view.span();
1084 size_t otherSpan = other.
view_.getDualView().h_view.span();
1085 return (otherData <= thisData && thisData < otherData + otherSpan)
1086 || (thisData <= otherData && otherData < thisData + thisSpan);
1089 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1098 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1099 if (src ==
nullptr) {
1109 return src->getNumVectors () == this->getNumVectors ();
1113 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1120 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1125 const size_t numSameIDs,
1126 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1127 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1135 using KokkosRefactor::Details::permute_array_multi_column;
1136 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1137 using Kokkos::Compat::create_const_view;
1139 const char tfecfFuncName[] =
"copyAndPermute: ";
1140 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
1142 const bool verbose = Behavior::verbose ();
1143 std::unique_ptr<std::string> prefix;
1145 auto map = this->
getMap ();
1146 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1147 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1148 std::ostringstream os;
1149 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1150 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1151 os <<
"Start" << endl;
1152 std::cerr << os.str ();
1155 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1156 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1157 std::logic_error,
"permuteToLIDs.extent(0) = "
1158 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1159 << permuteFromLIDs.extent (0) <<
".");
1162 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1163 const size_t numCols = this->getNumVectors ();
1167 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1168 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1169 std::logic_error,
"Input MultiVector needs sync to both host "
1171 const bool copyOnHost = runKernelOnHost(sourceMV);
1173 std::ostringstream os;
1174 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1175 std::cerr << os.str ();
1179 std::ostringstream os;
1180 os << *prefix <<
"Copy" << endl;
1181 std::cerr << os.str ();
1206 if (numSameIDs > 0) {
1207 const std::pair<size_t, size_t> rows (0, numSameIDs);
1209 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1210 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1212 for (
size_t j = 0; j < numCols; ++j) {
1213 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1214 const size_t srcCol =
1215 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1217 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1218 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1222 Kokkos::RangePolicy<execution_space, size_t>;
1223 range_t rp(space, 0,numSameIDs);
1224 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1226 Kokkos::parallel_for(rp, aaf);
1231 Kokkos::deep_copy (space, tgt_j, src_j);
1237 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1238 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1240 for (
size_t j = 0; j < numCols; ++j) {
1241 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1242 const size_t srcCol =
1243 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1245 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1246 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1250 Kokkos::RangePolicy<execution_space, size_t>;
1251 range_t rp(space, 0,numSameIDs);
1252 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1254 Kokkos::parallel_for(rp, aaf);
1259 Kokkos::deep_copy (space, tgt_j, src_j);
1277 if (permuteFromLIDs.extent (0) == 0 ||
1278 permuteToLIDs.extent (0) == 0) {
1280 std::ostringstream os;
1281 os << *prefix <<
"No permutations. Done!" << endl;
1282 std::cerr << os.str ();
1288 std::ostringstream os;
1289 os << *prefix <<
"Permute" << endl;
1290 std::cerr << os.str ();
1295 const bool nonConstStride =
1296 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1299 std::ostringstream os;
1300 os << *prefix <<
"nonConstStride="
1301 << (nonConstStride ?
"true" :
"false") << endl;
1302 std::cerr << os.str ();
1309 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1310 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1311 if (nonConstStride) {
1313 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1314 tmpTgt.modify_host ();
1315 for (
size_t j = 0; j < numCols; ++j) {
1316 tmpTgt.h_view(j) = j;
1319 tmpTgt.sync_device ();
1321 tgtWhichVecs = tmpTgt;
1324 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->
whichVectors_ ();
1326 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1331 (
static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1332 this->getNumVectors (),
1333 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1334 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1335 this->getNumVectors () <<
".");
1337 if (sourceMV.whichVectors_.size () == 0) {
1338 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1339 tmpSrc.modify_host ();
1340 for (
size_t j = 0; j < numCols; ++j) {
1341 tmpSrc.h_view(j) = j;
1344 tmpSrc.sync_device ();
1346 srcWhichVecs = tmpSrc;
1349 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1350 sourceMV.whichVectors_ ();
1352 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1356 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1357 (
static_cast<size_t> (srcWhichVecs.extent (0)) !=
1358 sourceMV.getNumVectors (), std::logic_error,
1359 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1360 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1366 std::ostringstream os;
1367 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1368 std::cerr << os.str ();
1370 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1371 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1373 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1374 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1375 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1376 auto permuteFromLIDs_h =
1377 create_const_view (permuteFromLIDs.view_host ());
1380 std::ostringstream os;
1381 os << *prefix <<
"Permute on host" << endl;
1382 std::cerr << os.str ();
1384 if (nonConstStride) {
1387 auto tgtWhichVecs_h =
1388 create_const_view (tgtWhichVecs.view_host ());
1389 auto srcWhichVecs_h =
1390 create_const_view (srcWhichVecs.view_host ());
1392 using op_type = KokkosRefactor::Details::AddOp;
1393 permute_array_multi_column_variable_stride (tgt_h, src_h,
1397 srcWhichVecs_h, numCols,
1401 using op_type = KokkosRefactor::Details::InsertOp;
1402 permute_array_multi_column_variable_stride (tgt_h, src_h,
1406 srcWhichVecs_h, numCols,
1412 using op_type = KokkosRefactor::Details::AddOp;
1413 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1414 permuteFromLIDs_h, numCols, op_type());
1417 using op_type = KokkosRefactor::Details::InsertOp;
1418 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1419 permuteFromLIDs_h, numCols, op_type());
1425 std::ostringstream os;
1426 os << *prefix <<
"Get permute LIDs on device" << endl;
1427 std::cerr << os.str ();
1429 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1430 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1432 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1433 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1434 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1435 auto permuteFromLIDs_d =
1436 create_const_view (permuteFromLIDs.view_device ());
1439 std::ostringstream os;
1440 os << *prefix <<
"Permute on device" << endl;
1441 std::cerr << os.str ();
1443 if (nonConstStride) {
1446 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1447 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1449 using op_type = KokkosRefactor::Details::AddOp;
1450 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1454 srcWhichVecs_d, numCols,
1458 using op_type = KokkosRefactor::Details::InsertOp;
1459 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1463 srcWhichVecs_d, numCols,
1469 using op_type = KokkosRefactor::Details::AddOp;
1470 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1471 permuteFromLIDs_d, numCols, op_type());
1474 using op_type = KokkosRefactor::Details::InsertOp;
1475 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1476 permuteFromLIDs_d, numCols, op_type());
1482 std::ostringstream os;
1483 os << *prefix <<
"Done!" << endl;
1484 std::cerr << os.str ();
1489template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1490void MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::copyAndPermute(
1492 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1494 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1497 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1507 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1508 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1509 Kokkos::DualView<size_t*, buffer_device_type> ,
1510 size_t& constantNumPackets,
1516 using Kokkos::Compat::create_const_view;
1517 using Kokkos::Compat::getKokkosViewDeepCopy;
1520 const char tfecfFuncName[] =
"packAndPrepare: ";
1521 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1529 const bool debugCheckIndices = Behavior::debug ();
1534 const bool printDebugOutput = Behavior::verbose ();
1536 std::unique_ptr<std::string> prefix;
1537 if (printDebugOutput) {
1538 auto map = this->getMap ();
1539 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1540 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1541 std::ostringstream os;
1542 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1543 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1544 os <<
"Start" << endl;
1545 std::cerr << os.str ();
1549 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1551 const size_t numCols = sourceMV.getNumVectors ();
1556 constantNumPackets = numCols;
1560 if (exportLIDs.extent (0) == 0) {
1561 if (printDebugOutput) {
1562 std::ostringstream os;
1563 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1564 std::cerr << os.str ();
1584 const size_t numExportLIDs = exportLIDs.extent (0);
1585 const size_t newExportsSize = numCols * numExportLIDs;
1586 if (printDebugOutput) {
1587 std::ostringstream os;
1588 os << *prefix <<
"realloc: "
1589 <<
"numExportLIDs: " << numExportLIDs
1590 <<
", exports.extent(0): " << exports.extent (0)
1591 <<
", newExportsSize: " << newExportsSize << std::endl;
1592 std::cerr << os.str ();
1594 reallocDualViewIfNeeded (exports, newExportsSize,
"exports");
1598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1599 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1600 std::logic_error,
"Input MultiVector needs sync to both host "
1602 const bool packOnHost = runKernelOnHost(sourceMV);
1603 if (printDebugOutput) {
1604 std::ostringstream os;
1605 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1606 std::cerr << os.str ();
1618 exports.clear_sync_state ();
1619 exports.modify_host ();
1627 exports.clear_sync_state ();
1628 exports.modify_device ();
1644 if (sourceMV.isConstantStride ()) {
1645 using KokkosRefactor::Details::pack_array_single_column;
1646 if (printDebugOutput) {
1647 std::ostringstream os;
1648 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1649 std::cerr << os.str ();
1652 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1653 pack_array_single_column (exports.view_host (),
1655 exportLIDs.view_host (),
1660 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1661 pack_array_single_column (exports.view_device (),
1663 exportLIDs.view_device (),
1670 using KokkosRefactor::Details::pack_array_single_column;
1671 if (printDebugOutput) {
1672 std::ostringstream os;
1673 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1674 std::cerr << os.str ();
1677 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1678 pack_array_single_column (exports.view_host (),
1680 exportLIDs.view_host (),
1681 sourceMV.whichVectors_[0],
1685 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1686 pack_array_single_column (exports.view_device (),
1688 exportLIDs.view_device (),
1689 sourceMV.whichVectors_[0],
1696 if (sourceMV.isConstantStride ()) {
1697 using KokkosRefactor::Details::pack_array_multi_column;
1698 if (printDebugOutput) {
1699 std::ostringstream os;
1700 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1701 std::cerr << os.str ();
1704 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1705 pack_array_multi_column (exports.view_host (),
1707 exportLIDs.view_host (),
1712 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1713 pack_array_multi_column (exports.view_device (),
1715 exportLIDs.view_device (),
1722 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1723 if (printDebugOutput) {
1724 std::ostringstream os;
1725 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1727 std::cerr << os.str ();
1733 using DV = Kokkos::DualView<IST*, device_type>;
1734 using HES =
typename DV::t_host::execution_space;
1735 using DES =
typename DV::t_dev::execution_space;
1736 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1738 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1739 pack_array_multi_column_variable_stride
1740 (exports.view_host (),
1742 exportLIDs.view_host (),
1743 getKokkosViewDeepCopy<HES> (whichVecs),
1748 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1749 pack_array_multi_column_variable_stride
1750 (exports.view_device (),
1752 exportLIDs.view_device (),
1753 getKokkosViewDeepCopy<DES> (whichVecs),
1755 debugCheckIndices, space);
1760 if (printDebugOutput) {
1761 std::ostringstream os;
1762 os << *prefix <<
"Done!" << endl;
1763 std::cerr << os.str ();
1769 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1774 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1775 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1776 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1777 size_t& constantNumPackets) {
1778 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets,
execution_space());
1782 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1784 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1785 typename NO::device_type::memory_space>::value,
1790 const std::string* prefix,
1791 const bool areRemoteLIDsContiguous,
1800 std::ostringstream os;
1801 os << *prefix <<
"Realloc (if needed) imports_ from "
1802 << this->
imports_.extent (0) <<
" to " << newSize << std::endl;
1803 std::cerr << os.str ();
1806 bool reallocated =
false;
1809 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1821 if ((dual_view_type::impl_dualview_is_single_device::value ||
1824 areRemoteLIDsContiguous &&
1826 (getNumVectors() == 1) &&
1829 size_t offset = getLocalLength () - newSize;
1830 reallocated = this->imports_.d_view.data() != view_.getDualView().d_view.data() + offset;
1832 typedef std::pair<size_t, size_t> range_type;
1833 this->imports_ = DV(view_.getDualView(),
1834 range_type (offset, getLocalLength () ),
1838 std::ostringstream os;
1839 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1840 std::cerr << os.str ();
1848 reallocDualViewIfNeeded (this->unaliased_imports_, newSize,
"imports");
1850 std::ostringstream os;
1851 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1852 std::cerr << os.str ();
1854 this->imports_ = this->unaliased_imports_;
1859 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1861 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1862 typename NO::device_type::memory_space>::value,
1867 const std::string* prefix,
1868 const bool areRemoteLIDsContiguous,
1874 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1879 const std::string* prefix,
1880 const bool areRemoteLIDsContiguous,
1883 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1886 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1890 return (this->imports_.d_view.data() + this->imports_.d_view.extent(0) ==
1891 view_.getDualView().d_view.data() +
view_.getDualView().d_view.extent(0));
1895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1899 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1900 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1901 Kokkos::DualView<size_t*, buffer_device_type> ,
1902 const size_t constantNumPackets,
1908 using KokkosRefactor::Details::unpack_array_multi_column;
1909 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1910 using Kokkos::Compat::getKokkosViewDeepCopy;
1912 const char longFuncName[] =
"Tpetra::MultiVector::unpackAndCombine";
1913 const char tfecfFuncName[] =
"unpackAndCombine: ";
1914 ProfilingRegion regionUAC (longFuncName);
1922 const bool debugCheckIndices = Behavior::debug ();
1924 const bool printDebugOutput = Behavior::verbose ();
1925 std::unique_ptr<std::string> prefix;
1926 if (printDebugOutput) {
1928 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1929 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1930 std::ostringstream os;
1931 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1932 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1933 os <<
"Start" << endl;
1934 std::cerr << os.str ();
1938 if (importLIDs.extent (0) == 0) {
1939 if (printDebugOutput) {
1940 std::ostringstream os;
1941 os << *prefix <<
"No imports. Done!" << endl;
1942 std::cerr << os.str ();
1948 if (importsAreAliased()) {
1949 if (printDebugOutput) {
1950 std::ostringstream os;
1951 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1952 std::cerr << os.str ();
1958 const size_t numVecs = getNumVectors ();
1959 if (debugCheckIndices) {
1960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1961 (
static_cast<size_t> (imports.extent (0)) !=
1962 numVecs * importLIDs.extent (0),
1964 "imports.extent(0) = " << imports.extent (0)
1965 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1966 <<
" * " << importLIDs.extent (0) <<
" = "
1967 << numVecs * importLIDs.extent (0) <<
".");
1969 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1970 (constantNumPackets ==
static_cast<size_t> (0), std::runtime_error,
1971 "constantNumPackets input argument must be nonzero.");
1973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1974 (
static_cast<size_t> (numVecs) !=
1975 static_cast<size_t> (constantNumPackets),
1976 std::runtime_error,
"constantNumPackets must equal numVecs.");
1984 const bool unpackOnHost = runKernelOnHost(imports);
1986 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1989 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1992 if (printDebugOutput) {
1993 std::ostringstream os;
1994 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1996 std::cerr << os.str ();
2001 auto imports_d = imports.view_device ();
2002 auto imports_h = imports.view_host ();
2003 auto importLIDs_d = importLIDs.view_device ();
2004 auto importLIDs_h = importLIDs.view_host ();
2006 Kokkos::DualView<size_t*, device_type> whichVecs;
2007 if (! isConstantStride ()) {
2008 Kokkos::View<
const size_t*, Kokkos::HostSpace,
2009 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
2011 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
2013 whichVecs.modify_host ();
2015 Kokkos::deep_copy (whichVecs.view_host (), whichVecsIn);
2018 whichVecs.modify_device ();
2020 Kokkos::deep_copy (whichVecs.view_device (), whichVecsIn);
2023 auto whichVecs_d = whichVecs.view_device ();
2024 auto whichVecs_h = whichVecs.view_host ();
2034 if (numVecs > 0 && importLIDs.extent (0) > 0) {
2035 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2036 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2039 const bool use_atomic_updates = unpackOnHost ?
2040 host_exec_space().concurrency () != 1 :
2041 dev_exec_space().concurrency () != 1;
2043 if (printDebugOutput) {
2044 std::ostringstream os;
2046 std::cerr << os.str ();
2053 using op_type = KokkosRefactor::Details::InsertOp;
2054 if (isConstantStride ()) {
2056 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2057 unpack_array_multi_column (host_exec_space (),
2058 X_h, imports_h, importLIDs_h,
2059 op_type (), numVecs,
2065 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2066 unpack_array_multi_column (space,
2067 X_d, imports_d, importLIDs_d,
2068 op_type (), numVecs,
2076 unpack_array_multi_column_variable_stride (host_exec_space (),
2087 unpack_array_multi_column_variable_stride (space,
2099 using op_type = KokkosRefactor::Details::AddOp;
2100 if (isConstantStride ()) {
2102 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2103 unpack_array_multi_column (host_exec_space (),
2104 X_h, imports_h, importLIDs_h,
2105 op_type (), numVecs,
2110 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2111 unpack_array_multi_column (space,
2112 X_d, imports_d, importLIDs_d,
2113 op_type (), numVecs,
2120 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2121 unpack_array_multi_column_variable_stride (host_exec_space (),
2131 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2132 unpack_array_multi_column_variable_stride (space,
2144 using op_type = KokkosRefactor::Details::AbsMaxOp;
2145 if (isConstantStride ()) {
2147 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2148 unpack_array_multi_column (host_exec_space (),
2149 X_h, imports_h, importLIDs_h,
2150 op_type (), numVecs,
2155 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2156 unpack_array_multi_column (space,
2157 X_d, imports_d, importLIDs_d,
2158 op_type (), numVecs,
2165 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2166 unpack_array_multi_column_variable_stride (host_exec_space (),
2176 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2177 unpack_array_multi_column_variable_stride (space,
2189 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2190 (
true, std::logic_error,
"Invalid CombineMode");
2194 if (printDebugOutput) {
2195 std::ostringstream os;
2196 os << *prefix <<
"Nothing to unpack" << endl;
2197 std::cerr << os.str ();
2201 if (printDebugOutput) {
2202 std::ostringstream os;
2203 os << *prefix <<
"Done!" << endl;
2204 std::cerr << os.str ();
2209 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2213 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2214 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2215 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2216 const size_t constantNumPackets,
2218 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM,
execution_space());
2222 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2228 return static_cast<size_t> (
view_.extent (1));
2238 gblDotImpl (
const RV& dotsOut,
2239 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2240 const bool distributed)
2242 using Teuchos::REDUCE_MAX;
2243 using Teuchos::REDUCE_SUM;
2244 using Teuchos::reduceAll;
2245 typedef typename RV::non_const_value_type dot_type;
2247 const size_t numVecs = dotsOut.extent (0);
2262 if (distributed && ! comm.is_null ()) {
2265 const int nv =
static_cast<int> (numVecs);
2268 if (commIsInterComm) {
2272 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2274 Kokkos::deep_copy (lclDots, dotsOut);
2275 const dot_type*
const lclSum = lclDots.data ();
2276 dot_type*
const gblSum = dotsOut.data ();
2277 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2280 dot_type*
const inout = dotsOut.data ();
2281 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2291 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2294 using Kokkos::subview;
2295 using Teuchos::Comm;
2296 using Teuchos::null;
2299 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2300 typedef typename dual_view_type::t_dev_const XMV;
2301 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2309 const size_t lclNumRows = this->getLocalLength ();
2310 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2311 const bool debug = Behavior::debug ();
2314 const bool compat = this->
getMap ()->isCompatible (* (A.
getMap ()));
2315 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2316 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2317 "compatible with the input MultiVector A. We only test for this "
2328 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2330 "MultiVectors do not have the same local length. "
2331 "this->getLocalLength() = " << lclNumRows <<
" != "
2333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2335 "MultiVectors must have the same number of columns (vectors). "
2336 "this->getNumVectors() = " << numVecs <<
" != "
2338 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2339 numDots != numVecs, std::runtime_error,
2340 "The output array 'dots' must have the same number of entries as the "
2341 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2342 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2344 const std::pair<size_t, size_t> colRng (0, numVecs);
2345 RV dotsOut = subview (dots, colRng);
2346 RCP<const Comm<int> > comm = this->
getMap ().is_null () ? null :
2347 this->
getMap ()->getComm ();
2352 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2364 exec_space_instance.fence();
2366 gblDotImpl (dotsOut, comm, this->isDistributed ());
2370 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2377 using dot_type =
typename MV::dot_type;
2378 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2381 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2382 map.is_null () ? Teuchos::null : map->getComm ();
2383 if (comm.is_null ()) {
2384 return Kokkos::ArithTraits<dot_type>::zero ();
2387 using LO = LocalOrdinal;
2391 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2393 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2394 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2395 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2402 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2404 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2405 lclDot = KokkosBlas::dot (x_1d, y_1d);
2408 using Teuchos::outArg;
2409 using Teuchos::REDUCE_SUM;
2410 using Teuchos::reduceAll;
2411 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2423 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2427 const Teuchos::ArrayView<dot_type>& dots)
const
2429 const char tfecfFuncName[] =
"dot: ";
2434 const size_t numDots =
static_cast<size_t> (dots.size ());
2444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2446 "MultiVectors do not have the same local length. "
2447 "this->getLocalLength() = " << lclNumRows <<
" != "
2449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2451 "MultiVectors must have the same number of columns (vectors). "
2452 "this->getNumVectors() = " << numVecs <<
" != "
2454 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2455 (numDots != numVecs, std::runtime_error,
2456 "The output array 'dots' must have the same number of entries as the "
2457 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2458 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2461 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2465 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2469 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2472 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2475 using ::Tpetra::Details::NORM_TWO;
2477 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2480 MV& X =
const_cast<MV&
> (*this);
2481 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2484 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2487 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2489 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2490 this->
norm2 (norms_av);
2494 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2497 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2500 using ::Tpetra::Details::NORM_ONE;
2502 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2505 MV& X =
const_cast<MV&
> (*this);
2506 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2509 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2512 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2514 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2515 this->
norm1 (norms_av);
2518 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2521 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2524 using ::Tpetra::Details::NORM_INF;
2526 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2529 MV& X =
const_cast<MV&
> (*this);
2530 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2533 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2536 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2538 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2542 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2545 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2549 using Kokkos::subview;
2550 using Teuchos::Comm;
2552 using Teuchos::reduceAll;
2553 using Teuchos::REDUCE_SUM;
2554 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2558 const size_t numMeans =
static_cast<size_t> (means.size ());
2560 TEUCHOS_TEST_FOR_EXCEPTION(
2561 numMeans != numVecs, std::runtime_error,
2562 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2563 <<
" != this->getNumVectors() = " << numVecs <<
".");
2565 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2566 const std::pair<size_t, size_t> colRng (0, numVecs);
2571 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2573 typename local_view_type::HostMirror::array_layout,
2575 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2576 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2578 RCP<const Comm<int> > comm = this->
getMap ().is_null () ? Teuchos::null :
2579 this->
getMap ()->getComm ();
2583 if (useHostVersion) {
2586 rowRng, Kokkos::ALL ());
2588 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2590 KokkosBlas::sum (lclSums, X_lcl);
2593 for (
size_t j = 0; j < numVecs; ++j) {
2595 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2602 if (! comm.is_null () && this->isDistributed ()) {
2603 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2604 lclSums.data (), meansOut.data ());
2608 Kokkos::deep_copy (meansOut, lclSums);
2614 rowRng, Kokkos::ALL ());
2617 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2619 KokkosBlas::sum (lclSums, X_lcl);
2622 for (
size_t j = 0; j < numVecs; ++j) {
2624 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2634 exec_space_instance.fence();
2640 if (! comm.is_null () && this->isDistributed ()) {
2641 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2642 lclSums.data (), meansOut.data ());
2646 Kokkos::deep_copy (meansOut, lclSums);
2655 for (
size_t k = 0; k < numMeans; ++k) {
2656 meansOut(k) = meansOut(k) * OneOverN;
2661 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2667 typedef Kokkos::ArithTraits<IST> ATS;
2668 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2669 typedef typename pool_type::generator_type generator_type;
2671 const IST max = Kokkos::rand<generator_type, IST>::max ();
2672 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2674 this->
randomize (
static_cast<Scalar
> (min),
static_cast<Scalar
> (max));
2678 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2681 randomize (
const Scalar& minVal,
const Scalar& maxVal)
2684 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2695 const uint64_t myRank =
2696 static_cast<uint64_t
> (this->
getMap ()->getComm ()->getRank ());
2697 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2698 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2700 pool_type rand_pool (seed);
2701 const IST max =
static_cast<IST
> (maxVal);
2702 const IST min =
static_cast<IST
> (minVal);
2707 Kokkos::fill_random (thisView, rand_pool, min, max);
2711 for (
size_t k = 0; k < numVecs; ++k) {
2713 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2714 Kokkos::fill_random (X_k, rand_pool, min, max);
2719 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2726 using DES =
typename dual_view_type::t_dev::execution_space;
2727 using HES =
typename dual_view_type::t_host::execution_space;
2728 using LO = LocalOrdinal;
2729 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2735 const LO numVecs =
static_cast<LO
> (this->
getNumVectors ());
2741 const bool runOnHost = runKernelOnHost(*
this);
2744 if (this->isConstantStride ()) {
2745 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2748 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2754 if (this->isConstantStride ()) {
2755 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2758 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2765 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2768 replaceMap (
const Teuchos::RCP<const map_type>& newMap)
2770 using Teuchos::ArrayRCP;
2771 using Teuchos::Comm;
2774 using LO = LocalOrdinal;
2775 using GO = GlobalOrdinal;
2781 TEUCHOS_TEST_FOR_EXCEPTION(
2782 ! this->isConstantStride (), std::logic_error,
2783 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2784 "if the MultiVector is a column view of another MultiVector (that is, if "
2785 "isConstantStride() == false).");
2815 if (this->
getMap ().is_null ()) {
2820 TEUCHOS_TEST_FOR_EXCEPTION(
2821 newMap.is_null (), std::invalid_argument,
2822 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2823 "This probably means that the input Map is incorrect.");
2827 const size_t newNumRows = newMap->getLocalNumElements ();
2828 const size_t origNumRows =
view_.extent (0);
2831 if (origNumRows != newNumRows ||
view_.extent (1) != numCols) {
2832 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2835 else if (newMap.is_null ()) {
2838 const size_t newNumRows =
static_cast<size_t> (0);
2840 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2843 this->
map_ = newMap;
2846 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2849 scale (
const Scalar& alpha)
2854 const IST theAlpha =
static_cast<IST
> (alpha);
2855 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2860 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2861 const std::pair<size_t, size_t> colRng (0, numVecs);
2870 if (useHostVersion) {
2871 auto Y_lcl = Kokkos::subview (
getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2873 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2876 for (
size_t k = 0; k < numVecs; ++k) {
2878 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2879 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2884 auto Y_lcl = Kokkos::subview (
getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2886 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2889 for (
size_t k = 0; k < numVecs; ++k) {
2891 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2892 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2899 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2902 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2905 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2906 TEUCHOS_TEST_FOR_EXCEPTION(
2907 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2908 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2912 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2913 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2914 k_alphas.modify_host ();
2915 for (
size_t i = 0; i < numAlphas; ++i) {
2918 k_alphas.sync_device ();
2920 this->
scale (k_alphas.view_device ());
2923 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2926 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2929 using Kokkos::subview;
2933 TEUCHOS_TEST_FOR_EXCEPTION(
2934 static_cast<size_t> (alphas.extent (0)) != numVecs,
2935 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2936 "alphas.extent(0) = " << alphas.extent (0)
2937 <<
" != this->getNumVectors () = " << numVecs <<
".");
2938 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2939 const std::pair<size_t, size_t> colRng (0, numVecs);
2950 if (useHostVersion) {
2953 auto alphas_h = Kokkos::create_mirror_view (alphas);
2955 Kokkos::deep_copy (alphas_h, alphas);
2957 auto Y_lcl = subview (this->
getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2959 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2962 for (
size_t k = 0; k < numVecs; ++k) {
2963 const size_t Y_col = this->isConstantStride () ? k :
2965 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2968 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2975 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2982 auto alphas_h = Kokkos::create_mirror_view (alphas);
2984 Kokkos::deep_copy (alphas_h, alphas);
2986 for (
size_t k = 0; k < numVecs; ++k) {
2987 const size_t Y_col = this->isConstantStride () ? k :
2989 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2990 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2996 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2999 scale (
const Scalar& alpha,
3003 using Kokkos::subview;
3004 const char tfecfFuncName[] =
"scale: ";
3009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3011 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3013 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3015 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3019 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3020 const std::pair<size_t, size_t> colRng (0, numVecs);
3024 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
3025 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
3028 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
3032 for (
size_t k = 0; k < numVecs; ++k) {
3033 const size_t Y_col = this->isConstantStride () ? k : this->
whichVectors_[k];
3035 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3036 auto X_k = subview (X_lcl, ALL (), X_col);
3038 KokkosBlas::scal (Y_k, theAlpha, X_k);
3045 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3050 const char tfecfFuncName[] =
"reciprocal: ";
3052 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3054 "MultiVectors do not have the same local length. "
3057 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3058 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3059 ": MultiVectors do not have the same number of columns (vectors). "
3061 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3069 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3073 using Kokkos::subview;
3074 for (
size_t k = 0; k < numVecs; ++k) {
3076 auto vector_k = subview (this_view_dev, ALL (), this_col);
3078 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3079 KokkosBlas::reciprocal (vector_k, vector_Ak);
3084 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3089 const char tfecfFuncName[] =
"abs";
3091 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3093 ": MultiVectors do not have the same local length. "
3096 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3097 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3098 ": MultiVectors do not have the same number of columns (vectors). "
3100 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3107 KokkosBlas::abs (this_view_dev, A_view_dev);
3111 using Kokkos::subview;
3113 for (
size_t k=0; k < numVecs; ++k) {
3115 auto vector_k = subview (this_view_dev, ALL (), this_col);
3117 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3118 KokkosBlas::abs (vector_k, vector_Ak);
3123 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3126 update (
const Scalar& alpha,
3130 const char tfecfFuncName[] =
"update: ";
3131 using Kokkos::subview;
3140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3142 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3144 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3146 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3152 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3153 const std::pair<size_t, size_t> colRng (0, numVecs);
3156 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3158 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3162 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3166 for (
size_t k = 0; k < numVecs; ++k) {
3167 const size_t Y_col = this->isConstantStride () ? k : this->
whichVectors_[k];
3169 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3170 auto X_k = subview (X_lcl, ALL (), X_col);
3172 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3177 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3180 update (
const Scalar& alpha,
3184 const Scalar& gamma)
3187 using Kokkos::subview;
3189 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3197 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3199 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3200 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3204 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3205 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3209 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3210 "but this MultiVector has " << numVecs <<
" column(s).");
3211 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3213 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3214 "but this MultiVector has " << numVecs <<
" column(s).");
3221 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3222 const std::pair<size_t, size_t> colRng (0, numVecs);
3232 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3237 for (
size_t k = 0; k < numVecs; ++k) {
3241 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3242 theBeta, subview (B_lcl, rowRng, B_col),
3243 theGamma, subview (C_lcl, rowRng, this_col));
3248 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3249 Teuchos::ArrayRCP<const Scalar>
3255 const char tfecfFuncName[] =
"getData: ";
3264 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3265 Teuchos::ArrayRCP<const IST> dataAsArcp =
3266 Kokkos::Compat::persistingView (hostView_j, 0,
getLocalLength ());
3268 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3269 (
static_cast<size_t> (hostView_j.extent (0)) <
3270 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3271 "hostView_j.extent(0) = " << hostView_j.extent (0)
3272 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3273 "Please report this bug to the Tpetra developers.");
3275 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3278 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3279 Teuchos::ArrayRCP<Scalar>
3284 using Kokkos::subview;
3286 const char tfecfFuncName[] =
"getDataNonConst: ";
3290 auto hostView_j = subview (hostView, ALL (), col);
3291 Teuchos::ArrayRCP<IST> dataAsArcp =
3292 Kokkos::Compat::persistingView (hostView_j, 0,
getLocalLength ());
3294 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3295 (
static_cast<size_t> (hostView_j.extent (0)) <
3296 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3297 "hostView_j.extent(0) = " << hostView_j.extent (0)
3298 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3299 "Please report this bug to the Tpetra developers.");
3301 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3304 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3305 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3307 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3314 bool contiguous =
true;
3315 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3316 for (
size_t j = 1; j < numCopyVecs; ++j) {
3317 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3322 if (contiguous && numCopyVecs > 0) {
3323 return this->
subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3326 RCP<const MV> X_sub = this->
subView (cols);
3327 RCP<MV> Y (
new MV (this->
getMap (), numCopyVecs,
false));
3333 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3334 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3336 subCopy (
const Teuchos::Range1D &colRng)
const
3341 RCP<const MV> X_sub = this->
subView (colRng);
3342 RCP<MV> Y (
new MV (this->
getMap (),
static_cast<size_t> (colRng.size ()),
false));
3347 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3351 return view_.origExtent(0);
3354 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3358 return view_.origExtent(1);
3361 template <
class Scalar,
class LO,
class GO,
class Node>
3364 const Teuchos::RCP<const map_type>& subMap,
3369 using Kokkos::subview;
3370 using Teuchos::outArg;
3373 using Teuchos::reduceAll;
3374 using Teuchos::REDUCE_MIN;
3377 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3378 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3381 std::unique_ptr<std::ostringstream> errStrm;
3388 const auto comm = subMap->getComm ();
3389 TEUCHOS_ASSERT( ! comm.is_null () );
3390 const int myRank = comm->getRank ();
3392 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3394 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3396 std::ostringstream os;
3397 os <<
"Proc " << myRank <<
": " << prefix
3398 <<
"X: {lclNumRows: " << lclNumRowsBefore
3400 <<
", numCols: " << numCols <<
"}, "
3401 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3402 std::cerr << os.str ();
3407 const bool tooManyElts =
3410 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3411 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3412 << newNumRows <<
") + rowOffset (=" << rowOffset
3416 TEUCHOS_TEST_FOR_EXCEPTION
3417 (! debug && tooManyElts, std::invalid_argument,
3418 prefix << errStrm->str () << suffix);
3422 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3424 std::ostringstream gblErrStrm;
3425 const std::string myErrStr =
3426 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3428 TEUCHOS_TEST_FOR_EXCEPTION
3429 (
true, std::invalid_argument, gblErrStrm.str ());
3433 using range_type = std::pair<LO, LO>;
3434 const range_type origRowRng
3435 (rowOffset,
static_cast<LO
> (X.
view_.origExtent (0)));
3436 const range_type rowRng
3437 (rowOffset, rowOffset + newNumRows);
3439 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3446 if (newView.extent (0) == 0 &&
3447 newView.extent (1) != X.
view_.extent (1)) {
3449 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3453 MV (subMap, newView) :
3457 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3458 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3459 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3461 if (errStrm.get () ==
nullptr) {
3462 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3464 *errStrm <<
" Proc " << myRank <<
3465 ": subMap.getLocalNumElements(): " << newNumRows <<
3466 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3467 ", X.getNumVectors(): " << numCols <<
3468 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3470 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3472 std::ostringstream gblErrStrm;
3474 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3475 "dimensions on one or more processes:" << endl;
3477 const std::string myErrStr =
3478 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3480 gblErrStrm << suffix << endl;
3481 TEUCHOS_TEST_FOR_EXCEPTION
3482 (
true, std::invalid_argument, gblErrStrm.str ());
3487 std::ostringstream os;
3488 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3489 std::cerr << os.str ();
3495 std::ostringstream os;
3496 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3497 std::cerr << os.str ();
3501 template <
class Scalar,
class LO,
class GO,
class Node>
3505 const size_t rowOffset) :
3510 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3511 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3513 offsetView (
const Teuchos::RCP<const map_type>& subMap,
3514 const size_t offset)
const
3517 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3520 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3521 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3524 const size_t offset)
3527 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3530 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3531 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3533 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3535 using Teuchos::Array;
3539 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3540 TEUCHOS_TEST_FOR_EXCEPTION(
3541 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3542 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3543 "contain at least one entry, but cols.size() = " << cols.size ()
3548 bool contiguous =
true;
3549 for (
size_t j = 1; j < numViewCols; ++j) {
3550 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3556 if (numViewCols == 0) {
3558 return rcp (
new MV (this->
getMap (), numViewCols));
3561 return this->
subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3566 return rcp (
new MV (this->
getMap (),
view_, cols));
3569 Array<size_t> newcols (cols.size ());
3570 for (
size_t j = 0; j < numViewCols; ++j) {
3573 return rcp (
new MV (this->
getMap (),
view_, newcols ()));
3578 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3579 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3581 subView (
const Teuchos::Range1D& colRng)
const
3585 using Kokkos::subview;
3586 using Teuchos::Array;
3590 const char tfecfFuncName[] =
"subView(Range1D): ";
3597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3598 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3599 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3602 numVecs != 0 && colRng.size () != 0 &&
3603 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3604 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3605 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3606 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3607 "[0, " << numVecs <<
"].");
3609 RCP<const MV> X_ret;
3619 const std::pair<size_t, size_t> rows (0, lclNumRows);
3620 if (colRng.size () == 0) {
3621 const std::pair<size_t, size_t> cols (0, 0);
3622 wrapped_dual_view_type X_sub = takeSubview (this->
view_, ALL (), cols);
3623 X_ret = rcp (
new MV (this->
getMap (), X_sub));
3628 const std::pair<size_t, size_t> cols (colRng.lbound (),
3629 colRng.ubound () + 1);
3630 wrapped_dual_view_type X_sub = takeSubview (this->
view_, ALL (), cols);
3631 X_ret = rcp (
new MV (this->
getMap (), X_sub));
3634 if (
static_cast<size_t> (colRng.size ()) ==
static_cast<size_t> (1)) {
3637 const std::pair<size_t, size_t> col (
whichVectors_[0] + colRng.lbound (),
3639 wrapped_dual_view_type X_sub = takeSubview (
view_, ALL (), col);
3640 X_ret = rcp (
new MV (this->
getMap (), X_sub));
3643 Array<size_t> which (
whichVectors_.begin () + colRng.lbound (),
3645 X_ret = rcp (
new MV (this->
getMap (),
view_, which));
3650 const bool debug = Behavior::debug ();
3652 using Teuchos::Comm;
3653 using Teuchos::outArg;
3654 using Teuchos::REDUCE_MIN;
3655 using Teuchos::reduceAll;
3657 RCP<const Comm<int> > comm = this->
getMap ().is_null () ?
3658 Teuchos::null : this->
getMap ()->getComm ();
3659 if (! comm.is_null ()) {
3663 if (X_ret.is_null ()) {
3666 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3668 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3669 "MultiVector; the return value of this method) is null on some MPI "
3670 "process in this MultiVector's communicator. This should never "
3671 "happen. Please report this bug to the Tpetra developers.");
3672 if (! X_ret.is_null () &&
3673 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3676 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3677 outArg (gblSuccess));
3678 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3679 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3680 "colRng.size(), on at least one MPI process in this MultiVector's "
3681 "communicator. This should never happen. "
3682 "Please report this bug to the Tpetra developers.");
3689 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3690 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3695 return Teuchos::rcp_const_cast<MV> (this->
subView (cols));
3699 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3700 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3705 return Teuchos::rcp_const_cast<MV> (this->
subView (colRng));
3709 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3713 : base_type (X.
getMap ())
3715 using Kokkos::subview;
3716 typedef std::pair<size_t, size_t> range_type;
3717 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3720 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3721 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3722 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3724 static_cast<size_t> (j) :
3726 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3737 const size_t newSize = X.
imports_.extent (0) / numCols;
3738 const size_t offset = jj*newSize;
3740 newImports.d_view = subview (X.
imports_.d_view,
3741 range_type (offset, offset+newSize));
3742 newImports.h_view = subview (X.
imports_.h_view,
3743 range_type (offset, offset+newSize));
3747 const size_t newSize = X.
exports_.extent (0) / numCols;
3748 const size_t offset = jj*newSize;
3750 newExports.d_view = subview (X.
exports_.d_view,
3751 range_type (offset, offset+newSize));
3752 newExports.h_view = subview (X.
exports_.h_view,
3753 range_type (offset, offset+newSize));
3764 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3765 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3770 return Teuchos::rcp (
new V (*
this, j));
3774 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3775 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3780 return Teuchos::rcp (
new V (*
this, j));
3784 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3787 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3790 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3792 Kokkos::MemoryUnmanaged>;
3793 const char tfecfFuncName[] =
"get1dCopy: ";
3798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3799 (LDA < numRows, std::runtime_error,
3800 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3801 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3802 (numRows >
size_t (0) && numCols >
size_t (0) &&
3803 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3805 "A.size() = " << A.size () <<
", but its size must be at least "
3806 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3808 const std::pair<size_t, size_t> rowRange (0, numRows);
3809 const std::pair<size_t, size_t> colRange (0, numCols);
3811 input_view_type A_view_orig (
reinterpret_cast<IST*
> (A.getRawPtr ()),
3813 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3829 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3832 const bool useHostView =
view_.host_view_use_count() >=
view_.device_view_use_count();
3833 if (this->isConstantStride ()) {
3837 Kokkos::deep_copy (A_view, srcView_host);
3841 Kokkos::deep_copy (A_view, srcView_device);
3845 for (
size_t j = 0; j < numCols; ++j) {
3847 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3851 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3853 Kokkos::deep_copy (dstColView, srcColView_host);
3856 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3858 Kokkos::deep_copy (dstColView, srcColView_device);
3866 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3869 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3872 const char tfecfFuncName[] =
"get2dCopy: ";
3876 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3877 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3878 std::runtime_error,
"Input array of pointers must contain as many "
3879 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3880 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3882 if (numRows != 0 && numCols != 0) {
3884 for (
size_t j = 0; j < numCols; ++j) {
3885 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3886 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3887 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3888 "the input array of arrays is not long enough to fit all entries in "
3889 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3890 <<
" < getLocalLength() = " << numRows <<
".");
3894 for (
size_t j = 0; j < numCols; ++j) {
3895 Teuchos::RCP<const V> X_j = this->
getVector (j);
3896 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3897 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3903 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3904 Teuchos::ArrayRCP<const Scalar>
3909 return Teuchos::null;
3911 TEUCHOS_TEST_FOR_EXCEPTION(
3913 "get1dView: This MultiVector does not have constant stride, so it is "
3914 "not possible to view its data as a single array. You may check "
3915 "whether a MultiVector has constant stride by calling "
3916 "isConstantStride().");
3921 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3922 Kokkos::Compat::persistingView (X_lcl);
3923 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3927 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3928 Teuchos::ArrayRCP<Scalar>
3933 return Teuchos::null;
3936 TEUCHOS_TEST_FOR_EXCEPTION
3938 "get1dViewNonConst: This MultiVector does not have constant stride, "
3939 "so it is not possible to view its data as a single array. You may "
3940 "check whether a MultiVector has constant stride by calling "
3941 "isConstantStride().");
3943 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3944 Kokkos::Compat::persistingView (X_lcl);
3945 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3949 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3950 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3962 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3964 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3965 for (
size_t j = 0; j < numCols; ++j) {
3966 const size_t col = this->isConstantStride () ? j : this->
whichVectors_[j];
3967 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3968 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3969 Kokkos::Compat::persistingView (X_lcl_j);
3970 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3975 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3980 return view_.getHostView(s);
3983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3988 return view_.getHostView(s);
3991 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3996 return view_.getHostView(s);
3999 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4004 return view_.getDeviceView(s);
4007 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4012 return view_.getDeviceView(s);
4015 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4020 return view_.getDeviceView(s);
4023 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4024 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
4039 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
4041 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
4042 for (
size_t j = 0; j < numCols; ++j) {
4043 const size_t col = this->isConstantStride () ? j : this->
whichVectors_[j];
4044 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4045 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4046 Kokkos::Compat::persistingView (X_lcl_j);
4047 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
4052 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4056 Teuchos::ETransp transB,
4057 const Scalar& alpha,
4063 using Teuchos::CONJ_TRANS;
4064 using Teuchos::NO_TRANS;
4065 using Teuchos::TRANS;
4068 using Teuchos::rcpFromRef;
4070 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4072 using STS = Teuchos::ScalarTraits<Scalar>;
4074 const char tfecfFuncName[] =
"multiply: ";
4075 ProfilingRegion region (
"Tpetra::MV::multiply");
4107 const bool C_is_local = ! this->isDistributed ();
4117 auto myMap = this->
getMap ();
4118 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4119 using Teuchos::REDUCE_MIN;
4120 using Teuchos::reduceAll;
4121 using Teuchos::outArg;
4123 auto comm = myMap->getComm ();
4124 const size_t A_nrows =
4126 const size_t A_ncols =
4128 const size_t B_nrows =
4130 const size_t B_ncols =
4136 const int myRank = comm->getRank ();
4137 std::ostringstream errStrm;
4139 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4141 <<
"." << std::endl;
4144 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4146 <<
"." << std::endl;
4148 if (A_ncols != B_nrows) {
4149 errStrm <<
"Proc " << myRank <<
": A_ncols="
4150 << A_ncols <<
" != B_nrows=" << B_nrows
4151 <<
"." << std::endl;
4154 const int lclGood = lclBad ? 0 : 1;
4156 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4158 std::ostringstream os;
4161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4162 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4163 "least one process in this object's communicator." << std::endl
4165 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4167 << (transA == Teuchos::TRANS ?
"^T" :
4168 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4169 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4171 << (transA == Teuchos::TRANS ?
"^T" :
4172 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4173 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4184 const bool Case1 = C_is_local && A_is_local && B_is_local;
4186 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4187 transA != NO_TRANS &&
4190 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4194 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4195 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4196 "Multiplication of op(A) and op(B) into *this is not a "
4197 "supported use case.");
4199 if (beta != STS::zero () && Case2) {
4205 const int myRank = this->
getMap ()->getComm ()->getRank ();
4207 beta_local = ATS::zero ();
4217 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4219 C_tmp = rcp (
this,
false);
4222 RCP<const MV> A_tmp;
4224 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4226 A_tmp = rcpFromRef (A);
4229 RCP<const MV> B_tmp;
4231 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4233 B_tmp = rcpFromRef (B);
4236 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4237 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4238 ! A_tmp->isConstantStride (), std::logic_error,
4239 "Failed to make temporary constant-stride copies of MultiVectors.");
4242 const LO A_lclNumRows = A_tmp->getLocalLength ();
4243 const LO A_numVecs = A_tmp->getNumVectors ();
4244 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4245 auto A_sub = Kokkos::subview (A_lcl,
4246 std::make_pair (LO (0), A_lclNumRows),
4247 std::make_pair (LO (0), A_numVecs));
4250 const LO B_lclNumRows = B_tmp->getLocalLength ();
4251 const LO B_numVecs = B_tmp->getNumVectors ();
4252 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4253 auto B_sub = Kokkos::subview (B_lcl,
4254 std::make_pair (LO (0), B_lclNumRows),
4255 std::make_pair (LO (0), B_numVecs));
4257 const LO C_lclNumRows = C_tmp->getLocalLength ();
4258 const LO C_numVecs = C_tmp->getNumVectors ();
4260 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4261 auto C_sub = Kokkos::subview (C_lcl,
4262 std::make_pair (LO (0), C_lclNumRows),
4263 std::make_pair (LO (0), C_numVecs));
4264 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4265 (transA == Teuchos::TRANS ?
'T' :
'C'));
4266 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4267 (transB == Teuchos::TRANS ?
'T' :
'C'));
4270 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4272 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4281 A_tmp = Teuchos::null;
4282 B_tmp = Teuchos::null;
4290 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4299 using Kokkos::subview;
4300 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4305 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4307 std::runtime_error,
"MultiVectors do not have the same local length.");
4308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4309 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4310 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4323 KokkosBlas::mult (scalarThis,
4326 subview (A_view, ALL (), 0),
4330 for (
size_t j = 0; j < numVecs; ++j) {
4333 KokkosBlas::mult (scalarThis,
4334 subview (this_view, ALL (), C_col),
4336 subview (A_view, ALL (), 0),
4337 subview (B_view, ALL (), B_col));
4342 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4349 ProfilingRegion region (
"Tpetra::MV::reduce");
4351 const auto map = this->
getMap ();
4352 if (map.get () ==
nullptr) {
4355 const auto comm = map->getComm ();
4356 if (comm.get () ==
nullptr) {
4363 if (changed_on_device) {
4369 allReduceView (X_lcl, X_lcl, *comm);
4373 allReduceView (X_lcl, X_lcl, *comm);
4377 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4385 const LocalOrdinal minLocalIndex = this->
getMap()->getMinLocalIndex();
4386 const LocalOrdinal maxLocalIndex = this->
getMap()->getMaxLocalIndex();
4387 TEUCHOS_TEST_FOR_EXCEPTION(
4388 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4390 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4391 <<
" is invalid. The range of valid row indices on this process "
4392 << this->
getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4393 <<
", " << maxLocalIndex <<
"].");
4394 TEUCHOS_TEST_FOR_EXCEPTION(
4395 vectorIndexOutOfRange(col),
4397 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4398 <<
" of the multivector is invalid.");
4403 view (lclRow, colInd) = ScalarValue;
4407 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4416 const LocalOrdinal minLocalIndex = this->
getMap()->getMinLocalIndex();
4417 const LocalOrdinal maxLocalIndex = this->
getMap()->getMaxLocalIndex();
4418 TEUCHOS_TEST_FOR_EXCEPTION(
4419 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4421 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4422 <<
" is invalid. The range of valid row indices on this process "
4423 << this->
getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4424 <<
", " << maxLocalIndex <<
"].");
4425 TEUCHOS_TEST_FOR_EXCEPTION(
4426 vectorIndexOutOfRange(col),
4428 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4429 <<
" of the multivector is invalid.");
4436 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4439 view(lclRow, colInd) += value;
4444 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4453 const LocalOrdinal lclRow = this->
map_->getLocalElement (gblRow);
4456 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4457 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4458 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4460 "Global row index " << gblRow <<
" is not present on this process "
4461 << this->
getMap ()->getComm ()->getRank () <<
".");
4462 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4463 (this->vectorIndexOutOfRange (col), std::runtime_error,
4464 "Vector index " << col <<
" of the MultiVector is invalid.");
4470 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4480 const LocalOrdinal lclRow = this->
map_->getLocalElement (globalRow);
4483 TEUCHOS_TEST_FOR_EXCEPTION(
4484 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4486 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4487 <<
" is not present on this process "
4488 << this->
getMap ()->getComm ()->getRank () <<
".");
4489 TEUCHOS_TEST_FOR_EXCEPTION(
4490 vectorIndexOutOfRange(col),
4492 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4493 <<
" of the multivector is invalid.");
4499 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4501 Teuchos::ArrayRCP<T>
4507 typename dual_view_type::array_layout,
4510 col_dual_view_type X_col =
4511 Kokkos::subview (
view_, Kokkos::ALL (), col);
4512 return Kokkos::Compat::persistingView (X_col.d_view);
4515 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4519 return view_.need_sync_host ();
4522 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4526 return view_.need_sync_device ();
4529 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4534 using Teuchos::TypeNameTraits;
4536 std::ostringstream out;
4537 out <<
"\"" << className <<
"\": {";
4538 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4539 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4540 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4541 <<
", Node" << Node::name ()
4543 if (this->getObjectLabel () !=
"") {
4544 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4547 if (className !=
"Tpetra::Vector") {
4549 <<
", isConstantStride: " << this->isConstantStride ();
4551 if (this->isConstantStride ()) {
4552 out <<
", columnStride: " << this->
getStride ();
4559 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4569 template<
typename ViewType>
4570 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4573 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4574 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4575 static_assert(ViewType::rank == 2,
4576 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4579 out <<
"Values("<<prefix<<
"): " << std::endl
4581 const size_t numRows = v.extent(0);
4582 const size_t numCols = v.extent(1);
4584 for (
size_t i = 0; i < numRows; ++i) {
4586 if (i + 1 < numRows) {
4592 for (
size_t i = 0; i < numRows; ++i) {
4593 for (
size_t j = 0; j < numCols; ++j) {
4595 if (j + 1 < numCols) {
4599 if (i + 1 < numRows) {
4608 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4613 typedef LocalOrdinal LO;
4616 if (vl <= Teuchos::VERB_LOW) {
4617 return std::string ();
4619 auto map = this->
getMap ();
4620 if (map.is_null ()) {
4621 return std::string ();
4623 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4624 auto outp = Teuchos::getFancyOStream (outStringP);
4625 Teuchos::FancyOStream& out = *outp;
4626 auto comm = map->getComm ();
4627 const int myRank = comm->getRank ();
4628 const int numProcs = comm->getSize ();
4630 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4631 Teuchos::OSTab tab1 (out);
4635 out <<
"Local number of rows: " << lclNumRows << endl;
4637 if (vl > Teuchos::VERB_MEDIUM) {
4641 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4643 if (this->isConstantStride ()) {
4644 out <<
"Column stride: " << this->
getStride () << endl;
4647 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4655 auto X_dev =
view_.getDeviceCopy();
4656 auto X_host =
view_.getHostCopy();
4658 if(X_dev.data() == X_host.data()) {
4660 Details::print_vector(out,
"unified",X_host);
4663 Details::print_vector(out,
"host",X_host);
4664 Details::print_vector(out,
"dev",X_dev);
4669 return outStringP->str ();
4672 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4676 const std::string& className,
4677 const Teuchos::EVerbosityLevel verbLevel)
const
4679 using Teuchos::TypeNameTraits;
4680 using Teuchos::VERB_DEFAULT;
4681 using Teuchos::VERB_NONE;
4682 using Teuchos::VERB_LOW;
4684 const Teuchos::EVerbosityLevel vl =
4685 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4687 if (vl == VERB_NONE) {
4694 auto map = this->
getMap ();
4695 if (map.is_null ()) {
4698 auto comm = map->getComm ();
4699 if (comm.is_null ()) {
4703 const int myRank = comm->getRank ();
4712 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4716 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4717 out <<
"\"" << className <<
"\":" << endl;
4718 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4720 out <<
"Template parameters:" << endl;
4721 Teuchos::OSTab tab2 (out);
4722 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4723 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4724 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4725 <<
"Node: " << Node::name () << endl;
4727 if (this->getObjectLabel () !=
"") {
4728 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4731 if (className !=
"Tpetra::Vector") {
4732 out <<
"Number of columns: " << this->
getNumVectors () << endl;
4739 if (vl > VERB_LOW) {
4745 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4748 describe (Teuchos::FancyOStream &out,
4749 const Teuchos::EVerbosityLevel verbLevel)
const
4751 this->
describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4754 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4762 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4768 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4770 TEUCHOS_TEST_FOR_EXCEPTION
4772 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4773 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4774 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4775 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4776 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4778 TEUCHOS_TEST_FOR_EXCEPTION
4780 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4781 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4782 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4797 if (src_last_updated_on_host) {
4800 this->isConstantStride (),
4802 this->whichVectors_ (),
4808 this->isConstantStride (),
4810 this->whichVectors_ (),
4815 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4817 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4830 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4846 template <
class ST,
class LO,
class GO,
class NT>
4849 std::swap(mv.
map_, this->map_);
4850 std::swap(mv.
view_, this->view_);
4854#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4855 template <
class ST,
class LO,
class GO,
class NT>
4858 const Teuchos::SerialDenseMatrix<int, ST>& src)
4862 using IST =
typename MV::impl_scalar_type;
4863 using input_view_type =
4864 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4865 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4866 using pair_type = std::pair<LO, LO>;
4868 const LO numRows =
static_cast<LO
> (src.numRows ());
4869 const LO numCols =
static_cast<LO
> (src.numCols ());
4871 TEUCHOS_TEST_FOR_EXCEPTION
4874 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4875 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4877 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4879 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4880 input_view_type src_orig (src_raw, src.stride (), numCols);
4881 input_view_type src_view =
4882 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4884 constexpr bool src_isConstantStride =
true;
4885 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4889 src_isConstantStride,
4890 getMultiVectorWhichVectors (dst),
4894 template <
class ST,
class LO,
class GO,
class NT>
4896 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4901 using IST =
typename MV::impl_scalar_type;
4902 using output_view_type =
4903 Kokkos::View<IST**, Kokkos::LayoutLeft,
4904 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4905 using pair_type = std::pair<LO, LO>;
4907 const LO numRows =
static_cast<LO
> (dst.numRows ());
4908 const LO numCols =
static_cast<LO
> (dst.numCols ());
4910 TEUCHOS_TEST_FOR_EXCEPTION
4913 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4914 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4916 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4918 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4919 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4921 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4923 constexpr bool dst_isConstantStride =
true;
4924 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4927 localDeepCopy (dst_view,
4929 dst_isConstantStride,
4932 getMultiVectorWhichVectors (src));
4936 template <
class Scalar,
class LO,
class GO,
class NT>
4937 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4942 return Teuchos::rcp (
new MV (map, numVectors));
4945 template <
class ST,
class LO,
class GO,
class NT>
4963#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4964# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4965 template class MultiVector< SCALAR , LO , GO , NODE >; \
4966 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4967 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4968 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4969 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4972# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4973 template class MultiVector< SCALAR , LO , GO , NODE >; \
4974 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4979#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4981 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4982 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
void fill(const ExecutionSpace &execSpace, const ViewType &X, const ValueType &alpha, const IndexType numRows, const IndexType numCols)
Fill the entries of the given 1-D or 2-D Kokkos::View with the given scalar value alpha.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::isInterComm.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Teuchos::RCP< const map_type > map_
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
size_t getStride() const
Stride between columns in the multivector.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool areRemoteLIDsContiguous=false, const CombineMode CM=INSERT) override
Reallocate imports_ if needed.
void reduce()
Sum values of a locally replicated multivector across all processes.
virtual bool checkSizes(const SrcDistObject &sourceObj) override
Whether data redistribution between sourceObj and this object is legal.
void randomize()
Set all values in the multivector to pseudorandom numbers.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Map and their communicator.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using local (row) index.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
size_t getLocalLength() const
Local number of rows on the calling process.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using global row index.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
size_t getNumVectors() const
Number of columns in the multivector.
Map< LocalOrdinal, GlobalOrdinal, Node > map_type
The type of the Map specialization used by this class.
virtual size_t constantNumberOfPackets() const override
Number of packets to send per LID.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using local row index.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
Teuchos::RCP< MultiVector< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another MultiVector with the same entries, but converted to a different Scalar type T.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this).
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with the given verbosity level to a FancyOStream.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using global row index.
bool aliases(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &other) const
Whether this multivector's memory might alias other. This is conservative: if either this or other is...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
virtual std::string description() const override
A simple one-line description of this object.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
base_type::impl_scalar_type impl_scalar_type
base_type::dot_type dot_type
Implementation details of Tpetra.
Nonmember function that computes a residual Computes R = B - A * X.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
void localDeepCopy(const DstViewType &dst, const SrcViewType &src, const bool dstConstStride, const bool srcConstStride, const DstWhichVecsType &dstWhichVecs, const SrcWhichVecsType &srcWhichVecs)
Implementation of Tpetra::MultiVector deep copy of local data.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes).
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.