Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockMultiVector_def.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
41#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
42
44#include "Tpetra_BlockView.hpp"
45#include "Teuchos_OrdinalTraits.hpp"
46
47
48namespace Tpetra {
49
50template<class Scalar, class LO, class GO, class Node>
57
58template<class Scalar, class LO, class GO, class Node>
59Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
62{
64 const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
65 TEUCHOS_TEST_FOR_EXCEPTION(
66 src_bmv == nullptr, std::invalid_argument, "Tpetra::"
67 "BlockMultiVector: The source object of an Import or Export to a "
68 "BlockMultiVector, must also be a BlockMultiVector.");
69 return Teuchos::rcp (src_bmv, false); // nonowning RCP
70}
71
72template<class Scalar, class LO, class GO, class Node>
75 const Teuchos::DataAccess copyOrView) :
76 dist_object_type (in),
77 meshMap_ (in.meshMap_),
78 pointMap_ (in.pointMap_),
79 mv_ (in.mv_, copyOrView),
80 blockSize_ (in.blockSize_)
81{}
82
83template<class Scalar, class LO, class GO, class Node>
85BlockMultiVector (const map_type& meshMap,
86 const LO blockSize,
87 const LO numVecs) :
88 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
89 meshMap_ (meshMap),
90 pointMap_ (makePointMapRCP (meshMap, blockSize)),
91 mv_ (pointMap_, numVecs), // nonowning RCP is OK, since pointMap_ won't go away
92 blockSize_ (blockSize)
93{}
94
95template<class Scalar, class LO, class GO, class Node>
97BlockMultiVector (const map_type& meshMap,
98 const map_type& pointMap,
99 const LO blockSize,
100 const LO numVecs) :
101 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
102 meshMap_ (meshMap),
103 pointMap_ (new map_type(pointMap)),
104 mv_ (pointMap_, numVecs),
105 blockSize_ (blockSize)
106{}
107
108template<class Scalar, class LO, class GO, class Node>
110BlockMultiVector (const mv_type& X_mv,
111 const map_type& meshMap,
112 const LO blockSize) :
113 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
114 meshMap_ (meshMap),
115 blockSize_ (blockSize)
116{
117 using Teuchos::RCP;
118
119 if (X_mv.getCopyOrView () == Teuchos::View) {
120 // The input MultiVector has view semantics, so assignment just
121 // does a shallow copy.
122 mv_ = X_mv;
123 }
124 else if (X_mv.getCopyOrView () == Teuchos::Copy) {
125 // The input MultiVector has copy semantics. We can't change
126 // that, but we can make a view of the input MultiVector and
127 // change the view to have view semantics. (That sounds silly;
128 // shouldn't views always have view semantics? but remember that
129 // "view semantics" just governs the default behavior of the copy
130 // constructor and assignment operator.)
131 RCP<const mv_type> X_view_const;
132 const size_t numCols = X_mv.getNumVectors ();
133 if (numCols == 0) {
134 Teuchos::Array<size_t> cols (0); // view will have zero columns
135 X_view_const = X_mv.subView (cols ());
136 } else { // Range1D is an inclusive range
137 X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
138 }
139 TEUCHOS_TEST_FOR_EXCEPTION(
140 X_view_const.is_null (), std::logic_error, "Tpetra::"
141 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
142 "should never happen. Please report this bug to the Tpetra developers.");
143
144 // It's perfectly OK to cast away const here. Those view methods
145 // should be marked const anyway, because views can't change the
146 // allocation (just the entries themselves).
147 RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
148 TEUCHOS_TEST_FOR_EXCEPTION(
149 X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
150 "BlockMultiVector constructor: We just set a MultiVector "
151 "to have view semantics, but it claims that it doesn't have view "
152 "semantics. This should never happen. "
153 "Please report this bug to the Tpetra developers.");
154 mv_ = *X_view; // MultiVector::operator= does a shallow copy here
155 }
156
157 // At this point, mv_ has been assigned, so we can ignore X_mv.
158 Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
159 pointMap_ = pointMap;
160
161}
162
163template<class Scalar, class LO, class GO, class Node>
166 const map_type& newMeshMap,
167 const map_type& newPointMap,
168 const size_t offset) :
169 dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
170 meshMap_ (newMeshMap),
171 pointMap_ (new map_type(newPointMap)),
172 mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
173 blockSize_ (X.getBlockSize ())
174{}
175
176template<class Scalar, class LO, class GO, class Node>
179 const map_type& newMeshMap,
180 const size_t offset) :
181 dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
182 meshMap_ (newMeshMap),
183 pointMap_ (makePointMapRCP (newMeshMap, X.getBlockSize ())),
184 mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
185 blockSize_ (X.getBlockSize ())
186{}
187
188template<class Scalar, class LO, class GO, class Node>
191 dist_object_type (Teuchos::null),
192 blockSize_ (0)
193{}
194
195template<class Scalar, class LO, class GO, class Node>
198makePointMap (const map_type& meshMap, const LO blockSize)
199{
200typedef Tpetra::global_size_t GST;
201 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
202
203 const GST gblNumMeshMapInds =
204 static_cast<GST> (meshMap.getGlobalNumElements ());
205 const size_t lclNumMeshMapIndices =
206 static_cast<size_t> (meshMap.getLocalNumElements ());
207 const GST gblNumPointMapInds =
208 gblNumMeshMapInds * static_cast<GST> (blockSize);
209 const size_t lclNumPointMapInds =
210 lclNumMeshMapIndices * static_cast<size_t> (blockSize);
211 const GO indexBase = meshMap.getIndexBase ();
212
213 if (meshMap.isContiguous ()) {
214 return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
215 meshMap.getComm ());
216 }
217 else {
218 // "Hilbert's Hotel" trick: multiply each process' GIDs by
219 // blockSize, and fill in. That ensures correctness even if the
220 // mesh Map is overlapping.
221 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
222 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
223 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
224 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
225 const GO meshGid = lclMeshGblInds[g];
226 const GO pointGidStart = indexBase +
227 (meshGid - indexBase) * static_cast<GO> (blockSize);
228 const size_type offset = g * static_cast<size_type> (blockSize);
229 for (LO k = 0; k < blockSize; ++k) {
230 const GO pointGid = pointGidStart + static_cast<GO> (k);
231 lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
232 }
233 }
234 return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
235 meshMap.getComm ());
236
237 }
238}
239
240
241template<class Scalar, class LO, class GO, class Node>
242Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
244makePointMapRCP (const map_type& meshMap, const LO blockSize)
245{
246typedef Tpetra::global_size_t GST;
247 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
248
249 const GST gblNumMeshMapInds =
250 static_cast<GST> (meshMap.getGlobalNumElements ());
251 const size_t lclNumMeshMapIndices =
252 static_cast<size_t> (meshMap.getLocalNumElements ());
253 const GST gblNumPointMapInds =
254 gblNumMeshMapInds * static_cast<GST> (blockSize);
255 const size_t lclNumPointMapInds =
256 lclNumMeshMapIndices * static_cast<size_t> (blockSize);
257 const GO indexBase = meshMap.getIndexBase ();
258
259 if (meshMap.isContiguous ()) {
260 return Teuchos::rcp(new map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
261 meshMap.getComm ()));
262 }
263 else {
264 // "Hilbert's Hotel" trick: multiply each process' GIDs by
265 // blockSize, and fill in. That ensures correctness even if the
266 // mesh Map is overlapping.
267 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
268 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
269 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
270 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
271 const GO meshGid = lclMeshGblInds[g];
272 const GO pointGidStart = indexBase +
273 (meshGid - indexBase) * static_cast<GO> (blockSize);
274 const size_type offset = g * static_cast<size_type> (blockSize);
275 for (LO k = 0; k < blockSize; ++k) {
276 const GO pointGid = pointGidStart + static_cast<GO> (k);
277 lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
278 }
279 }
280 return Teuchos::rcp(new map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
281 meshMap.getComm ()));
282
283 }
284}
285
286template<class Scalar, class LO, class GO, class Node>
287void
289replaceLocalValuesImpl (const LO localRowIndex,
290 const LO colIndex,
291 const Scalar vals[])
292{
293 auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
294 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
295 getBlockSize ());
296 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
297 using exec_space = typename device_type::execution_space;
298 Kokkos::deep_copy (exec_space(), X_dst, X_src);
299}
300
301
302template<class Scalar, class LO, class GO, class Node>
303bool
305replaceLocalValues (const LO localRowIndex,
306 const LO colIndex,
307 const Scalar vals[])
308{
309 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
310 return false;
311 } else {
312 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
313 return true;
314 }
315}
316
317template<class Scalar, class LO, class GO, class Node>
318bool
320replaceGlobalValues (const GO globalRowIndex,
321 const LO colIndex,
322 const Scalar vals[])
323{
324 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
325 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
326 return false;
327 } else {
328 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
329 return true;
330 }
331}
332
333template<class Scalar, class LO, class GO, class Node>
334void
336sumIntoLocalValuesImpl (const LO localRowIndex,
337 const LO colIndex,
338 const Scalar vals[])
339{
340 auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
341 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
342 getBlockSize ());
343 AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
344}
345
346template<class Scalar, class LO, class GO, class Node>
347bool
349sumIntoLocalValues (const LO localRowIndex,
350 const LO colIndex,
351 const Scalar vals[])
352{
353 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
354 return false;
355 } else {
356 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
357 return true;
358 }
359}
360
361template<class Scalar, class LO, class GO, class Node>
362bool
364sumIntoGlobalValues (const GO globalRowIndex,
365 const LO colIndex,
366 const Scalar vals[])
367{
368 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
369 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
370 return false;
371 } else {
372 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
373 return true;
374 }
375}
376
377
378template<class Scalar, class LO, class GO, class Node>
379typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
381getLocalBlockHost (const LO localRowIndex,
382 const LO colIndex,
383 const Access::ReadOnlyStruct) const
384{
385 if (!isValidLocalMeshIndex(localRowIndex)) {
386 return const_little_host_vec_type();
387 } else {
388 const size_t blockSize = getBlockSize();
389 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
390 LO startRow = localRowIndex*blockSize;
391 LO endRow = startRow + blockSize;
392 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
393 colIndex);
394 }
395}
396
397template<class Scalar, class LO, class GO, class Node>
398typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
400getLocalBlockHost (const LO localRowIndex,
401 const LO colIndex,
402 const Access::OverwriteAllStruct)
403{
404 if (!isValidLocalMeshIndex(localRowIndex)) {
405 return little_host_vec_type();
406 } else {
407 const size_t blockSize = getBlockSize();
408 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
409 LO startRow = localRowIndex*blockSize;
410 LO endRow = startRow + blockSize;
411 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
412 colIndex);
413 }
414}
415
416template<class Scalar, class LO, class GO, class Node>
417typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
419getLocalBlockHost (const LO localRowIndex,
420 const LO colIndex,
421 const Access::ReadWriteStruct)
422{
423 if (!isValidLocalMeshIndex(localRowIndex)) {
424 return little_host_vec_type();
425 } else {
426 const size_t blockSize = getBlockSize();
427 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
428 LO startRow = localRowIndex*blockSize;
429 LO endRow = startRow + blockSize;
430 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
431 colIndex);
432 }
433}
434
435template<class Scalar, class LO, class GO, class Node>
436Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
438getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
439{
440 using Teuchos::rcp;
441 using Teuchos::rcpFromRef;
442
443 // The source object of an Import or Export must be a
444 // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
445 // them in that order; one must succeed. Note that the target of
446 // the Import or Export calls checkSizes in DistObject's doTransfer.
447 typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
448 const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*> (&src);
449 if (srcBlkVec == nullptr) {
450 const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
451 if (srcMultiVec == nullptr) {
452 // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
453 // currently does a shallow copy by default. This is why we
454 // return by RCP, rather than by value.
455 return rcp (new mv_type ());
456 } else { // src is a MultiVector
457 return rcp (srcMultiVec, false); // nonowning RCP
458 }
459 } else { // src is a BlockMultiVector
460 return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
461 }
462}
463
464template<class Scalar, class LO, class GO, class Node>
467{
468 return ! getMultiVectorFromSrcDistObject (src).is_null ();
469}
470
471template<class Scalar, class LO, class GO, class Node>
474(const SrcDistObject& src,
475 const size_t numSameIDs,
476 const Kokkos::DualView<const local_ordinal_type*,
477 buffer_device_type>& permuteToLIDs,
478 const Kokkos::DualView<const local_ordinal_type*,
479 buffer_device_type>& permuteFromLIDs,
480 const CombineMode CM)
481{
482 TEUCHOS_TEST_FOR_EXCEPTION
483 (true, std::logic_error,
484 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
485 "instead, create a point importer using makePointMap function.");
486}
487
488template<class Scalar, class LO, class GO, class Node>
491(const SrcDistObject& src,
492 const Kokkos::DualView<const local_ordinal_type*,
493 buffer_device_type>& exportLIDs,
494 Kokkos::DualView<packet_type*,
495 buffer_device_type>& exports,
496 Kokkos::DualView<size_t*,
497 buffer_device_type> numPacketsPerLID,
498 size_t& constantNumPackets)
499{
500 TEUCHOS_TEST_FOR_EXCEPTION
501 (true, std::logic_error,
502 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
503 "instead, create a point importer using makePointMap function.");
504}
505
506template<class Scalar, class LO, class GO, class Node>
509(const Kokkos::DualView<const local_ordinal_type*,
510 buffer_device_type>& importLIDs,
511 Kokkos::DualView<packet_type*,
512 buffer_device_type> imports,
513 Kokkos::DualView<size_t*,
514 buffer_device_type> numPacketsPerLID,
515 const size_t constantNumPackets,
516 const CombineMode combineMode)
517{
518 TEUCHOS_TEST_FOR_EXCEPTION
519 (true, std::logic_error,
520 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
521 "instead, create a point importer using makePointMap function.");
522}
523
524template<class Scalar, class LO, class GO, class Node>
526isValidLocalMeshIndex (const LO meshLocalIndex) const
527{
528 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
529 meshMap_.isNodeLocalElement (meshLocalIndex);
530}
531
532template<class Scalar, class LO, class GO, class Node>
534putScalar (const Scalar& val)
535{
536 mv_.putScalar (val);
537}
538
539template<class Scalar, class LO, class GO, class Node>
541scale (const Scalar& val)
542{
543 mv_.scale (val);
544}
545
546template<class Scalar, class LO, class GO, class Node>
548update (const Scalar& alpha,
550 const Scalar& beta)
551{
552 mv_.update (alpha, X.mv_, beta);
553}
554
555namespace Impl {
556// Y := alpha * D * X
557template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
558struct BlockWiseMultiply {
559 typedef typename ViewD::size_type Size;
560
561private:
562 typedef typename ViewD::device_type Device;
563 typedef typename ViewD::non_const_value_type ImplScalar;
564 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
565
566 template <typename View>
567 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
568 typename View::device_type, Unmanaged>;
569 typedef UnmanagedView<ViewY> UnMViewY;
570 typedef UnmanagedView<ViewD> UnMViewD;
571 typedef UnmanagedView<ViewX> UnMViewX;
572
573 const Size block_size_;
574 Scalar alpha_;
575 UnMViewY Y_;
576 UnMViewD D_;
577 UnMViewX X_;
578
579public:
580 BlockWiseMultiply (const Size block_size, const Scalar& alpha,
581 const ViewY& Y, const ViewD& D, const ViewX& X)
582 : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
583 {}
584
585 KOKKOS_INLINE_FUNCTION
586 void operator() (const Size k) const {
587 const auto zero = Kokkos::ArithTraits<Scalar>::zero();
588 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
589 const auto num_vecs = X_.extent(1);
590 for (Size i = 0; i < num_vecs; ++i) {
591 Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
592 auto X_curBlk = Kokkos::subview(X_, kslice, i);
593 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
594 // Y_curBlk := alpha * D_curBlk * X_curBlk.
595 // Recall that GEMV does an update (+=) of the last argument.
596 Tpetra::FILL(Y_curBlk, zero);
597 Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
598 }
599 }
600};
601
602template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
603inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
604createBlockWiseMultiply (const int block_size, const Scalar& alpha,
605 const ViewY& Y, const ViewD& D, const ViewX& X) {
606 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
607}
608
609template <typename ViewY,
610 typename Scalar,
611 typename ViewD,
612 typename ViewZ,
613 typename LO = typename ViewY::size_type>
614class BlockJacobiUpdate {
615private:
616 typedef typename ViewD::device_type Device;
617 typedef typename ViewD::non_const_value_type ImplScalar;
618 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
619
620 template <typename ViewType>
621 using UnmanagedView = Kokkos::View<typename ViewType::data_type,
622 typename ViewType::array_layout,
623 typename ViewType::device_type,
624 Unmanaged>;
625 typedef UnmanagedView<ViewY> UnMViewY;
626 typedef UnmanagedView<ViewD> UnMViewD;
627 typedef UnmanagedView<ViewZ> UnMViewZ;
628
629 const LO blockSize_;
630 UnMViewY Y_;
631 const Scalar alpha_;
632 UnMViewD D_;
633 UnMViewZ Z_;
634 const Scalar beta_;
635
636public:
637 BlockJacobiUpdate (const ViewY& Y,
638 const Scalar& alpha,
639 const ViewD& D,
640 const ViewZ& Z,
641 const Scalar& beta) :
642 blockSize_ (D.extent (1)),
643 // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
644 Y_ (Y),
645 alpha_ (alpha),
646 D_ (D),
647 Z_ (Z),
648 beta_ (beta)
649 {
650 static_assert (static_cast<int> (ViewY::rank) == 1,
651 "Y must have rank 1.");
652 static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
653 static_assert (static_cast<int> (ViewZ::rank) == 1,
654 "Z must have rank 1.");
655 // static_assert (static_cast<int> (ViewZ::rank) ==
656 // static_cast<int> (ViewY::rank),
657 // "Z must have the same rank as Y.");
658 }
659
660 KOKKOS_INLINE_FUNCTION void
661 operator() (const LO& k) const
662 {
663 using Kokkos::ALL;
664 using Kokkos::subview;
665 typedef Kokkos::pair<LO, LO> range_type;
666 typedef Kokkos::ArithTraits<Scalar> KAT;
667
668 // We only have to implement the alpha != 0 case.
669
670 auto D_curBlk = subview (D_, k, ALL (), ALL ());
671 const range_type kslice (k*blockSize_, (k+1)*blockSize_);
672
673 // Z.update (STS::one (), X, -STS::one ()); // assume this is done
674
675 auto Z_curBlk = subview (Z_, kslice);
676 auto Y_curBlk = subview (Y_, kslice);
677 // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
678 if (beta_ == KAT::zero ()) {
679 Tpetra::FILL (Y_curBlk, KAT::zero ());
680 }
681 else if (beta_ != KAT::one ()) {
682 Tpetra::SCAL (beta_, Y_curBlk);
683 }
684 Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
685 }
686};
687
688template<class ViewY,
689 class Scalar,
690 class ViewD,
691 class ViewZ,
692 class LO = typename ViewD::size_type>
693void
694blockJacobiUpdate (const ViewY& Y,
695 const Scalar& alpha,
696 const ViewD& D,
697 const ViewZ& Z,
698 const Scalar& beta)
699{
700 static_assert (Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
701 static_assert (Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
702 static_assert (Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
703 static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
704 "Y and Z must have the same rank.");
705 static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
706
707 const auto lclNumMeshRows = D.extent (0);
708
709#ifdef HAVE_TPETRA_DEBUG
710 // D.extent(0) is the (local) number of mesh rows.
711 // D.extent(1) is the block size. Thus, their product should be
712 // the local number of point rows, that is, the number of rows in Y.
713 const auto blkSize = D.extent (1);
714 const auto lclNumPtRows = lclNumMeshRows * blkSize;
715 TEUCHOS_TEST_FOR_EXCEPTION
716 (Y.extent (0) != lclNumPtRows, std::invalid_argument,
717 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
718 "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
719 << " = " << lclNumPtRows << ".");
720 TEUCHOS_TEST_FOR_EXCEPTION
721 (Y.extent (0) != Z.extent (0), std::invalid_argument,
722 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
723 "Z.extent(0) = " << Z.extent (0) << ".");
724 TEUCHOS_TEST_FOR_EXCEPTION
725 (Y.extent (1) != Z.extent (1), std::invalid_argument,
726 "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
727 "Z.extent(1) = " << Z.extent (1) << ".");
728#endif // HAVE_TPETRA_DEBUG
729
730 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
731 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
732 // lclNumMeshRows must fit in LO, else the Map would not be correct.
733 range_type range (0, static_cast<LO> (lclNumMeshRows));
734 Kokkos::parallel_for (range, functor);
735}
736
737} // namespace Impl
738
739template<class Scalar, class LO, class GO, class Node>
741blockWiseMultiply (const Scalar& alpha,
742 const Kokkos::View<const impl_scalar_type***,
743 device_type, Kokkos::MemoryUnmanaged>& D,
745{
746 using Kokkos::ALL;
747 typedef typename device_type::execution_space exec_space;
748 const LO lclNumMeshRows = meshMap_.getLocalNumElements ();
749
750 if (alpha == STS::zero ()) {
751 this->putScalar (STS::zero ());
752 }
753 else { // alpha != 0
754 const LO blockSize = this->getBlockSize ();
755 const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
756 auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
757 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
758 auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
759
760 // Use an explicit RangePolicy with the desired execution space.
761 // Otherwise, if you just use a number, it runs on the default
762 // execution space. For example, if the default execution space
763 // is Cuda but the current execution space is Serial, using just a
764 // number would incorrectly run with Cuda.
765 Kokkos::RangePolicy<exec_space, LO> range (0, lclNumMeshRows);
766 Kokkos::parallel_for (range, bwm);
767 }
768}
769
770template<class Scalar, class LO, class GO, class Node>
772blockJacobiUpdate (const Scalar& alpha,
773 const Kokkos::View<const impl_scalar_type***,
774 device_type, Kokkos::MemoryUnmanaged>& D,
777 const Scalar& beta)
778{
779 using Kokkos::ALL;
780 using Kokkos::subview;
781 typedef impl_scalar_type IST;
782
783 const IST alphaImpl = static_cast<IST> (alpha);
784 const IST betaImpl = static_cast<IST> (beta);
785 const LO numVecs = mv_.getNumVectors ();
786
787 if (alpha == STS::zero ()) { // Y := beta * Y
788 this->scale (beta);
789 }
790 else { // alpha != 0
791 Z.update (STS::one (), X, -STS::one ());
792 for (LO j = 0; j < numVecs; ++j) {
793 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
794 auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
795 auto Y_lcl_j = subview (Y_lcl, ALL (), j);
796 auto Z_lcl_j = subview (Z_lcl, ALL (), j);
797 Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
798 }
799 }
800}
801
802} // namespace Tpetra
803
804//
805// Explicit instantiation macro
806//
807// Must be expanded from within the Tpetra namespace!
808//
809#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
810 template class BlockMultiVector< S, LO, GO, NODE >;
811
812#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size.
map_type meshMap_
Mesh Map given to constructor.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
Tpetra::Map< LO, GO, Node > map_type
The specialization of Tpetra::Map that this class uses.
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
LO local_ordinal_type
The type of local indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace for new Tpetra features that are not ready for public release, but are ready for evaluation...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
CombineMode
Rule for combining data in an Import or Export.