Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43// clang-format off
44
45
46// mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
47// incorrect (as() is not a device function) and usually irrelevant
48// (it would only matter if LocalOrdinal were bigger than size_t on a
49// particular platform, which is unlikely).
50
51// KDD Aug 2020: In the permute/pack/unpack functors,
52// the Enabled template parameter is specialized in
53// downstream packages like Stokhos using SFINAE to provide partial
54// specializations based on the scalar type of the SrcView and DstView
55// template parameters. See #7898.
56// Do not remove it before checking with Stokhos and other specializing users.
57
58#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
59#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
60
61#include "Kokkos_Core.hpp"
62#include "Kokkos_ArithTraits.hpp"
63#include <sstream>
64#include <stdexcept>
65
66namespace Tpetra {
67namespace KokkosRefactor {
68namespace Details {
69
75namespace Impl {
76
83template<class IntegerType,
84 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
86 static KOKKOS_INLINE_FUNCTION bool
87 test (const IntegerType x,
88 const IntegerType exclusiveUpperBound);
89};
90
91// Partial specialization for the case where IntegerType IS signed.
92template<class IntegerType>
93struct OutOfBounds<IntegerType, true> {
94 static KOKKOS_INLINE_FUNCTION bool
95 test (const IntegerType x,
96 const IntegerType exclusiveUpperBound)
97 {
98 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
99 }
100};
101
102// Partial specialization for the case where IntegerType is NOT signed.
103template<class IntegerType>
104struct OutOfBounds<IntegerType, false> {
105 static KOKKOS_INLINE_FUNCTION bool
106 test (const IntegerType x,
107 const IntegerType exclusiveUpperBound)
108 {
109 return x >= exclusiveUpperBound;
110 }
111};
112
115template<class IntegerType>
116KOKKOS_INLINE_FUNCTION bool
117outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
118{
119 return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
120}
121
122} // namespace Impl
123
124 // Functors for implementing packAndPrepare and unpackAndCombine
125 // through parallel_for
126
127 template <typename DstView, typename SrcView, typename IdxView,
128 typename Enabled = void>
129 struct PackArraySingleColumn {
130 typedef typename DstView::execution_space execution_space;
131 typedef typename execution_space::size_type size_type;
132
133 DstView dst;
134 SrcView src;
135 IdxView idx;
136 size_t col;
137
138 PackArraySingleColumn (const DstView& dst_,
139 const SrcView& src_,
140 const IdxView& idx_,
141 const size_t col_) :
142 dst(dst_), src(src_), idx(idx_), col(col_) {}
143
144 KOKKOS_INLINE_FUNCTION void
145 operator() (const size_type k) const {
146 dst(k) = src(idx(k), col);
147 }
148
149 static void
150 pack (const DstView& dst,
151 const SrcView& src,
152 const IdxView& idx,
153 const size_t col,
154 const execution_space &space)
155 {
156 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
157 Kokkos::parallel_for
158 ("Tpetra::MultiVector pack one col",
159 range_type (space, 0, idx.size ()),
160 PackArraySingleColumn (dst, src, idx, col));
161 }
162 };
163
164 template <typename DstView,
165 typename SrcView,
166 typename IdxView,
167 typename SizeType = typename DstView::execution_space::size_type,
168 typename Enabled = void>
169 class PackArraySingleColumnWithBoundsCheck {
170 private:
171 static_assert (Kokkos::is_view<DstView>::value,
172 "DstView must be a Kokkos::View.");
173 static_assert (Kokkos::is_view<SrcView>::value,
174 "SrcView must be a Kokkos::View.");
175 static_assert (Kokkos::is_view<IdxView>::value,
176 "IdxView must be a Kokkos::View.");
177 static_assert (static_cast<int> (DstView::rank) == 1,
178 "DstView must be a rank-1 Kokkos::View.");
179 static_assert (static_cast<int> (SrcView::rank) == 2,
180 "SrcView must be a rank-2 Kokkos::View.");
181 static_assert (static_cast<int> (IdxView::rank) == 1,
182 "IdxView must be a rank-1 Kokkos::View.");
183 static_assert (std::is_integral<SizeType>::value,
184 "SizeType must be a built-in integer type.");
185
186 using execution_space = typename DstView::execution_space;
187
188 public:
189 typedef SizeType size_type;
190 using value_type = size_t;
191
192 private:
193 DstView dst;
194 SrcView src;
195 IdxView idx;
196 size_type col;
197 execution_space space;
198
199 public:
200 PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
201 const SrcView& src_,
202 const IdxView& idx_,
203 const size_type col_) :
204 dst (dst_), src (src_), idx (idx_), col (col_) {}
205
206 KOKKOS_INLINE_FUNCTION void
207 operator() (const size_type k, value_type& lclErrCount) const {
208 using index_type = typename IdxView::non_const_value_type;
209
210 const index_type lclRow = idx(k);
211 if (lclRow < static_cast<index_type> (0) ||
212 lclRow >= static_cast<index_type> (src.extent (0))) {
213 ++lclErrCount;
214 }
215 else {
216 dst(k) = src(lclRow, col);
217 }
218 }
219
220 KOKKOS_INLINE_FUNCTION
221 void init (value_type& initialErrorCount) const {
222 initialErrorCount = 0;
223 }
224
225 KOKKOS_INLINE_FUNCTION void
226 join (value_type& dstErrorCount,
227 const value_type& srcErrorCount) const
228 {
229 dstErrorCount += srcErrorCount;
230 }
231
232 static void
233 pack (const DstView& dst,
234 const SrcView& src,
235 const IdxView& idx,
236 const size_type col,
237 const execution_space &space)
238 {
239 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
240 typedef typename IdxView::non_const_value_type index_type;
241
242 size_t errorCount = 0;
243 Kokkos::parallel_reduce
244 ("Tpetra::MultiVector pack one col debug only",
245 range_type (space, 0, idx.size ()),
246 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
247 errorCount);
248
249 if (errorCount != 0) {
250 // Go back and find the out-of-bounds entries in the index
251 // array. Performance doesn't matter since we are already in
252 // an error state, so we can do this sequentially, on host.
253 auto idx_h = Kokkos::create_mirror_view (idx);
254
255 // DEEP_COPY REVIEW - NOT TESTED
256 Kokkos::deep_copy (idx_h, idx);
257
258 std::vector<index_type> badIndices;
259 const size_type numInds = idx_h.extent (0);
260 for (size_type k = 0; k < numInds; ++k) {
261 if (idx_h(k) < static_cast<index_type> (0) ||
262 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
263 badIndices.push_back (idx_h(k));
264 }
265 }
266
267 TEUCHOS_TEST_FOR_EXCEPTION
268 (errorCount != badIndices.size (), std::logic_error,
269 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
270 << " != badIndices.size() = " << badIndices.size () << ". This sho"
271 "uld never happen. Please report this to the Tpetra developers.");
272
273 std::ostringstream os;
274 os << "MultiVector single-column pack kernel had "
275 << badIndices.size () << " out-of bounds index/ices. "
276 "Here they are: [";
277 for (size_t k = 0; k < badIndices.size (); ++k) {
278 os << badIndices[k];
279 if (k + 1 < badIndices.size ()) {
280 os << ", ";
281 }
282 }
283 os << "].";
284 throw std::runtime_error (os.str ());
285 }
286 }
287 };
288
289
290 template <typename DstView, typename SrcView, typename IdxView>
291 void
292 pack_array_single_column (const DstView& dst,
293 const SrcView& src,
294 const IdxView& idx,
295 const size_t col,
296 const bool debug,
297 const typename DstView::execution_space &space)
298 {
299 static_assert (Kokkos::is_view<DstView>::value,
300 "DstView must be a Kokkos::View.");
301 static_assert (Kokkos::is_view<SrcView>::value,
302 "SrcView must be a Kokkos::View.");
303 static_assert (Kokkos::is_view<IdxView>::value,
304 "IdxView must be a Kokkos::View.");
305 static_assert (static_cast<int> (DstView::rank) == 1,
306 "DstView must be a rank-1 Kokkos::View.");
307 static_assert (static_cast<int> (SrcView::rank) == 2,
308 "SrcView must be a rank-2 Kokkos::View.");
309 static_assert (static_cast<int> (IdxView::rank) == 1,
310 "IdxView must be a rank-1 Kokkos::View.");
311
312 using execution_space = typename DstView::execution_space;
313
314 static_assert (Kokkos::SpaceAccessibility<execution_space,
315 typename DstView::memory_space>::accessible,
316 "DstView not accessible from execution space");
317 static_assert (Kokkos::SpaceAccessibility<execution_space,
318 typename SrcView::memory_space>::accessible,
319 "SrcView not accessible from execution space");
320 static_assert (Kokkos::SpaceAccessibility<execution_space,
321 typename IdxView::memory_space>::accessible,
322 "IdxView not accessible from execution space");
323
324 if (debug) {
325 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
326 impl_type::pack (dst, src, idx, col, space);
327 }
328 else {
329 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
330 impl_type::pack (dst, src, idx, col, space);
331 }
332 }
333
336 template <typename DstView, typename SrcView, typename IdxView>
337 void
338 pack_array_single_column (const DstView& dst,
339 const SrcView& src,
340 const IdxView& idx,
341 const size_t col,
342 const bool debug = true)
343 {
344 pack_array_single_column(dst, src, idx, col, debug, typename DstView::execution_space());
345 }
346
347 template <typename DstView, typename SrcView, typename IdxView,
348 typename Enabled = void>
349 struct PackArrayMultiColumn {
350 using execution_space = typename DstView::execution_space;
351 typedef typename execution_space::size_type size_type;
352
353 DstView dst;
354 SrcView src;
355 IdxView idx;
356 size_t numCols;
357
358 PackArrayMultiColumn (const DstView& dst_,
359 const SrcView& src_,
360 const IdxView& idx_,
361 const size_t numCols_) :
362 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
363
364 KOKKOS_INLINE_FUNCTION void
365 operator() (const size_type k) const {
366 const typename IdxView::value_type localRow = idx(k);
367 const size_t offset = k*numCols;
368 for (size_t j = 0; j < numCols; ++j) {
369 dst(offset + j) = src(localRow, j);
370 }
371 }
372
373 static void pack(const DstView& dst,
374 const SrcView& src,
375 const IdxView& idx,
376 size_t numCols,
377 const execution_space &space) {
378 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
379 Kokkos::parallel_for
380 ("Tpetra::MultiVector pack multicol const stride",
381 range_type (space, 0, idx.size ()),
382 PackArrayMultiColumn (dst, src, idx, numCols));
383 }
384 };
385
386 template <typename DstView,
387 typename SrcView,
388 typename IdxView,
389 typename SizeType = typename DstView::execution_space::size_type,
390 typename Enabled = void>
391 class PackArrayMultiColumnWithBoundsCheck {
392 public:
393 using size_type = SizeType;
394 using value_type = size_t;
395 using execution_space = typename DstView::execution_space;
396
397 private:
398 DstView dst;
399 SrcView src;
400 IdxView idx;
401 size_type numCols;
402
403 public:
404 PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
405 const SrcView& src_,
406 const IdxView& idx_,
407 const size_type numCols_) :
408 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
409
410 KOKKOS_INLINE_FUNCTION void
411 operator() (const size_type k, value_type& lclErrorCount) const {
412 typedef typename IdxView::non_const_value_type index_type;
413
414 const index_type lclRow = idx(k);
415 if (lclRow < static_cast<index_type> (0) ||
416 lclRow >= static_cast<index_type> (src.extent (0))) {
417 ++lclErrorCount; // failed
418 }
419 else {
420 const size_type offset = k*numCols;
421 for (size_type j = 0; j < numCols; ++j) {
422 dst(offset + j) = src(lclRow, j);
423 }
424 }
425 }
426
427 KOKKOS_INLINE_FUNCTION
428 void init (value_type& initialErrorCount) const {
429 initialErrorCount = 0;
430 }
431
432 KOKKOS_INLINE_FUNCTION void
433 join (value_type& dstErrorCount,
434 const value_type& srcErrorCount) const
435 {
436 dstErrorCount += srcErrorCount;
437 }
438
439 static void
440 pack (const DstView& dst,
441 const SrcView& src,
442 const IdxView& idx,
443 const size_type numCols,
444 const execution_space &space)
445 {
446 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
447 typedef typename IdxView::non_const_value_type index_type;
448
449 size_t errorCount = 0;
450 Kokkos::parallel_reduce
451 ("Tpetra::MultiVector pack multicol const stride debug only",
452 range_type (space, 0, idx.size ()),
453 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
454 errorCount);
455 if (errorCount != 0) {
456 // Go back and find the out-of-bounds entries in the index
457 // array. Performance doesn't matter since we are already in
458 // an error state, so we can do this sequentially, on host.
459 auto idx_h = Kokkos::create_mirror_view (idx);
460
461 // DEEP_COPY REVIEW - NOT TESTED
462 Kokkos::deep_copy (idx_h, idx);
463
464 std::vector<index_type> badIndices;
465 const size_type numInds = idx_h.extent (0);
466 for (size_type k = 0; k < numInds; ++k) {
467 if (idx_h(k) < static_cast<index_type> (0) ||
468 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
469 badIndices.push_back (idx_h(k));
470 }
471 }
472
473 TEUCHOS_TEST_FOR_EXCEPTION
474 (errorCount != badIndices.size (), std::logic_error,
475 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
476 << " != badIndices.size() = " << badIndices.size () << ". This sho"
477 "uld never happen. Please report this to the Tpetra developers.");
478
479 std::ostringstream os;
480 os << "Tpetra::MultiVector multiple-column pack kernel had "
481 << badIndices.size () << " out-of bounds index/ices (errorCount = "
482 << errorCount << "): [";
483 for (size_t k = 0; k < badIndices.size (); ++k) {
484 os << badIndices[k];
485 if (k + 1 < badIndices.size ()) {
486 os << ", ";
487 }
488 }
489 os << "].";
490 throw std::runtime_error (os.str ());
491 }
492 }
493 };
494
495
496 template <typename DstView,
497 typename SrcView,
498 typename IdxView>
499 void
500 pack_array_multi_column (const DstView& dst,
501 const SrcView& src,
502 const IdxView& idx,
503 const size_t numCols,
504 const bool debug,
505 const typename DstView::execution_space &space)
506 {
507 static_assert (Kokkos::is_view<DstView>::value,
508 "DstView must be a Kokkos::View.");
509 static_assert (Kokkos::is_view<SrcView>::value,
510 "SrcView must be a Kokkos::View.");
511 static_assert (Kokkos::is_view<IdxView>::value,
512 "IdxView must be a Kokkos::View.");
513 static_assert (static_cast<int> (DstView::rank) == 1,
514 "DstView must be a rank-1 Kokkos::View.");
515 static_assert (static_cast<int> (SrcView::rank) == 2,
516 "SrcView must be a rank-2 Kokkos::View.");
517 static_assert (static_cast<int> (IdxView::rank) == 1,
518 "IdxView must be a rank-1 Kokkos::View.");
519
520 using execution_space = typename DstView::execution_space;
521
522 static_assert (Kokkos::SpaceAccessibility<execution_space,
523 typename DstView::memory_space>::accessible,
524 "DstView not accessible from execution space");
525 static_assert (Kokkos::SpaceAccessibility<execution_space,
526 typename SrcView::memory_space>::accessible,
527 "SrcView not accessible from execution space");
528 static_assert (Kokkos::SpaceAccessibility<execution_space,
529 typename IdxView::memory_space>::accessible,
530 "IdxView not accessible from execution space");
531
532 if (debug) {
533 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
534 SrcView, IdxView> impl_type;
535 impl_type::pack (dst, src, idx, numCols, space);
536 }
537 else {
538 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
539 impl_type::pack (dst, src, idx, numCols, space);
540 }
541 }
542
543 template <typename DstView,
544 typename SrcView,
545 typename IdxView>
546 void
547 pack_array_multi_column (const DstView& dst,
548 const SrcView& src,
549 const IdxView& idx,
550 const size_t numCols,
551 const bool debug = true) {
552 pack_array_multi_column(dst, src, idx, numCols, debug, typename DstView::execution_space());
553 }
554
555 template <typename DstView, typename SrcView, typename IdxView,
556 typename ColView, typename Enabled = void>
557 struct PackArrayMultiColumnVariableStride {
558 using execution_space = typename DstView::execution_space;
559 typedef typename execution_space::size_type size_type;
560
561 DstView dst;
562 SrcView src;
563 IdxView idx;
564 ColView col;
565 size_t numCols;
566
567 PackArrayMultiColumnVariableStride (const DstView& dst_,
568 const SrcView& src_,
569 const IdxView& idx_,
570 const ColView& col_,
571 const size_t numCols_) :
572 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
573
574 KOKKOS_INLINE_FUNCTION
575 void operator() (const size_type k) const {
576 const typename IdxView::value_type localRow = idx(k);
577 const size_t offset = k*numCols;
578 for (size_t j = 0; j < numCols; ++j) {
579 dst(offset + j) = src(localRow, col(j));
580 }
581 }
582
583 static void pack(const DstView& dst,
584 const SrcView& src,
585 const IdxView& idx,
586 const ColView& col,
587 size_t numCols,
588 const execution_space &space) {
589 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
590 Kokkos::parallel_for
591 ("Tpetra::MultiVector pack multicol var stride",
592 range_type (space, 0, idx.size ()),
593 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
594 }
595 };
596
597 template <typename DstView,
598 typename SrcView,
599 typename IdxView,
600 typename ColView,
601 typename SizeType = typename DstView::execution_space::size_type,
602 typename Enabled = void>
603 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
604 public:
605 using size_type = SizeType;
606 using value_type = size_t;
607 using execution_space = typename DstView::execution_space;
608
609 private:
610 DstView dst;
611 SrcView src;
612 IdxView idx;
613 ColView col;
614 size_type numCols;
615
616 public:
617 PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
618 const SrcView& src_,
619 const IdxView& idx_,
620 const ColView& col_,
621 const size_type numCols_) :
622 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
623
624 KOKKOS_INLINE_FUNCTION void
625 operator() (const size_type k, value_type& lclErrorCount) const {
626 typedef typename IdxView::non_const_value_type row_index_type;
627 typedef typename ColView::non_const_value_type col_index_type;
628
629 const row_index_type lclRow = idx(k);
630 if (lclRow < static_cast<row_index_type> (0) ||
631 lclRow >= static_cast<row_index_type> (src.extent (0))) {
632 ++lclErrorCount = 0;
633 }
634 else {
635 const size_type offset = k*numCols;
636 for (size_type j = 0; j < numCols; ++j) {
637 const col_index_type lclCol = col(j);
638 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
639 ++lclErrorCount = 0;
640 }
641 else { // all indices are valid; do the assignment
642 dst(offset + j) = src(lclRow, lclCol);
643 }
644 }
645 }
646 }
647
648 KOKKOS_INLINE_FUNCTION void
649 init (value_type& initialErrorCount) const {
650 initialErrorCount = 0;
651 }
652
653 KOKKOS_INLINE_FUNCTION void
654 join (value_type& dstErrorCount,
655 const value_type& srcErrorCount) const
656 {
657 dstErrorCount += srcErrorCount;
658 }
659
660 static void
661 pack (const DstView& dst,
662 const SrcView& src,
663 const IdxView& idx,
664 const ColView& col,
665 const size_type numCols,
666 const execution_space &space)
667 {
668 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
669 using row_index_type = typename IdxView::non_const_value_type;
670 using col_index_type = typename ColView::non_const_value_type;
671
672 size_t errorCount = 0;
673 Kokkos::parallel_reduce
674 ("Tpetra::MultiVector pack multicol var stride debug only",
675 range_type (space, 0, idx.size ()),
676 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
677 col, numCols),
678 errorCount);
679 if (errorCount != 0) {
680 constexpr size_t maxNumBadIndicesToPrint = 100;
681
682 std::ostringstream os; // for error reporting
683 os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
684 "found " << errorCount
685 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
686
687 // Go back and find any out-of-bounds entries in the array of
688 // row indices. Performance doesn't matter since we are already
689 // in an error state, so we can do this sequentially, on host.
690 auto idx_h = Kokkos::create_mirror_view (idx);
691
692 // DEEP_COPY REVIEW - NOT TESTED
693 Kokkos::deep_copy (idx_h, idx);
694
695 std::vector<row_index_type> badRows;
696 const size_type numRowInds = idx_h.extent (0);
697 for (size_type k = 0; k < numRowInds; ++k) {
698 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
699 badRows.push_back (idx_h(k));
700 }
701 }
702
703 if (badRows.size () != 0) {
704 os << badRows.size () << " out-of-bounds row ind"
705 << (badRows.size () != size_t (1) ? "ices" : "ex");
706 if (badRows.size () <= maxNumBadIndicesToPrint) {
707 os << ": [";
708 for (size_t k = 0; k < badRows.size (); ++k) {
709 os << badRows[k];
710 if (k + 1 < badRows.size ()) {
711 os << ", ";
712 }
713 }
714 os << "]. ";
715 }
716 else {
717 os << ". ";
718 }
719 }
720 else {
721 os << "No out-of-bounds row indices. ";
722 }
723
724 // Go back and find any out-of-bounds entries in the array
725 // of column indices.
726 auto col_h = Kokkos::create_mirror_view (col);
727
728 // DEEP_COPY REVIEW - NOT TESTED
729 Kokkos::deep_copy (col_h, col);
730
731 std::vector<col_index_type> badCols;
732 const size_type numColInds = col_h.extent (0);
733 for (size_type k = 0; k < numColInds; ++k) {
734 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
735 badCols.push_back (col_h(k));
736 }
737 }
738
739 if (badCols.size () != 0) {
740 os << badCols.size () << " out-of-bounds column ind"
741 << (badCols.size () != size_t (1) ? "ices" : "ex");
742 if (badCols.size () <= maxNumBadIndicesToPrint) {
743 os << ": [";
744 for (size_t k = 0; k < badCols.size (); ++k) {
745 os << badCols[k];
746 if (k + 1 < badCols.size ()) {
747 os << ", ";
748 }
749 }
750 os << "]. ";
751 }
752 else {
753 os << ". ";
754 }
755 }
756 else {
757 os << "No out-of-bounds column indices. ";
758 }
759
760 TEUCHOS_TEST_FOR_EXCEPTION
761 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
762 std::logic_error, "Tpetra::MultiVector variable stride pack "
763 "kernel reports errorCount=" << errorCount << ", but we failed "
764 "to find any bad rows or columns. This should never happen. "
765 "Please report this to the Tpetra developers.");
766
767 throw std::runtime_error (os.str ());
768 } // hasErr
769 }
770 };
771
772 template <typename DstView,
773 typename SrcView,
774 typename IdxView,
775 typename ColView>
776 void
777 pack_array_multi_column_variable_stride (const DstView& dst,
778 const SrcView& src,
779 const IdxView& idx,
780 const ColView& col,
781 const size_t numCols,
782 const bool debug,
783 const typename DstView::execution_space &space)
784 {
785 static_assert (Kokkos::is_view<DstView>::value,
786 "DstView must be a Kokkos::View.");
787 static_assert (Kokkos::is_view<SrcView>::value,
788 "SrcView must be a Kokkos::View.");
789 static_assert (Kokkos::is_view<IdxView>::value,
790 "IdxView must be a Kokkos::View.");
791 static_assert (Kokkos::is_view<ColView>::value,
792 "ColView must be a Kokkos::View.");
793 static_assert (static_cast<int> (DstView::rank) == 1,
794 "DstView must be a rank-1 Kokkos::View.");
795 static_assert (static_cast<int> (SrcView::rank) == 2,
796 "SrcView must be a rank-2 Kokkos::View.");
797 static_assert (static_cast<int> (IdxView::rank) == 1,
798 "IdxView must be a rank-1 Kokkos::View.");
799 static_assert (static_cast<int> (ColView::rank) == 1,
800 "ColView must be a rank-1 Kokkos::View.");
801
802 using execution_space = typename DstView::execution_space;
803
804 static_assert (Kokkos::SpaceAccessibility<execution_space,
805 typename DstView::memory_space>::accessible,
806 "DstView not accessible from execution space");
807 static_assert (Kokkos::SpaceAccessibility<execution_space,
808 typename SrcView::memory_space>::accessible,
809 "SrcView not accessible from execution space");
810 static_assert (Kokkos::SpaceAccessibility<execution_space,
811 typename IdxView::memory_space>::accessible,
812 "IdxView not accessible from execution space");
813
814 if (debug) {
815 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
816 SrcView, IdxView, ColView> impl_type;
817 impl_type::pack (dst, src, idx, col, numCols, space);
818 }
819 else {
820 typedef PackArrayMultiColumnVariableStride<DstView,
821 SrcView, IdxView, ColView> impl_type;
822 impl_type::pack (dst, src, idx, col, numCols, space);
823 }
824 }
825
826 template <typename DstView,
827 typename SrcView,
828 typename IdxView,
829 typename ColView>
830 void
831 pack_array_multi_column_variable_stride (const DstView& dst,
832 const SrcView& src,
833 const IdxView& idx,
834 const ColView& col,
835 const size_t numCols,
836 const bool debug = true) {
837 pack_array_multi_column_variable_stride(dst, src, idx, col, numCols, debug,
838 typename DstView::execution_space());
839 }
840
841 // Tag types to indicate whether to use atomic updates in the
842 // various CombineMode "Op"s.
843 struct atomic_tag {};
844 struct nonatomic_tag {};
845
846 struct AddOp {
847 template<class SC>
848 KOKKOS_INLINE_FUNCTION
849 void operator() (atomic_tag, SC& dest, const SC& src) const {
850 Kokkos::atomic_add (&dest, src);
851 }
852
853 template<class SC>
854 KOKKOS_INLINE_FUNCTION
855 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
856 dest += src;
857 }
858 };
859
860 struct InsertOp {
861 // There's no point to using Kokkos::atomic_assign for the REPLACE
862 // or INSERT CombineModes, since this is not a well-defined
863 // reduction for MultiVector anyway. See GitHub Issue #4417
864 // (which this fixes).
865 template<class SC>
866 KOKKOS_INLINE_FUNCTION
867 void operator() (atomic_tag, SC& dest, const SC& src) const {
868 dest = src;
869 }
870
871 template<class SC>
872 KOKKOS_INLINE_FUNCTION
873 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
874 dest = src;
875 }
876 };
877
878 struct AbsMaxOp {
879 template <class Scalar>
880 struct AbsMaxHelper{
881 Scalar value;
882
883 KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
884 auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
885 auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
886 value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
887 return *this;
888 }
889
890 KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
891 AbsMaxHelper ret = *this;
892 ret += rhs;
893 return ret;
894 }
895 };
896
897 template <typename SC>
898 KOKKOS_INLINE_FUNCTION
899 void operator() (atomic_tag, SC& dst, const SC& src) const {
900 Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
901 }
902
903 template <typename SC>
904 KOKKOS_INLINE_FUNCTION
905 void operator() (nonatomic_tag, SC& dst, const SC& src) const {
906 auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
907 auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
908 dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
909 }
910 };
911
912 template <typename ExecutionSpace,
913 typename DstView,
914 typename SrcView,
915 typename IdxView,
916 typename Op,
917 typename Enabled = void>
918 class UnpackArrayMultiColumn {
919 private:
920 static_assert (Kokkos::is_view<DstView>::value,
921 "DstView must be a Kokkos::View.");
922 static_assert (Kokkos::is_view<SrcView>::value,
923 "SrcView must be a Kokkos::View.");
924 static_assert (Kokkos::is_view<IdxView>::value,
925 "IdxView must be a Kokkos::View.");
926 static_assert (static_cast<int> (DstView::rank) == 2,
927 "DstView must be a rank-2 Kokkos::View.");
928 static_assert (static_cast<int> (SrcView::rank) == 1,
929 "SrcView must be a rank-1 Kokkos::View.");
930 static_assert (static_cast<int> (IdxView::rank) == 1,
931 "IdxView must be a rank-1 Kokkos::View.");
932
933 public:
934 typedef typename ExecutionSpace::execution_space execution_space;
935 typedef typename execution_space::size_type size_type;
936
937 private:
938 DstView dst;
939 SrcView src;
940 IdxView idx;
941 Op op;
942 size_t numCols;
943
944 public:
945 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
946 const DstView& dst_,
947 const SrcView& src_,
948 const IdxView& idx_,
949 const Op& op_,
950 const size_t numCols_) :
951 dst (dst_),
952 src (src_),
953 idx (idx_),
954 op (op_),
955 numCols (numCols_)
956 {}
957
958 template<class TagType>
959 KOKKOS_INLINE_FUNCTION void
960 operator() (TagType tag, const size_type k) const
961 {
962 static_assert
963 (std::is_same<TagType, atomic_tag>::value ||
964 std::is_same<TagType, nonatomic_tag>::value,
965 "TagType must be atomic_tag or nonatomic_tag.");
966
967 const typename IdxView::value_type localRow = idx(k);
968 const size_t offset = k*numCols;
969 for (size_t j = 0; j < numCols; ++j) {
970 op (tag, dst(localRow, j), src(offset+j));
971 }
972 }
973
974 static void
975 unpack (const ExecutionSpace& execSpace,
976 const DstView& dst,
977 const SrcView& src,
978 const IdxView& idx,
979 const Op& op,
980 const size_t numCols,
981 const bool use_atomic_updates)
982 {
983 if (use_atomic_updates) {
984 using range_type =
985 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
986 Kokkos::parallel_for
987 ("Tpetra::MultiVector unpack const stride atomic",
988 range_type (0, idx.size ()),
989 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
990 }
991 else {
992 using range_type =
993 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
994 Kokkos::parallel_for
995 ("Tpetra::MultiVector unpack const stride nonatomic",
996 range_type (0, idx.size ()),
997 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
998 }
999 }
1000 };
1001
1002 template <typename ExecutionSpace,
1003 typename DstView,
1004 typename SrcView,
1005 typename IdxView,
1006 typename Op,
1007 typename SizeType = typename ExecutionSpace::execution_space::size_type,
1008 typename Enabled = void>
1009 class UnpackArrayMultiColumnWithBoundsCheck {
1010 private:
1011 static_assert (Kokkos::is_view<DstView>::value,
1012 "DstView must be a Kokkos::View.");
1013 static_assert (Kokkos::is_view<SrcView>::value,
1014 "SrcView must be a Kokkos::View.");
1015 static_assert (Kokkos::is_view<IdxView>::value,
1016 "IdxView must be a Kokkos::View.");
1017 static_assert (static_cast<int> (DstView::rank) == 2,
1018 "DstView must be a rank-2 Kokkos::View.");
1019 static_assert (static_cast<int> (SrcView::rank) == 1,
1020 "SrcView must be a rank-1 Kokkos::View.");
1021 static_assert (static_cast<int> (IdxView::rank) == 1,
1022 "IdxView must be a rank-1 Kokkos::View.");
1023 static_assert (std::is_integral<SizeType>::value,
1024 "SizeType must be a built-in integer type.");
1025
1026 public:
1027 using execution_space = typename ExecutionSpace::execution_space;
1028 using size_type = SizeType;
1029 using value_type = size_t;
1030
1031 private:
1032 DstView dst;
1033 SrcView src;
1034 IdxView idx;
1035 Op op;
1036 size_type numCols;
1037
1038 public:
1039 UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
1040 const DstView& dst_,
1041 const SrcView& src_,
1042 const IdxView& idx_,
1043 const Op& op_,
1044 const size_type numCols_) :
1045 dst (dst_),
1046 src (src_),
1047 idx (idx_),
1048 op (op_),
1049 numCols (numCols_)
1050 {}
1051
1052 template<class TagType>
1053 KOKKOS_INLINE_FUNCTION void
1054 operator() (TagType tag,
1055 const size_type k,
1056 size_t& lclErrCount) const
1057 {
1058 static_assert
1059 (std::is_same<TagType, atomic_tag>::value ||
1060 std::is_same<TagType, nonatomic_tag>::value,
1061 "TagType must be atomic_tag or nonatomic_tag.");
1062 using index_type = typename IdxView::non_const_value_type;
1063
1064 const index_type lclRow = idx(k);
1065 if (lclRow < static_cast<index_type> (0) ||
1066 lclRow >= static_cast<index_type> (dst.extent (0))) {
1067 ++lclErrCount;
1068 }
1069 else {
1070 const size_type offset = k*numCols;
1071 for (size_type j = 0; j < numCols; ++j) {
1072 op (tag, dst(lclRow,j), src(offset+j));
1073 }
1074 }
1075 }
1076
1077 template<class TagType>
1078 KOKKOS_INLINE_FUNCTION void
1079 init (TagType, size_t& initialErrorCount) const {
1080 initialErrorCount = 0;
1081 }
1082
1083 template<class TagType>
1084 KOKKOS_INLINE_FUNCTION void
1085 join (TagType,
1086 size_t& dstErrorCount,
1087 const size_t& srcErrorCount) const
1088 {
1089 dstErrorCount += srcErrorCount;
1090 }
1091
1092 static void
1093 unpack (const ExecutionSpace& execSpace,
1094 const DstView& dst,
1095 const SrcView& src,
1096 const IdxView& idx,
1097 const Op& op,
1098 const size_type numCols,
1099 const bool use_atomic_updates)
1100 {
1101 using index_type = typename IdxView::non_const_value_type;
1102
1103 size_t errorCount = 0;
1104 if (use_atomic_updates) {
1105 using range_type =
1106 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1107 Kokkos::parallel_reduce
1108 ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1109 range_type (0, idx.size ()),
1110 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1111 idx, op, numCols),
1112 errorCount);
1113 }
1114 else {
1115 using range_type =
1116 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1117 Kokkos::parallel_reduce
1118 ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1119 range_type (0, idx.size ()),
1120 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1121 idx, op, numCols),
1122 errorCount);
1123 }
1124
1125 if (errorCount != 0) {
1126 // Go back and find the out-of-bounds entries in the index
1127 // array. Performance doesn't matter since we are already in
1128 // an error state, so we can do this sequentially, on host.
1129 auto idx_h = Kokkos::create_mirror_view (idx);
1130
1131 // DEEP_COPY REVIEW - NOT TESTED
1132 Kokkos::deep_copy (idx_h, idx);
1133
1134 std::vector<index_type> badIndices;
1135 const size_type numInds = idx_h.extent (0);
1136 for (size_type k = 0; k < numInds; ++k) {
1137 if (idx_h(k) < static_cast<index_type> (0) ||
1138 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1139 badIndices.push_back (idx_h(k));
1140 }
1141 }
1142
1143 if (errorCount != badIndices.size ()) {
1144 std::ostringstream os;
1145 os << "MultiVector unpack kernel: errorCount = " << errorCount
1146 << " != badIndices.size() = " << badIndices.size ()
1147 << ". This should never happen. "
1148 "Please report this to the Tpetra developers.";
1149 throw std::logic_error (os.str ());
1150 }
1151
1152 std::ostringstream os;
1153 os << "MultiVector unpack kernel had " << badIndices.size ()
1154 << " out-of bounds index/ices. Here they are: [";
1155 for (size_t k = 0; k < badIndices.size (); ++k) {
1156 os << badIndices[k];
1157 if (k + 1 < badIndices.size ()) {
1158 os << ", ";
1159 }
1160 }
1161 os << "].";
1162 throw std::runtime_error (os.str ());
1163 }
1164 }
1165 };
1166
1167 template <typename ExecutionSpace,
1168 typename DstView,
1169 typename SrcView,
1170 typename IdxView,
1171 typename Op>
1172 void
1173 unpack_array_multi_column (const ExecutionSpace& execSpace,
1174 const DstView& dst,
1175 const SrcView& src,
1176 const IdxView& idx,
1177 const Op& op,
1178 const size_t numCols,
1179 const bool use_atomic_updates,
1180 const bool debug)
1181 {
1182 static_assert (Kokkos::is_view<DstView>::value,
1183 "DstView must be a Kokkos::View.");
1184 static_assert (Kokkos::is_view<SrcView>::value,
1185 "SrcView must be a Kokkos::View.");
1186 static_assert (Kokkos::is_view<IdxView>::value,
1187 "IdxView must be a Kokkos::View.");
1188 static_assert (static_cast<int> (DstView::rank) == 2,
1189 "DstView must be a rank-2 Kokkos::View.");
1190 static_assert (static_cast<int> (SrcView::rank) == 1,
1191 "SrcView must be a rank-1 Kokkos::View.");
1192 static_assert (static_cast<int> (IdxView::rank) == 1,
1193 "IdxView must be a rank-1 Kokkos::View.");
1194
1195 if (debug) {
1196 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1197 DstView, SrcView, IdxView, Op> impl_type;
1198 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1199 use_atomic_updates);
1200 }
1201 else {
1202 typedef UnpackArrayMultiColumn<ExecutionSpace,
1203 DstView, SrcView, IdxView, Op> impl_type;
1204 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1205 use_atomic_updates);
1206 }
1207 }
1208
1209 template <typename ExecutionSpace,
1210 typename DstView,
1211 typename SrcView,
1212 typename IdxView,
1213 typename ColView,
1214 typename Op,
1215 typename Enabled = void>
1216 class UnpackArrayMultiColumnVariableStride {
1217 private:
1218 static_assert (Kokkos::is_view<DstView>::value,
1219 "DstView must be a Kokkos::View.");
1220 static_assert (Kokkos::is_view<SrcView>::value,
1221 "SrcView must be a Kokkos::View.");
1222 static_assert (Kokkos::is_view<IdxView>::value,
1223 "IdxView must be a Kokkos::View.");
1224 static_assert (Kokkos::is_view<ColView>::value,
1225 "ColView must be a Kokkos::View.");
1226 static_assert (static_cast<int> (DstView::rank) == 2,
1227 "DstView must be a rank-2 Kokkos::View.");
1228 static_assert (static_cast<int> (SrcView::rank) == 1,
1229 "SrcView must be a rank-1 Kokkos::View.");
1230 static_assert (static_cast<int> (IdxView::rank) == 1,
1231 "IdxView must be a rank-1 Kokkos::View.");
1232 static_assert (static_cast<int> (ColView::rank) == 1,
1233 "ColView must be a rank-1 Kokkos::View.");
1234
1235 public:
1236 using execution_space = typename ExecutionSpace::execution_space;
1237 using size_type = typename execution_space::size_type;
1238
1239 private:
1240 DstView dst;
1241 SrcView src;
1242 IdxView idx;
1243 ColView col;
1244 Op op;
1245 size_t numCols;
1246
1247 public:
1248 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1249 const DstView& dst_,
1250 const SrcView& src_,
1251 const IdxView& idx_,
1252 const ColView& col_,
1253 const Op& op_,
1254 const size_t numCols_) :
1255 dst (dst_),
1256 src (src_),
1257 idx (idx_),
1258 col (col_),
1259 op (op_),
1260 numCols (numCols_)
1261 {}
1262
1263 template<class TagType>
1264 KOKKOS_INLINE_FUNCTION void
1265 operator() (TagType tag, const size_type k) const
1266 {
1267 static_assert
1268 (std::is_same<TagType, atomic_tag>::value ||
1269 std::is_same<TagType, nonatomic_tag>::value,
1270 "TagType must be atomic_tag or nonatomic_tag.");
1271
1272 const typename IdxView::value_type localRow = idx(k);
1273 const size_t offset = k*numCols;
1274 for (size_t j = 0; j < numCols; ++j) {
1275 op (tag, dst(localRow, col(j)), src(offset+j));
1276 }
1277 }
1278
1279 static void
1280 unpack (const ExecutionSpace& execSpace,
1281 const DstView& dst,
1282 const SrcView& src,
1283 const IdxView& idx,
1284 const ColView& col,
1285 const Op& op,
1286 const size_t numCols,
1287 const bool use_atomic_updates)
1288 {
1289 if (use_atomic_updates) {
1290 using range_type =
1291 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1292 Kokkos::parallel_for
1293 ("Tpetra::MultiVector unpack var stride atomic",
1294 range_type (0, idx.size ()),
1295 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1296 idx, col, op, numCols));
1297 }
1298 else {
1299 using range_type =
1300 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1301 Kokkos::parallel_for
1302 ("Tpetra::MultiVector unpack var stride nonatomic",
1303 range_type (0, idx.size ()),
1304 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1305 idx, col, op, numCols));
1306 }
1307 }
1308 };
1309
1310 template <typename ExecutionSpace,
1311 typename DstView,
1312 typename SrcView,
1313 typename IdxView,
1314 typename ColView,
1315 typename Op,
1316 typename SizeType = typename ExecutionSpace::execution_space::size_type,
1317 typename Enabled = void>
1318 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1319 private:
1320 static_assert (Kokkos::is_view<DstView>::value,
1321 "DstView must be a Kokkos::View.");
1322 static_assert (Kokkos::is_view<SrcView>::value,
1323 "SrcView must be a Kokkos::View.");
1324 static_assert (Kokkos::is_view<IdxView>::value,
1325 "IdxView must be a Kokkos::View.");
1326 static_assert (Kokkos::is_view<ColView>::value,
1327 "ColView must be a Kokkos::View.");
1328 static_assert (static_cast<int> (DstView::rank) == 2,
1329 "DstView must be a rank-2 Kokkos::View.");
1330 static_assert (static_cast<int> (SrcView::rank) == 1,
1331 "SrcView must be a rank-1 Kokkos::View.");
1332 static_assert (static_cast<int> (IdxView::rank) == 1,
1333 "IdxView must be a rank-1 Kokkos::View.");
1334 static_assert (static_cast<int> (ColView::rank) == 1,
1335 "ColView must be a rank-1 Kokkos::View.");
1336 static_assert (std::is_integral<SizeType>::value,
1337 "SizeType must be a built-in integer type.");
1338
1339 public:
1340 using execution_space = typename ExecutionSpace::execution_space;
1341 using size_type = SizeType;
1342 using value_type = size_t;
1343
1344 private:
1345 DstView dst;
1346 SrcView src;
1347 IdxView idx;
1348 ColView col;
1349 Op op;
1350 size_type numCols;
1351
1352 public:
1353 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1354 (const ExecutionSpace& /* execSpace */,
1355 const DstView& dst_,
1356 const SrcView& src_,
1357 const IdxView& idx_,
1358 const ColView& col_,
1359 const Op& op_,
1360 const size_t numCols_) :
1361 dst (dst_),
1362 src (src_),
1363 idx (idx_),
1364 col (col_),
1365 op (op_),
1366 numCols (numCols_)
1367 {}
1368
1369 template<class TagType>
1370 KOKKOS_INLINE_FUNCTION void
1371 operator() (TagType tag,
1372 const size_type k,
1373 value_type& lclErrorCount) const
1374 {
1375 static_assert
1376 (std::is_same<TagType, atomic_tag>::value ||
1377 std::is_same<TagType, nonatomic_tag>::value,
1378 "TagType must be atomic_tag or nonatomic_tag.");
1379 using row_index_type = typename IdxView::non_const_value_type;
1380 using col_index_type = typename ColView::non_const_value_type;
1381
1382 const row_index_type lclRow = idx(k);
1383 if (lclRow < static_cast<row_index_type> (0) ||
1384 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1385 ++lclErrorCount;
1386 }
1387 else {
1388 const size_type offset = k * numCols;
1389 for (size_type j = 0; j < numCols; ++j) {
1390 const col_index_type lclCol = col(j);
1391 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1392 ++lclErrorCount;
1393 }
1394 else { // all indices are valid; apply the op
1395 op (tag, dst(lclRow, col(j)), src(offset+j));
1396 }
1397 }
1398 }
1399 }
1400
1401 KOKKOS_INLINE_FUNCTION void
1402 init (value_type& initialErrorCount) const {
1403 initialErrorCount = 0;
1404 }
1405
1406 KOKKOS_INLINE_FUNCTION void
1407 join (value_type& dstErrorCount,
1408 const value_type& srcErrorCount) const
1409 {
1410 dstErrorCount += srcErrorCount;
1411 }
1412
1413 static void
1414 unpack (const ExecutionSpace& execSpace,
1415 const DstView& dst,
1416 const SrcView& src,
1417 const IdxView& idx,
1418 const ColView& col,
1419 const Op& op,
1420 const size_type numCols,
1421 const bool use_atomic_updates)
1422 {
1423 using row_index_type = typename IdxView::non_const_value_type;
1424 using col_index_type = typename ColView::non_const_value_type;
1425
1426 size_t errorCount = 0;
1427 if (use_atomic_updates) {
1428 using range_type =
1429 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1430 Kokkos::parallel_reduce
1431 ("Tpetra::MultiVector unpack var stride atomic debug only",
1432 range_type (0, idx.size ()),
1433 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1434 (execSpace, dst, src, idx, col, op, numCols),
1435 errorCount);
1436 }
1437 else {
1438 using range_type =
1439 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1440 Kokkos::parallel_reduce
1441 ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1442 range_type (0, idx.size ()),
1443 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1444 (execSpace, dst, src, idx, col, op, numCols),
1445 errorCount);
1446 }
1447
1448 if (errorCount != 0) {
1449 constexpr size_t maxNumBadIndicesToPrint = 100;
1450
1451 std::ostringstream os; // for error reporting
1452 os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1453 "found " << errorCount
1454 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1455
1456 // Go back and find any out-of-bounds entries in the array of
1457 // row indices. Performance doesn't matter since we are
1458 // already in an error state, so we can do this sequentially,
1459 // on host.
1460 auto idx_h = Kokkos::create_mirror_view (idx);
1461
1462 // DEEP_COPY REVIEW - NOT TESTED
1463 Kokkos::deep_copy (idx_h, idx);
1464
1465 std::vector<row_index_type> badRows;
1466 const size_type numRowInds = idx_h.extent (0);
1467 for (size_type k = 0; k < numRowInds; ++k) {
1468 if (idx_h(k) < static_cast<row_index_type> (0) ||
1469 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1470 badRows.push_back (idx_h(k));
1471 }
1472 }
1473
1474 if (badRows.size () != 0) {
1475 os << badRows.size () << " out-of-bounds row ind"
1476 << (badRows.size () != size_t (1) ? "ices" : "ex");
1477 if (badRows.size () <= maxNumBadIndicesToPrint) {
1478 os << ": [";
1479 for (size_t k = 0; k < badRows.size (); ++k) {
1480 os << badRows[k];
1481 if (k + 1 < badRows.size ()) {
1482 os << ", ";
1483 }
1484 }
1485 os << "]. ";
1486 }
1487 else {
1488 os << ". ";
1489 }
1490 }
1491 else {
1492 os << "No out-of-bounds row indices. ";
1493 }
1494
1495 // Go back and find any out-of-bounds entries in the array
1496 // of column indices.
1497 auto col_h = Kokkos::create_mirror_view (col);
1498
1499 // DEEP_COPY REVIEW - NOT TESTED
1500 Kokkos::deep_copy (col_h, col);
1501
1502 std::vector<col_index_type> badCols;
1503 const size_type numColInds = col_h.extent (0);
1504 for (size_type k = 0; k < numColInds; ++k) {
1505 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1506 badCols.push_back (col_h(k));
1507 }
1508 }
1509
1510 if (badCols.size () != 0) {
1511 os << badCols.size () << " out-of-bounds column ind"
1512 << (badCols.size () != size_t (1) ? "ices" : "ex");
1513 if (badCols.size () <= maxNumBadIndicesToPrint) {
1514 for (size_t k = 0; k < badCols.size (); ++k) {
1515 os << ": [";
1516 os << badCols[k];
1517 if (k + 1 < badCols.size ()) {
1518 os << ", ";
1519 }
1520 }
1521 os << "]. ";
1522 }
1523 else {
1524 os << ". ";
1525 }
1526 }
1527 else {
1528 os << "No out-of-bounds column indices. ";
1529 }
1530
1531 TEUCHOS_TEST_FOR_EXCEPTION
1532 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1533 std::logic_error, "Tpetra::MultiVector variable stride unpack "
1534 "kernel reports errorCount=" << errorCount << ", but we failed "
1535 "to find any bad rows or columns. This should never happen. "
1536 "Please report this to the Tpetra developers.");
1537
1538 throw std::runtime_error (os.str ());
1539 } // hasErr
1540 }
1541 };
1542
1543 template <typename ExecutionSpace,
1544 typename DstView,
1545 typename SrcView,
1546 typename IdxView,
1547 typename ColView,
1548 typename Op>
1549 void
1550 unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1551 const DstView& dst,
1552 const SrcView& src,
1553 const IdxView& idx,
1554 const ColView& col,
1555 const Op& op,
1556 const size_t numCols,
1557 const bool use_atomic_updates,
1558 const bool debug)
1559 {
1560 static_assert (Kokkos::is_view<DstView>::value,
1561 "DstView must be a Kokkos::View.");
1562 static_assert (Kokkos::is_view<SrcView>::value,
1563 "SrcView must be a Kokkos::View.");
1564 static_assert (Kokkos::is_view<IdxView>::value,
1565 "IdxView must be a Kokkos::View.");
1566 static_assert (Kokkos::is_view<ColView>::value,
1567 "ColView must be a Kokkos::View.");
1568 static_assert (static_cast<int> (DstView::rank) == 2,
1569 "DstView must be a rank-2 Kokkos::View.");
1570 static_assert (static_cast<int> (SrcView::rank) == 1,
1571 "SrcView must be a rank-1 Kokkos::View.");
1572 static_assert (static_cast<int> (IdxView::rank) == 1,
1573 "IdxView must be a rank-1 Kokkos::View.");
1574 static_assert (static_cast<int> (ColView::rank) == 1,
1575 "ColView must be a rank-1 Kokkos::View.");
1576
1577 if (debug) {
1578 using impl_type =
1579 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1580 DstView, SrcView, IdxView, ColView, Op>;
1581 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1582 use_atomic_updates);
1583 }
1584 else {
1585 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1586 DstView, SrcView, IdxView, ColView, Op>;
1587 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1588 use_atomic_updates);
1589 }
1590 }
1591
1592 template <typename DstView, typename SrcView,
1593 typename DstIdxView, typename SrcIdxView, typename Op,
1594 typename Enabled = void>
1595 struct PermuteArrayMultiColumn {
1596 using size_type = typename DstView::size_type;
1597
1598 DstView dst;
1599 SrcView src;
1600 DstIdxView dst_idx;
1601 SrcIdxView src_idx;
1602 size_t numCols;
1603 Op op;
1604
1605 PermuteArrayMultiColumn (const DstView& dst_,
1606 const SrcView& src_,
1607 const DstIdxView& dst_idx_,
1608 const SrcIdxView& src_idx_,
1609 const size_t numCols_,
1610 const Op& op_) :
1611 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1612 numCols(numCols_), op(op_) {}
1613
1614 KOKKOS_INLINE_FUNCTION void
1615 operator() (const size_type k) const {
1616 const typename DstIdxView::value_type toRow = dst_idx(k);
1617 const typename SrcIdxView::value_type fromRow = src_idx(k);
1618 nonatomic_tag tag; // permute does not need atomics
1619 for (size_t j = 0; j < numCols; ++j) {
1620 op(tag, dst(toRow, j), src(fromRow, j));
1621 }
1622 }
1623
1624 template <typename ExecutionSpace>
1625 static void
1626 permute (const ExecutionSpace &space,
1627 const DstView& dst,
1628 const SrcView& src,
1629 const DstIdxView& dst_idx,
1630 const SrcIdxView& src_idx,
1631 const size_t numCols,
1632 const Op& op)
1633 {
1634 using range_type =
1635 Kokkos::RangePolicy<ExecutionSpace, size_type>;
1636 // permute does not need atomics for Op
1637 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1638 Kokkos::parallel_for
1639 ("Tpetra::MultiVector permute multicol const stride",
1640 range_type (space, 0, n),
1641 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1642 }
1643 };
1644
1645// clang-format on
1646// To do: Add enable_if<> restrictions on DstView::rank == 1,
1647// SrcView::rank == 2
1648template <typename ExecutionSpace, typename DstView, typename SrcView,
1649 typename DstIdxView, typename SrcIdxView, typename Op>
1650void permute_array_multi_column(const ExecutionSpace &space, const DstView &dst,
1651 const SrcView &src, const DstIdxView &dst_idx,
1652 const SrcIdxView &src_idx, size_t numCols,
1653 const Op &op) {
1654 PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView,
1655 Op>::permute(space, dst, src, dst_idx, src_idx,
1656 numCols, op);
1657}
1658// clang-format off
1659
1660 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1661 // SrcView::rank == 2
1662 template <typename DstView, typename SrcView,
1663 typename DstIdxView, typename SrcIdxView, typename Op>
1664 void permute_array_multi_column(const DstView& dst,
1665 const SrcView& src,
1666 const DstIdxView& dst_idx,
1667 const SrcIdxView& src_idx,
1668 size_t numCols,
1669 const Op& op) {
1670 using execution_space = typename DstView::execution_space;
1671 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1672 execution_space(), dst, src, dst_idx, src_idx, numCols, op);
1673 }
1674
1675 template <typename DstView, typename SrcView,
1676 typename DstIdxView, typename SrcIdxView,
1677 typename DstColView, typename SrcColView, typename Op,
1678 typename Enabled = void>
1679 struct PermuteArrayMultiColumnVariableStride {
1680 using size_type = typename DstView::size_type;
1681
1682 DstView dst;
1683 SrcView src;
1684 DstIdxView dst_idx;
1685 SrcIdxView src_idx;
1686 DstColView dst_col;
1687 SrcColView src_col;
1688 size_t numCols;
1689 Op op;
1690
1691 PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1692 const SrcView& src_,
1693 const DstIdxView& dst_idx_,
1694 const SrcIdxView& src_idx_,
1695 const DstColView& dst_col_,
1696 const SrcColView& src_col_,
1697 const size_t numCols_,
1698 const Op& op_) :
1699 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1700 dst_col(dst_col_), src_col(src_col_),
1701 numCols(numCols_), op(op_) {}
1702
1703 KOKKOS_INLINE_FUNCTION void
1704 operator() (const size_type k) const {
1705 const typename DstIdxView::value_type toRow = dst_idx(k);
1706 const typename SrcIdxView::value_type fromRow = src_idx(k);
1707 const nonatomic_tag tag; // permute does not need atomics
1708 for (size_t j = 0; j < numCols; ++j) {
1709 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1710 }
1711 }
1712
1713 template <typename ExecutionSpace>
1714 static void
1715 permute ( const ExecutionSpace &space,
1716 const DstView& dst,
1717 const SrcView& src,
1718 const DstIdxView& dst_idx,
1719 const SrcIdxView& src_idx,
1720 const DstColView& dst_col,
1721 const SrcColView& src_col,
1722 const size_t numCols,
1723 const Op& op)
1724 {
1725
1726 static_assert(Kokkos::SpaceAccessibility<
1727 ExecutionSpace, typename DstView::memory_space>::accessible,
1728 "ExecutionSpace must be able to access DstView");
1729
1730 using range_type = Kokkos::RangePolicy<ExecutionSpace, size_type>;
1731 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1732 Kokkos::parallel_for
1733 ("Tpetra::MultiVector permute multicol var stride",
1734 range_type (space, 0, n),
1735 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1736 dst_col, src_col, numCols, op));
1737 }
1738 };
1739
1740// clang-format on
1741// To do: Add enable_if<> restrictions on DstView::rank == 1,
1742// SrcView::rank == 2
1743template <typename ExecutionSpace, typename DstView, typename SrcView,
1744 typename DstIdxView, typename SrcIdxView, typename DstColView,
1745 typename SrcColView, typename Op>
1746void permute_array_multi_column_variable_stride(
1747 const ExecutionSpace &space, const DstView &dst, const SrcView &src,
1748 const DstIdxView &dst_idx, const SrcIdxView &src_idx,
1749 const DstColView &dst_col, const SrcColView &src_col, size_t numCols,
1750 const Op &op) {
1751 PermuteArrayMultiColumnVariableStride<DstView, SrcView, DstIdxView,
1752 SrcIdxView, DstColView, SrcColView,
1753 Op>::permute(space, dst, src, dst_idx,
1754 src_idx, dst_col, src_col,
1755 numCols, op);
1756}
1757// clang-format off
1758
1759 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1760 // SrcView::rank == 2
1761 template <typename DstView, typename SrcView,
1762 typename DstIdxView, typename SrcIdxView,
1763 typename DstColView, typename SrcColView, typename Op>
1764 void permute_array_multi_column_variable_stride(const DstView& dst,
1765 const SrcView& src,
1766 const DstIdxView& dst_idx,
1767 const SrcIdxView& src_idx,
1768 const DstColView& dst_col,
1769 const SrcColView& src_col,
1770 size_t numCols,
1771 const Op& op) {
1772 using execution_space = typename DstView::execution_space;
1773 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1774 DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1775 execution_space(), dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1776 }
1777
1778} // Details namespace
1779} // KokkosRefactor namespace
1780} // Tpetra namespace
1781
1782#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...