Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels_MP_Vector.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
45#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
46
47//----------------------------------------------------------------------------
48// Specializations of Tpetra::MultiVector pack/unpack kernels for MP::Vector
49//----------------------------------------------------------------------------
50
51#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
54
55namespace Tpetra {
56namespace KokkosRefactor {
57namespace Details {
58
59 // Functors for implementing packAndPrepare and unpackAndCombine
60 // through parallel_for
61
62 template <typename DstView, typename SrcView, typename IdxView>
64 DstView, SrcView, IdxView,
65 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
66 Kokkos::is_view_mp_vector<SrcView>::value >::type >
67 {
68 typedef typename DstView::execution_space execution_space;
69 typedef typename execution_space::size_type size_type;
70
71 DstView dst;
72 SrcView src;
73 IdxView idx;
74 size_t col;
75
76 PackArraySingleColumn(const DstView& dst_,
77 const SrcView& src_,
78 const IdxView& idx_,
79 size_t col_) :
80 dst(dst_), src(src_), idx(idx_), col(col_) {}
81
82 KOKKOS_INLINE_FUNCTION
83 void operator()( const size_type k ) const {
84 dst(k) = src(idx(k), col);
85 }
86
87 KOKKOS_INLINE_FUNCTION
88 void operator()( const size_type k, const size_type tidx ) const {
89 dst(k).fastAccessCoeff(tidx) = src(idx(k), col).fastAccessCoeff(tidx);
90 }
91
92 static void pack(const DstView& dst,
93 const SrcView& src,
94 const IdxView& idx,
95 size_t col,
96 const execution_space &space) {
97 Kokkos::parallel_for(
99 space, idx.size(), Kokkos::dimension_scalar(dst) ),
101 }
102 };
103
104 template <typename DstView, typename SrcView, typename IdxView>
106 DstView, SrcView, IdxView,
107 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
108 Kokkos::is_view_mp_vector<SrcView>::value >::type >
109 {
110 typedef typename DstView::execution_space execution_space;
111 typedef typename execution_space::size_type size_type;
112
113 DstView dst;
114 SrcView src;
115 IdxView idx;
116 size_t numCols;
117
118 PackArrayMultiColumn(const DstView& dst_,
119 const SrcView& src_,
120 const IdxView& idx_,
121 size_t numCols_) :
122 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
123
124 KOKKOS_INLINE_FUNCTION
125 void operator()( const size_type k ) const {
126 const typename IdxView::value_type localRow = idx(k);
127 const size_t offset = k*numCols;
128 for (size_t j = 0; j < numCols; ++j)
129 dst(offset + j) = src(localRow, j);
130 }
131
132 KOKKOS_INLINE_FUNCTION
133 void operator()( const size_type k, const size_type tidx ) const {
134 const typename IdxView::value_type localRow = idx(k);
135 const size_t offset = k*numCols;
136 for (size_t j = 0; j < numCols; ++j)
137 dst(offset + j).fastAccessCoeff(tidx) =
138 src(localRow, j).fastAccessCoeff(tidx);
139 }
140
141 static void pack(const DstView& dst,
142 const SrcView& src,
143 const IdxView& idx,
144 size_t numCols,
145 const execution_space &space) {
146 Kokkos::parallel_for(
149 }
150 };
151
152 template <typename DstView, typename SrcView, typename IdxView,
153 typename ColView>
155 DstView, SrcView, IdxView, ColView,
156 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
157 Kokkos::is_view_mp_vector<SrcView>::value >::type >
158 {
159 typedef typename DstView::execution_space execution_space;
160 typedef typename execution_space::size_type size_type;
161
162 DstView dst;
163 SrcView src;
164 IdxView idx;
165 ColView col;
166 size_t numCols;
167
169 const SrcView& src_,
170 const IdxView& idx_,
171 const ColView& col_,
172 size_t numCols_) :
173 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
174
175 KOKKOS_INLINE_FUNCTION
176 void operator()( const size_type k ) const {
177 const typename IdxView::value_type localRow = idx(k);
178 const size_t offset = k*numCols;
179 for (size_t j = 0; j < numCols; ++j)
180 dst(offset + j) = src(localRow, col(j));
181 }
182
183 KOKKOS_INLINE_FUNCTION
184 void operator()( const size_type k, const size_type tidx ) const {
185 const typename IdxView::value_type localRow = idx(k);
186 const size_t offset = k*numCols;
187 for (size_t j = 0; j < numCols; ++j)
188 dst(offset + j).fastAccessCoeff(tidx) =
189 src(localRow, col(j)).fastAccessCoeff(tidx);
190 }
191
192 static void pack(const DstView& dst,
193 const SrcView& src,
194 const IdxView& idx,
195 const ColView& col,
196 size_t numCols,
197 const execution_space &space) {
198 Kokkos::parallel_for(
200 space, idx.size(), Kokkos::dimension_scalar(dst) ),
202 }
203 };
204
205 template <typename ExecutionSpace,
206 typename DstView,
207 typename SrcView,
208 typename IdxView,
209 typename Op>
211 ExecutionSpace, DstView, SrcView, IdxView, Op,
212 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
213 Kokkos::is_view_mp_vector<SrcView>::value >::type >
214 {
215 typedef typename ExecutionSpace::execution_space execution_space;
216 typedef typename execution_space::size_type size_type;
217
218 DstView dst;
219 SrcView src;
220 IdxView idx;
221 Op op;
222 size_t numCols;
223
224 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
225 const DstView& dst_,
226 const SrcView& src_,
227 const IdxView& idx_,
228 const Op& op_,
229 const size_t numCols_) :
230 dst (dst_),
231 src (src_),
232 idx (idx_),
233 op (op_),
234 numCols (numCols_)
235 {}
236
237 template <class TagType>
238 KOKKOS_INLINE_FUNCTION void
239 operator() (TagType tag, const size_type k) const
240 {
241 const typename IdxView::value_type localRow = idx(k);
242 const size_t offset = k*numCols;
243 for (size_t j = 0; j < numCols; ++j) {
244 op (tag, dst(localRow, j), src(offset+j));
245 }
246 }
247
248 template <class TagType>
249 KOKKOS_INLINE_FUNCTION void
250 operator() (TagType tag, const size_type k, const size_type tidx) const
251 {
252 const typename IdxView::value_type localRow = idx(k);
253 const size_t offset = k*numCols;
254 for (size_t j = 0; j < numCols; ++j) {
255 op (tag, dst(localRow, j).fastAccessCoeff(tidx),
256 src(offset+j).fastAccessCoeff(tidx));
257 }
258 }
259
260 static void
261 unpack (const ExecutionSpace& execSpace,
262 const DstView& dst,
263 const SrcView& src,
264 const IdxView& idx,
265 const Op& op,
266 const size_t numCols,
267 const bool use_atomic_updates)
268 {
269 if (use_atomic_updates) {
270 Kokkos::parallel_for
271 ("Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
273 idx.size (), Kokkos::dimension_scalar (dst)),
274 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
275 }
276 else {
277 Kokkos::parallel_for
278 ("Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
280 idx.size (), Kokkos::dimension_scalar (dst)),
281 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
282 }
283 }
284 };
285
286 template <typename ExecutionSpace,
287 typename DstView,
288 typename SrcView,
289 typename IdxView,
290 typename ColView,
291 typename Op>
293 ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
294 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
295 Kokkos::is_view_mp_vector<SrcView>::value >::type >
296 {
297 typedef typename ExecutionSpace::execution_space execution_space;
298 typedef typename execution_space::size_type size_type;
299
300 DstView dst;
301 SrcView src;
302 IdxView idx;
303 ColView col;
304 Op op;
305 size_t numCols;
306
307 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
308 const DstView& dst_,
309 const SrcView& src_,
310 const IdxView& idx_,
311 const ColView& col_,
312 const Op& op_,
313 const size_t numCols_) :
314 dst (dst_),
315 src (src_),
316 idx (idx_),
317 col (col_),
318 op (op_),
319 numCols (numCols_)
320 {}
321
322 template <class TagType>
323 KOKKOS_INLINE_FUNCTION void
324 operator() (TagType tag, const size_type k) const
325 {
326 const typename IdxView::value_type localRow = idx(k);
327 const size_t offset = k*numCols;
328 for (size_t j = 0; j < numCols; ++j) {
329 op (tag, dst(localRow, col(j)), src(offset+j));
330 }
331 }
332
333 template <class TagType>
334 KOKKOS_INLINE_FUNCTION void
335 operator() (TagType tag, const size_type k, const size_type tidx) const
336 {
337 const typename IdxView::value_type localRow = idx(k);
338 const size_t offset = k*numCols;
339 for (size_t j = 0; j < numCols; ++j) {
340 op (tag, dst(localRow, col(j)).fastAccessCoeff(tidx),
341 src(offset+j).fastAccessCoeff(tidx));
342 }
343 }
344
345 static void
346 unpack (const ExecutionSpace& execSpace,
347 const DstView& dst,
348 const SrcView& src,
349 const IdxView& idx,
350 const ColView& col,
351 const Op& op,
352 const size_t numCols,
353 const bool use_atomic_updates)
354 {
355 if (use_atomic_updates) {
356 Kokkos::parallel_for
357 ("Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
359 idx.size (), Kokkos::dimension_scalar (dst)),
361 }
362 else {
363 Kokkos::parallel_for
364 ("Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
366 idx.size (), Kokkos::dimension_scalar (dst)),
368 }
369 }
370 };
371
372 template <typename DstView, typename SrcView,
373 typename DstIdxView, typename SrcIdxView, typename Op>
375 DstView, SrcView, DstIdxView, SrcIdxView, Op,
376 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
377 Kokkos::is_view_mp_vector<SrcView>::value >::type >
378 {
379 typedef typename DstView::execution_space execution_space;
380 typedef typename execution_space::size_type size_type;
381
382 DstView dst;
383 SrcView src;
384 DstIdxView dst_idx;
385 SrcIdxView src_idx;
386 size_t numCols;
387 Op op;
388
389 PermuteArrayMultiColumn(const DstView& dst_,
390 const SrcView& src_,
391 const DstIdxView& dst_idx_,
392 const SrcIdxView& src_idx_,
393 size_t numCols_, const Op& op_) :
394 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
395 numCols(numCols_), op(op_) {}
396
397 KOKKOS_INLINE_FUNCTION
398 void operator()( const size_type k ) const {
399 const typename DstIdxView::value_type toRow = dst_idx(k);
400 const typename SrcIdxView::value_type fromRow = src_idx(k);
401 nonatomic_tag tag; // permute does not need atomics
402 for (size_t j = 0; j < numCols; ++j)
403 op(tag, dst(toRow, j),src(fromRow, j));
404 }
405
406 KOKKOS_INLINE_FUNCTION
407 void operator()( const size_type k, const size_type tidx ) const {
408 const typename DstIdxView::value_type toRow = dst_idx(k);
409 const typename SrcIdxView::value_type fromRow = src_idx(k);
410 nonatomic_tag tag; // permute does not need atomics
411 for (size_t j = 0; j < numCols; ++j)
412 op(tag, dst(toRow, j).fastAccessCoeff(tidx),
413 src(fromRow, j).fastAccessCoeff(tidx));
414 }
415
416 template <typename ExecutionSpace>
417 static void permute(const ExecutionSpace &space,
418 const DstView& dst,
419 const SrcView& src,
420 const DstIdxView& dst_idx,
421 const SrcIdxView& src_idx,
422 size_t numCols,
423 const Op& op) {
424 const size_type n = std::min( dst_idx.size(), src_idx.size() );
425 Kokkos::parallel_for(
427 space, n, Kokkos::dimension_scalar(dst) ),
429 }
430 };
431
432 template <typename DstView, typename SrcView,
433 typename DstIdxView, typename SrcIdxView,
434 typename DstColView, typename SrcColView, typename Op>
436 DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
437 typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
438 Kokkos::is_view_mp_vector<SrcView>::value >::type >
439 {
440 typedef typename DstView::execution_space execution_space;
441 typedef typename execution_space::size_type size_type;
442
443 DstView dst;
444 SrcView src;
445 DstIdxView dst_idx;
446 SrcIdxView src_idx;
447 DstColView dst_col;
448 SrcColView src_col;
449 size_t numCols;
450 Op op;
451
453 const SrcView& src_,
454 const DstIdxView& dst_idx_,
455 const SrcIdxView& src_idx_,
456 const DstColView& dst_col_,
457 const SrcColView& src_col_,
458 size_t numCols_,
459 const Op& op_) :
460 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
461 dst_col(dst_col_), src_col(src_col_),
462 numCols(numCols_), op(op_) {}
463
464 KOKKOS_INLINE_FUNCTION
465 void operator()( const size_type k ) const {
466 const typename DstIdxView::value_type toRow = dst_idx(k);
467 const typename SrcIdxView::value_type fromRow = src_idx(k);
468 nonatomic_tag tag; // permute does not need atomics
469 for (size_t j = 0; j < numCols; ++j)
470 op(tag, dst(toRow, dst_col(j)),src(fromRow, src_col(j)));
471 }
472
473 KOKKOS_INLINE_FUNCTION
474 void operator()( const size_type k, const size_type tidx ) const {
475 const typename DstIdxView::value_type toRow = dst_idx(k);
476 const typename SrcIdxView::value_type fromRow = src_idx(k);
477 nonatomic_tag tag; // permute does not need atomics
478 for (size_t j = 0; j < numCols; ++j)
479 op(tag, dst(toRow, dst_col(j)).fastAccessCoeff(tidx),
480 src(fromRow, src_col(j)).fastAccessCoeff(tidx));
481 }
482
483 template <typename ExecutionSpace>
484 static void permute(const ExecutionSpace &space,
485 const DstView& dst,
486 const SrcView& src,
487 const DstIdxView& dst_idx,
488 const SrcIdxView& src_idx,
489 const DstColView& dst_col,
490 const SrcColView& src_col,
491 size_t numCols,
492 const Op& op) {
493 const size_type n = std::min( dst_idx.size(), src_idx.size() );
494 Kokkos::parallel_for(
496 space, n, Kokkos::dimension_scalar(dst) ),
499 }
500 };
501
502} // Details namespace
503} // KokkosRefactor namespace
504} // Tpetra namespace
505
506#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.
static void permute(const ExecutionSpace &space, const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, const DstColView &dst_col, const SrcColView &src_col, size_t numCols, const Op &op)