Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels_UQ_PCE.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
45#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
46
47//----------------------------------------------------------------------------
48// Specializations of Tpetra::MultiVector pack/unpack kernels for UQ::PCE
49//----------------------------------------------------------------------------
50
51#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
54
55namespace Tpetra {
56namespace KokkosRefactor {
57namespace Details {
58
59 // Functors for implementing packAndPrepare and unpackAndCombine
60 // through parallel_for
61
62 template <typename DstView, typename SrcView, typename IdxView>
64 DstView, SrcView, IdxView,
65 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
66 Kokkos::is_view_uq_pce<SrcView>::value >::type >
67 {
68 typedef typename DstView::execution_space execution_space;
69 typedef typename execution_space::size_type size_type;
70
71 static const unsigned BlockSize = 32;
72
73 DstView dst;
74 SrcView src;
75 IdxView idx;
76 size_t col;
77
78 PackArraySingleColumn(const DstView& dst_,
79 const SrcView& src_,
80 const IdxView& idx_,
81 size_t col_) :
82 dst(dst_), src(src_), idx(idx_), col(col_) {}
83
84 KOKKOS_INLINE_FUNCTION
85 void operator()( const size_type k ) const {
86 dst(k) = src(idx(k), col);
87 }
88
89 KOKKOS_INLINE_FUNCTION
90 void operator()( const size_type k, const size_type tidx ) const {
92 dst(k).fastAccessCoeff(i) = src(idx(k), col).fastAccessCoeff(i);
93 }
94
95 static void pack(const DstView& dst,
96 const SrcView& src,
97 const IdxView& idx,
98 size_t col,
99 const execution_space &space) {
100 Kokkos::parallel_for(
103 }
104 };
105
106 template <typename DstView, typename SrcView, typename IdxView>
108 DstView, SrcView, IdxView,
109 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
110 Kokkos::is_view_uq_pce<SrcView>::value >::type >
111 {
112 typedef typename DstView::execution_space execution_space;
113 typedef typename execution_space::size_type size_type;
114
115 static const unsigned BlockSize = 32;
116
117 DstView dst;
118 SrcView src;
119 IdxView idx;
120 size_t numCols;
121
122 PackArrayMultiColumn(const DstView& dst_,
123 const SrcView& src_,
124 const IdxView& idx_,
125 size_t numCols_) :
126 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
127
128 KOKKOS_INLINE_FUNCTION
129 void operator()( const size_type k ) const {
130 const typename IdxView::value_type localRow = idx(k);
131 const size_t offset = k*numCols;
132 for (size_t j = 0; j < numCols; ++j)
133 dst(offset + j) = src(localRow, j);
134 }
135
136 KOKKOS_INLINE_FUNCTION
137 void operator()( const size_type k, const size_type tidx ) const {
138 const typename IdxView::value_type localRow = idx(k);
139 const size_t offset = k*numCols;
140 for (size_t j = 0; j < numCols; ++j)
142 dst(offset + j).fastAccessCoeff(i) =
143 src(localRow, j).fastAccessCoeff(i);
144 }
145
146 static void pack(const DstView& dst,
147 const SrcView& src,
148 const IdxView& idx,
149 size_t numCols,
150 const execution_space &space) {
151 Kokkos::parallel_for(
154 }
155 };
156
157 template <typename DstView, typename SrcView, typename IdxView,
158 typename ColView>
160 DstView, SrcView, IdxView, ColView,
161 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
162 Kokkos::is_view_uq_pce<SrcView>::value >::type >
163 {
164 typedef typename DstView::execution_space execution_space;
165 typedef typename execution_space::size_type size_type;
166
167 static const unsigned BlockSize = 32;
168
169 DstView dst;
170 SrcView src;
171 IdxView idx;
172 ColView col;
173 size_t numCols;
174
176 const SrcView& src_,
177 const IdxView& idx_,
178 const ColView& col_,
179 size_t numCols_) :
180 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
181
182 KOKKOS_INLINE_FUNCTION
183 void operator()( const size_type k ) const {
184 const typename IdxView::value_type localRow = idx(k);
185 const size_t offset = k*numCols;
186 for (size_t j = 0; j < numCols; ++j)
187 dst(offset + j) = src(localRow, col(j));
188 }
189
190 KOKKOS_INLINE_FUNCTION
191 void operator()( const size_type k, const size_type tidx ) const {
192 const typename IdxView::value_type localRow = idx(k);
193 const size_t offset = k*numCols;
194 for (size_t j = 0; j < numCols; ++j)
196 dst(offset + j).fastAccessCoeff(i) =
197 src(localRow, col(j)).fastAccessCoeff(i);
198 }
199
200 static void pack(const DstView& dst,
201 const SrcView& src,
202 const IdxView& idx,
203 const ColView& col,
204 size_t numCols,
205 const execution_space &space) {
206 Kokkos::parallel_for(
209 }
210 };
211
212 template <typename ExecutionSpace,
213 typename DstView,
214 typename SrcView,
215 typename IdxView,
216 typename Op>
218 ExecutionSpace, DstView, SrcView, IdxView, Op,
219 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
220 Kokkos::is_view_uq_pce<SrcView>::value >::type >
221 {
222 typedef typename ExecutionSpace::execution_space execution_space;
223 typedef typename execution_space::size_type size_type;
224
225 static const unsigned BlockSize = 32;
226
227 DstView dst;
228 SrcView src;
229 IdxView idx;
230 Op op;
231 size_t numCols;
232
233 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
234 const DstView& dst_,
235 const SrcView& src_,
236 const IdxView& idx_,
237 const Op& op_,
238 const size_t numCols_) :
239 dst (dst_),
240 src (src_),
241 idx (idx_),
242 op (op_),
243 numCols (numCols_)
244 {}
245
246 template <class TagType>
247 KOKKOS_INLINE_FUNCTION void
248 operator() (TagType tag, const size_type k) const
249 {
250 const typename IdxView::value_type localRow = idx(k);
251 const size_t offset = k*numCols;
252 for (size_t j = 0; j < numCols; ++j) {
253 op (tag, dst(localRow, j), src(offset+j));
254 }
255 }
256
257 template <class TagType>
258 KOKKOS_INLINE_FUNCTION void
259 operator() (TagType tag, const size_type k, const size_type tidx) const
260 {
261 const typename IdxView::value_type localRow = idx(k);
262 const size_t offset = k*numCols;
263 for (size_t j = 0; j < numCols; ++j) {
264 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize) {
265 op (tag, dst(localRow, j).fastAccessCoeff(i),
266 src(offset+j).fastAccessCoeff(i));
267 }
268 }
269 }
270
271 static void
272 unpack (const ExecutionSpace& execSpace,
273 const DstView& dst,
274 const SrcView& src,
275 const IdxView& idx,
276 const Op& op,
277 const size_t numCols,
278 const bool use_atomic_updates)
279 {
280 if (use_atomic_updates) {
281 Kokkos::parallel_for
282 ("Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
284 idx.size (), BlockSize),
285 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
286 }
287 else {
288 Kokkos::parallel_for
289 ("Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
291 idx.size (), BlockSize),
292 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
293 }
294 }
295 };
296
297 template <typename ExecutionSpace,
298 typename DstView,
299 typename SrcView,
300 typename IdxView,
301 typename ColView,
302 typename Op>
304 ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
305 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
306 Kokkos::is_view_uq_pce<SrcView>::value >::type>
307 {
308 typedef typename ExecutionSpace::execution_space execution_space;
309 typedef typename execution_space::size_type size_type;
310
311 static const unsigned BlockSize = 32;
312
313 DstView dst;
314 SrcView src;
315 IdxView idx;
316 ColView col;
317 Op op;
318 size_t numCols;
319
320 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
321 const DstView& dst_,
322 const SrcView& src_,
323 const IdxView& idx_,
324 const ColView& col_,
325 const Op& op_,
326 size_t numCols_) :
327 dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
328
329 template <class TagType>
330 KOKKOS_INLINE_FUNCTION void
331 operator()( TagType tag, const size_type k ) const {
332 const typename IdxView::value_type localRow = idx(k);
333 const size_t offset = k*numCols;
334 for (size_t j = 0; j < numCols; ++j)
335 op( tag, dst(localRow,col(j)), src(offset+j) );
336 }
337
338 template <class TagType>
339 KOKKOS_INLINE_FUNCTION void
340 operator()( TagType tag, const size_type k, const size_type tidx ) const {
341 const typename IdxView::value_type localRow = idx(k);
342 const size_t offset = k*numCols;
343 for (size_t j = 0; j < numCols; ++j)
345 op( tag, dst(localRow,col(j)).fastAccessCoeff(i),
346 src(offset+j).fastAccessCoeff(i) );
347 }
348
349 static void
350 unpack (const ExecutionSpace& execSpace,
351 const DstView& dst,
352 const SrcView& src,
353 const IdxView& idx,
354 const ColView& col,
355 const Op& op,
356 const size_t numCols,
357 const bool use_atomic_updates)
358 {
359 if (use_atomic_updates) {
360 Kokkos::parallel_for
361 ("Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
363 idx.size (), BlockSize),
365 idx, col, op, numCols));
366 }
367 else {
368 Kokkos::parallel_for
369 ("Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
371 idx.size (), BlockSize),
373 idx, col, op, numCols));
374 }
375 }
376 };
377
378 template <typename DstView, typename SrcView,
379 typename DstIdxView, typename SrcIdxView, typename Op>
381 DstView, SrcView, DstIdxView, SrcIdxView, Op,
382 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
383 Kokkos::is_view_uq_pce<SrcView>::value >::type>
384 {
385 typedef typename DstView::execution_space execution_space;
386 typedef typename execution_space::size_type size_type;
387
388 static const unsigned BlockSize = 32;
389
390 DstView dst;
391 SrcView src;
392 DstIdxView dst_idx;
393 SrcIdxView src_idx;
394 size_t numCols;
395 Op op;
396
397 PermuteArrayMultiColumn(const DstView& dst_,
398 const SrcView& src_,
399 const DstIdxView& dst_idx_,
400 const SrcIdxView& src_idx_,
401 size_t numCols_, const Op& op_) :
402 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
403 numCols(numCols_), op(op_) {}
404
405 KOKKOS_INLINE_FUNCTION
406 void operator()( const size_type k ) const {
407 const typename DstIdxView::value_type toRow = dst_idx(k);
408 const typename SrcIdxView::value_type fromRow = src_idx(k);
409 nonatomic_tag tag; // permute does not need atomics
410 for (size_t j = 0; j < numCols; ++j)
411 op(tag, dst(toRow, j),src(fromRow, j));
412 }
413
414 KOKKOS_INLINE_FUNCTION
415 void operator()( const size_type k, const size_type tidx ) const {
416 const typename DstIdxView::value_type toRow = dst_idx(k);
417 const typename SrcIdxView::value_type fromRow = src_idx(k);
418 nonatomic_tag tag; // permute does not need atomics
419 for (size_t j = 0; j < numCols; ++j)
421 op(tag, dst(toRow, j).fastAccessCoeff(i),
422 src(fromRow, j).fastAccessCoeff(i));
423 }
424
425 template <typename ExecutionSpace>
426 static void permute(const ExecutionSpace &space,
427 const DstView& dst,
428 const SrcView& src,
429 const DstIdxView& dst_idx,
430 const SrcIdxView& src_idx,
431 size_t numCols,
432 const Op& op) {
433 const size_type n = std::min( dst_idx.size(), src_idx.size() );
434 Kokkos::parallel_for(
437 }
438 };
439
440 template <typename DstView, typename SrcView,
441 typename DstIdxView, typename SrcIdxView,
442 typename DstColView, typename SrcColView, typename Op>
444 DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
445 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
446 Kokkos::is_view_uq_pce<SrcView>::value >::type >
447 {
448 typedef typename DstView::execution_space execution_space;
449 typedef typename execution_space::size_type size_type;
450
451 static const unsigned BlockSize = 32;
452
453 DstView dst;
454 SrcView src;
455 DstIdxView dst_idx;
456 SrcIdxView src_idx;
457 DstColView dst_col;
458 SrcColView src_col;
459 size_t numCols;
460 Op op;
461
463 const SrcView& src_,
464 const DstIdxView& dst_idx_,
465 const SrcIdxView& src_idx_,
466 const DstColView& dst_col_,
467 const SrcColView& src_col_,
468 size_t numCols_,
469 const Op& op_) :
470 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
471 dst_col(dst_col_), src_col(src_col_),
472 numCols(numCols_), op(op_) {}
473
474 KOKKOS_INLINE_FUNCTION
475 void operator()( const size_type k ) const {
476 const typename DstIdxView::value_type toRow = dst_idx(k);
477 const typename SrcIdxView::value_type fromRow = src_idx(k);
478 nonatomic_tag tag; // permute does not need atomics
479 for (size_t j = 0; j < numCols; ++j)
480 op(tag, dst(toRow, dst_col(j)),src(fromRow, src_col(j)));
481 }
482
483 KOKKOS_INLINE_FUNCTION
484 void operator()( const size_type k, const size_type tidx ) const {
485 const typename DstIdxView::value_type toRow = dst_idx(k);
486 const typename SrcIdxView::value_type fromRow = src_idx(k);
487 nonatomic_tag tag; // permute does not need atomics
488 for (size_t j = 0; j < numCols; ++j)
490 op(tag, dst(toRow, dst_col(j)).fastAccessCoeff(i),
491 src(fromRow, src_col(j)).fastAccessCoeff(i));
492 }
493
494 template <typename ExecutionSpace>
495 static void permute(const ExecutionSpace &space,
496 const DstView& dst,
497 const SrcView& src,
498 const DstIdxView& dst_idx,
499 const SrcIdxView& src_idx,
500 const DstColView& dst_col,
501 const SrcColView& src_col,
502 size_t numCols,
503 const Op& op) {
504 const size_type n = std::min( dst_idx.size(), src_idx.size() );
505 Kokkos::parallel_for(
509 }
510 };
511
512} // Details namespace
513} // KokkosRefactor namespace
514} // Tpetra namespace
515
516#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.
static void permute(const ExecutionSpace &space, const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, const DstColView &dst_col, const SrcColView &src_col, size_t numCols, const Op &op)