Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_ExecPolicy.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_EXECPOLICY_HPP
23#define KOKKOS_EXECPOLICY_HPP
24
25#include <Kokkos_Core_fwd.hpp>
26#include <impl/Kokkos_Traits.hpp>
27#include <impl/Kokkos_Error.hpp>
28#include <impl/Kokkos_AnalyzePolicy.hpp>
29#include <Kokkos_Concepts.hpp>
30#include <typeinfo>
31
32//----------------------------------------------------------------------------
33
34namespace Kokkos {
35
// Empty tag types identifying the three parallel dispatch patterns. Further
// down in this header, Impl::PatternImplSpecializationFromTag maps each tag to
// the matching ParallelFor / ParallelScan / ParallelReduce type, and
// Impl::PatternTagFromImplSpecialization performs the reverse mapping.
36struct ParallelForTag {};
37struct ParallelScanTag {};
38struct ParallelReduceTag {};
39
// Thin wrapper around an integer chunk size. RangePolicy accepts it as a
// trailing constructor argument and in RangePolicy::set().
struct ChunkSize {
  // Requested chunk size, stored exactly as passed in (no validation).
  int value;

  // Implicit on purpose: a plain int converts to ChunkSize.
  ChunkSize(int value_) { value = value_; }
};
44
66template <class... Properties>
67class RangePolicy : public Impl::PolicyTraits<Properties...> {
68 public:
69 using traits = Impl::PolicyTraits<Properties...>;
70
71 private:
72 typename traits::execution_space m_space;
73 typename traits::index_type m_begin;
74 typename traits::index_type m_end;
75 typename traits::index_type m_granularity;
76 typename traits::index_type m_granularity_mask;
77
78 template <class... OtherProperties>
79 friend class RangePolicy;
80
81 public:
83 using execution_policy = RangePolicy<Properties...>;
84 using member_type = typename traits::index_type;
85 using index_type = typename traits::index_type;
86
87 KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
88 return m_space;
89 }
90 KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
91 KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
92
93 // TODO: find a better workaround for Clangs weird instantiation order
94 // This thing is here because of an instantiation error, where the RangePolicy
95 // is inserted into FunctorValue Traits, which tries decltype on the operator.
96 // It tries to do this even though the first argument of parallel for clearly
97 // doesn't match.
98 void operator()(const int&) const {}
99
100 template <class... OtherProperties>
101 RangePolicy(const RangePolicy<OtherProperties...>& p)
102 : traits(p), // base class may contain data such as desired occupancy
103 m_space(p.m_space),
104 m_begin(p.m_begin),
105 m_end(p.m_end),
106 m_granularity(p.m_granularity),
107 m_granularity_mask(p.m_granularity_mask) {}
108
109 inline RangePolicy()
110 : m_space(),
111 m_begin(0),
112 m_end(0),
113 m_granularity(0),
114 m_granularity_mask(0) {}
115
117 inline RangePolicy(const typename traits::execution_space& work_space,
118 const member_type work_begin, const member_type work_end)
119 : m_space(work_space),
120 m_begin(work_begin < work_end ? work_begin : 0),
121 m_end(work_begin < work_end ? work_end : 0),
122 m_granularity(0),
123 m_granularity_mask(0) {
124 set_auto_chunk_size();
125 }
126
128 inline RangePolicy(const member_type work_begin, const member_type work_end)
129 : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
130 set_auto_chunk_size();
131 }
132
134 template <class... Args>
135 inline RangePolicy(const typename traits::execution_space& work_space,
136 const member_type work_begin, const member_type work_end,
137 Args... args)
138 : m_space(work_space),
139 m_begin(work_begin < work_end ? work_begin : 0),
140 m_end(work_begin < work_end ? work_end : 0),
141 m_granularity(0),
142 m_granularity_mask(0) {
143 set_auto_chunk_size();
144 set(args...);
145 }
146
148 template <class... Args>
149 inline RangePolicy(const member_type work_begin, const member_type work_end,
150 Args... args)
151 : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
152 set_auto_chunk_size();
153 set(args...);
154 }
155
156 private:
157 inline void set() {}
158
159 public:
160 template <class... Args>
161 inline void set(Args...) {
162 static_assert(
163 0 == sizeof...(Args),
164 "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
165 }
166
167 template <class... Args>
168 inline void set(const ChunkSize& chunksize, Args... args) {
169 m_granularity = chunksize.value;
170 m_granularity_mask = m_granularity - 1;
171 set(args...);
172 }
173
174 public:
176 inline member_type chunk_size() const { return m_granularity; }
177
179 inline RangePolicy& set_chunk_size(int chunk_size) {
180 m_granularity = chunk_size;
181 m_granularity_mask = m_granularity - 1;
182 return *this;
183 }
184
185 private:
187 inline void set_auto_chunk_size() {
188#ifdef KOKKOS_ENABLE_SYCL
189 if (std::is_same_v<typename traits::execution_space,
190 Kokkos::Experimental::SYCL>) {
191 // chunk_size <=1 lets the compiler choose the workgroup size when
192 // launching kernels
193 m_granularity = 1;
194 m_granularity_mask = 0;
195 return;
196 }
197#endif
198 auto concurrency = static_cast<int64_t>(m_space.concurrency());
199 if (concurrency == 0) concurrency = 1;
200
201 if (m_granularity > 0) {
202 if (!Impl::is_integral_power_of_two(m_granularity))
203 Kokkos::abort("RangePolicy blocking granularity must be power of two");
204 }
205
206 int64_t new_chunk_size = 1;
207 while (new_chunk_size * 100 * concurrency <
208 static_cast<int64_t>(m_end - m_begin))
209 new_chunk_size *= 2;
210 if (new_chunk_size < 128) {
211 new_chunk_size = 1;
212 while ((new_chunk_size * 40 * concurrency <
213 static_cast<int64_t>(m_end - m_begin)) &&
214 (new_chunk_size < 128))
215 new_chunk_size *= 2;
216 }
217 m_granularity = new_chunk_size;
218 m_granularity_mask = m_granularity - 1;
219 }
220
221 public:
226 struct WorkRange {
227 using work_tag = typename RangePolicy<Properties...>::work_tag;
228 using member_type = typename RangePolicy<Properties...>::member_type;
229
230 KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
231 KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
232
237 KOKKOS_INLINE_FUNCTION
238 WorkRange(const RangePolicy& range, const int part_rank,
239 const int part_size)
240 : m_begin(0), m_end(0) {
241 if (part_size) {
242 // Split evenly among partitions, then round up to the granularity.
243 const member_type work_part =
244 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
245 range.m_granularity_mask) &
246 ~member_type(range.m_granularity_mask);
247
248 m_begin = range.begin() + work_part * part_rank;
249 m_end = m_begin + work_part;
250
251 if (range.end() < m_begin) m_begin = range.end();
252 if (range.end() < m_end) m_end = range.end();
253 }
254 }
255
256 private:
257 member_type m_begin;
258 member_type m_end;
259 WorkRange();
260 WorkRange& operator=(const WorkRange&);
261 };
262};
263
264} // namespace Kokkos
265
266//----------------------------------------------------------------------------
267//----------------------------------------------------------------------------
268
269namespace Kokkos {
270
271namespace Impl {
272
// Primary template of the team policy implementation, parameterized on the
// execution space. In this header section every member is only declared; no
// definitions are given here.
273template <class ExecSpace, class... Properties>
274class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
275 private:
276 using traits = Impl::PolicyTraits<Properties...>;
277
278 public:
279 using index_type = typename traits::index_type;
280
281 //----------------------------------------
 // Team size queries for a given functor type.
292 template <class FunctorType>
293 static int team_size_max(const FunctorType&);
294
305 template <class FunctorType>
306 static int team_size_recommended(const FunctorType&);
307
308 template <class FunctorType>
309 static int team_size_recommended(const FunctorType&, const int&);
310
 // NOTE(review): non-static overload that differs from the static one above
 // only in taking the second argument by value instead of by const reference.
311 template <class FunctorType>
312 int team_size_recommended(const FunctorType& functor,
313 const int vector_length);
314
315 //----------------------------------------
 // Constructors taking an explicit execution space instance, with either a
 // requested team size or Kokkos::AUTO_t in its place.
317 TeamPolicyInternal(const typename traits::execution_space&,
318 int league_size_request, int team_size_request,
319 int vector_length_request = 1);
320
321 TeamPolicyInternal(const typename traits::execution_space&,
322 int league_size_request, const Kokkos::AUTO_t&,
323 int vector_length_request = 1);
324
 // Same two forms without an execution space argument.
327 TeamPolicyInternal(int league_size_request, int team_size_request,
328 int vector_length_request = 1);
329
330 TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
331 int vector_length_request = 1);
332
333 /* TeamPolicyInternal( int league_size_request , int team_size_request );
334
335 TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
336
342 KOKKOS_INLINE_FUNCTION int league_size() const;
343
349 KOKKOS_INLINE_FUNCTION int team_size() const;
350
353 inline bool impl_auto_team_size() const;
356 inline bool impl_auto_vector_length() const;
357
358 static int vector_length_max();
359
360 KOKKOS_INLINE_FUNCTION int impl_vector_length() const;
361
362 inline typename traits::index_type chunk_size() const;
363
 // Returns a reference to the policy; TeamPolicy::set_chunk_size relies on
 // that reference return type (it static_asserts on it).
364 inline TeamPolicyInternal& set_chunk_size(int chunk_size);
365
 // Handle passed to the functor for each team member.
369 struct member_type {
 // Handle to the currently executing team shared scratch memory.
371 KOKKOS_INLINE_FUNCTION
372 typename traits::execution_space::scratch_memory_space team_shmem() const;
373
 // Rank of this team within the league of teams.
375 KOKKOS_INLINE_FUNCTION int league_rank() const;
376
 // Number of teams in the league.
378 KOKKOS_INLINE_FUNCTION int league_size() const;
379
 // Rank of this thread within this team.
381 KOKKOS_INLINE_FUNCTION int team_rank() const;
382
 // Number of threads in this team.
384 KOKKOS_INLINE_FUNCTION int team_size() const;
385
 // Barrier among the threads of this team.
387 KOKKOS_INLINE_FUNCTION void team_barrier() const;
388
 // Intra-team reduction. Returns join of all values of the team members.
391 template <class JoinOp>
392 KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
393 const typename JoinOp::value_type, const JoinOp&) const;
394
 // Intra-team exclusive prefix sum with team_rank() ordering.
400 template <typename Type>
401 KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
402
 // Intra-team exclusive prefix sum with team_rank() ordering that
 // additionally takes a pointer to a global accumulator.
412 template <typename Type>
413 KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
414 Type* const global_accum) const;
415 };
416};
417
// Wrapper around a size_t used as the per-team amount of a scratch request:
// ScratchRequest copies `value` into its `per_team` field and
// TeamPolicy::set_scratch_size accepts it as the per-team argument.
// The constructor is only declared here.
418struct PerTeamValue {
419 size_t value;
420 PerTeamValue(size_t arg);
421};
422
// Wrapper around a size_t used as the per-thread amount of a scratch request:
// ScratchRequest copies `value` into its `per_thread` field and
// TeamPolicy::set_scratch_size accepts it as the per-thread argument.
// The constructor is only declared here.
423struct PerThreadValue {
424 size_t value;
425 PerThreadValue(size_t arg);
426};
427
/// Extracts a vector length from a leading argument.
///
/// value(val, rest...) returns \p val (as iType) when iType is an integral
/// type and the int 1 otherwise; the trailing arguments are ignored.
///
/// The selection is done with `if constexpr` inside a single function. Two
/// overloads guarded by enable_if on the *class* template parameter are not
/// SFINAE-friendly: the disabled declaration is a hard error as soon as the
/// class is instantiated.
template <class iType, class... Args>
struct ExtractVectorLength {
  static inline std::conditional_t<std::is_integral<iType>::value, iType, int>
  value([[maybe_unused]] iType val, Args...) {
    if constexpr (std::is_integral<iType>::value) {
      return val;
    } else {
      return 1;
    }
  }
};
439
/// Integral overload: the leading argument is the vector length and is
/// returned unchanged. Any trailing arguments are ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType val, Args...)
    -> std::enable_if_t<std::is_integral_v<iType>, iType> {
  return val;
}
445
/// Non-integral overload: the leading argument carries no vector length, so
/// the result is always 1. All arguments are ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType, Args...)
    -> std::enable_if_t<!std::is_integral_v<iType>, int> {
  constexpr int fallback_vector_length = 1;
  return fallback_vector_length;
}
451
452} // namespace Impl
453
// Factory functions (declared only here) returning the Impl wrapper types
// that ScratchRequest and TeamPolicy::set_scratch_size accept.
// NOTE(review): presumably each stores `arg` in the wrapper's `value` member;
// confirm against the definitions.
454Impl::PerTeamValue PerTeam(const size_t& arg);
455Impl::PerThreadValue PerThread(const size_t& arg);
456
457struct ScratchRequest {
458 int level;
459
460 size_t per_team;
461 size_t per_thread;
462
463 inline ScratchRequest(const int& level_,
464 const Impl::PerTeamValue& team_value) {
465 level = level_;
466 per_team = team_value.value;
467 per_thread = 0;
468 }
469
470 inline ScratchRequest(const int& level_,
471 const Impl::PerThreadValue& thread_value) {
472 level = level_;
473 per_team = 0;
474 per_thread = thread_value.value;
475 }
476
477 inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
478 const Impl::PerThreadValue& thread_value) {
479 level = level_;
480 per_team = team_value.value;
481 per_thread = thread_value.value;
482 }
483
484 inline ScratchRequest(const int& level_,
485 const Impl::PerThreadValue& thread_value,
486 const Impl::PerTeamValue& team_value) {
487 level = level_;
488 per_team = team_value.value;
489 per_thread = thread_value.value;
490 }
491};
492
// Validation hook for the scratch `level` argument. Only declared here; every
// TeamPolicy::set_scratch_size overload calls it before forwarding to the
// internal policy.
493// Throws a runtime exception if level is not `0` or `1`
494void team_policy_check_valid_storage_level_argument(int level);
495
522template <class... Properties>
523class TeamPolicy
524 : public Impl::TeamPolicyInternal<
525 typename Impl::PolicyTraits<Properties...>::execution_space,
526 Properties...> {
527 using internal_policy = Impl::TeamPolicyInternal<
528 typename Impl::PolicyTraits<Properties...>::execution_space,
529 Properties...>;
530
531 template <class... OtherProperties>
532 friend class TeamPolicy;
533
534 public:
535 using traits = Impl::PolicyTraits<Properties...>;
536
537 using execution_policy = TeamPolicy<Properties...>;
538
539 TeamPolicy() : internal_policy(0, AUTO) {}
540
542 TeamPolicy(const typename traits::execution_space& space_,
543 int league_size_request, int team_size_request,
544 int vector_length_request = 1)
545 : internal_policy(space_, league_size_request, team_size_request,
546 vector_length_request) {}
547
548 TeamPolicy(const typename traits::execution_space& space_,
549 int league_size_request, const Kokkos::AUTO_t&,
550 int vector_length_request = 1)
551 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
552 vector_length_request) {}
553
554 TeamPolicy(const typename traits::execution_space& space_,
555 int league_size_request, const Kokkos::AUTO_t&,
556 const Kokkos::AUTO_t&)
557 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
558 Kokkos::AUTO()) {}
559 TeamPolicy(const typename traits::execution_space& space_,
560 int league_size_request, const int team_size_request,
561 const Kokkos::AUTO_t&)
562 : internal_policy(space_, league_size_request, team_size_request,
563 Kokkos::AUTO()) {}
566 TeamPolicy(int league_size_request, int team_size_request,
567 int vector_length_request = 1)
568 : internal_policy(league_size_request, team_size_request,
569 vector_length_request) {}
570
571 TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
572 int vector_length_request = 1)
573 : internal_policy(league_size_request, Kokkos::AUTO(),
574 vector_length_request) {}
575
576 TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
577 const Kokkos::AUTO_t&)
578 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
579 TeamPolicy(int league_size_request, const int team_size_request,
580 const Kokkos::AUTO_t&)
581 : internal_policy(league_size_request, team_size_request,
582 Kokkos::AUTO()) {}
583
584 template <class... OtherProperties>
585 TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
586 // Cannot call converting constructor in the member initializer list because
587 // it is not a direct base.
588 internal_policy::traits::operator=(p);
589 }
590
591 private:
592 TeamPolicy(const internal_policy& p) : internal_policy(p) {}
593
594 public:
595 inline TeamPolicy& set_chunk_size(int chunk) {
596 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
597 internal_policy&>::value,
598 "internal set_chunk_size should return a reference");
599 return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
600 }
601
602 inline TeamPolicy& set_scratch_size(const int& level,
603 const Impl::PerTeamValue& per_team) {
604 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
605 level, per_team)),
606 internal_policy&>::value,
607 "internal set_chunk_size should return a reference");
608
609 team_policy_check_valid_storage_level_argument(level);
610 return static_cast<TeamPolicy&>(
611 internal_policy::set_scratch_size(level, per_team));
612 }
613 inline TeamPolicy& set_scratch_size(const int& level,
614 const Impl::PerThreadValue& per_thread) {
615 team_policy_check_valid_storage_level_argument(level);
616 return static_cast<TeamPolicy&>(
617 internal_policy::set_scratch_size(level, per_thread));
618 }
619 inline TeamPolicy& set_scratch_size(const int& level,
620 const Impl::PerTeamValue& per_team,
621 const Impl::PerThreadValue& per_thread) {
622 team_policy_check_valid_storage_level_argument(level);
623 return static_cast<TeamPolicy&>(
624 internal_policy::set_scratch_size(level, per_team, per_thread));
625 }
626 inline TeamPolicy& set_scratch_size(const int& level,
627 const Impl::PerThreadValue& per_thread,
628 const Impl::PerTeamValue& per_team) {
629 team_policy_check_valid_storage_level_argument(level);
630 return static_cast<TeamPolicy&>(
631 internal_policy::set_scratch_size(level, per_team, per_thread));
632 }
633};
634
635namespace Impl {
636
637template <typename iType, class TeamMemberType>
638struct TeamThreadRangeBoundariesStruct {
639 private:
640 KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
641 const iType& arg_end,
642 const iType& arg_rank,
643 const iType& arg_size) {
644 return arg_begin +
645 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
646 }
647
648 KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
649 const iType& arg_end,
650 const iType& arg_rank,
651 const iType& arg_size) {
652 const iType end_ =
653 arg_begin +
654 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
655 return end_ < arg_end ? end_ : arg_end;
656 }
657
658 public:
659 using index_type = iType;
660 const iType start;
661 const iType end;
662 enum { increment = 1 };
663 const TeamMemberType& thread;
664
665 KOKKOS_INLINE_FUNCTION
666 TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
667 const iType& arg_end)
668 : start(
669 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
670 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
671 thread(arg_thread) {}
672
673 KOKKOS_INLINE_FUNCTION
674 TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
675 const iType& arg_begin, const iType& arg_end)
676 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
677 arg_thread.team_size())),
678 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
679 arg_thread.team_size())),
680 thread(arg_thread) {}
681};
682
683template <typename iType, class TeamMemberType>
684struct TeamVectorRangeBoundariesStruct {
685 private:
686 KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
687 const iType& arg_end,
688 const iType& arg_rank,
689 const iType& arg_size) {
690 return arg_begin +
691 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
692 }
693
694 KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
695 const iType& arg_end,
696 const iType& arg_rank,
697 const iType& arg_size) {
698 const iType end_ =
699 arg_begin +
700 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
701 return end_ < arg_end ? end_ : arg_end;
702 }
703
704 public:
705 using index_type = iType;
706 const iType start;
707 const iType end;
708 enum { increment = 1 };
709 const TeamMemberType& thread;
710
711 KOKKOS_INLINE_FUNCTION
712 TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
713 const iType& arg_end)
714 : start(
715 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
716 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
717 thread(arg_thread) {}
718
719 KOKKOS_INLINE_FUNCTION
720 TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
721 const iType& arg_begin, const iType& arg_end)
722 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
723 arg_thread.team_size())),
724 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
725 arg_thread.team_size())),
726 thread(arg_thread) {}
727};
728
729template <typename iType, class TeamMemberType>
730struct ThreadVectorRangeBoundariesStruct {
731 using index_type = iType;
732 const index_type start;
733 const index_type end;
734 enum { increment = 1 };
735
736 KOKKOS_INLINE_FUNCTION
737 constexpr ThreadVectorRangeBoundariesStruct(const TeamMemberType,
738 const index_type& count) noexcept
739 : start(static_cast<index_type>(0)), end(count) {}
740
741 KOKKOS_INLINE_FUNCTION
742 constexpr ThreadVectorRangeBoundariesStruct(
743 const TeamMemberType, const index_type& arg_begin,
744 const index_type& arg_end) noexcept
745 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
746};
747
748template <class TeamMemberType>
749struct ThreadSingleStruct {
750 const TeamMemberType& team_member;
751 KOKKOS_INLINE_FUNCTION
752 ThreadSingleStruct(const TeamMemberType& team_member_)
753 : team_member(team_member_) {}
754};
755
756template <class TeamMemberType>
757struct VectorSingleStruct {
758 const TeamMemberType& team_member;
759 KOKKOS_INLINE_FUNCTION
760 VectorSingleStruct(const TeamMemberType& team_member_)
761 : team_member(team_member_) {}
762};
763
764} // namespace Impl
765
// Deleted placeholder overloads of TeamThreadRange (count form and begin/end
// form). The trailing template parameter `_never_use_this_overload` does not
// appear in the function parameters, so it cannot be deduced from a call and
// these declarations are never selected by ordinary calls.
// NOTE(review): presumably kept for documentation, with the usable overloads
// declared elsewhere - confirm.
773template <typename iType, class TeamMemberType, class _never_use_this_overload>
774KOKKOS_INLINE_FUNCTION_DELETED
775 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
776 TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
777
// Begin/end form; the result's index type is the common type of both bounds.
785template <typename iType1, typename iType2, class TeamMemberType,
786 class _never_use_this_overload>
787KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
788 std::common_type_t<iType1, iType2>, TeamMemberType>
789TeamThreadRange(const TeamMemberType&, const iType1& begin,
790 const iType2& end) = delete;
791
// Deleted placeholder overloads of TeamVectorRange (count form and begin/end
// form). The trailing template parameter `_never_use_this_overload` does not
// appear in the function parameters, so it cannot be deduced from a call and
// these declarations are never selected by ordinary calls.
// NOTE(review): the declared return type is TeamThreadRangeBoundariesStruct
// although a TeamVectorRangeBoundariesStruct exists in this header; harmless
// for a deleted function, but confirm whether that is intended.
799template <typename iType, class TeamMemberType, class _never_use_this_overload>
800KOKKOS_INLINE_FUNCTION_DELETED
801 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
802 TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
803
// Begin/end form; the result's index type is the common type of both bounds.
811template <typename iType1, typename iType2, class TeamMemberType,
812 class _never_use_this_overload>
813KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
814 std::common_type_t<iType1, iType2>, TeamMemberType>
815TeamVectorRange(const TeamMemberType&, const iType1& begin,
816 const iType2& end) = delete;
817
// Deleted placeholder overloads of ThreadVectorRange (count form and
// begin/end form). The trailing template parameter `_never_use_this_overload`
// does not appear in the function parameters, so it cannot be deduced from a
// call and these declarations are never selected by ordinary calls.
825template <typename iType, class TeamMemberType, class _never_use_this_overload>
826KOKKOS_INLINE_FUNCTION_DELETED
827 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
828 ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
829
// Begin/end form; the result's index type is the common type of both bounds.
830template <typename iType1, typename iType2, class TeamMemberType,
831 class _never_use_this_overload>
832KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
833 std::common_type_t<iType1, iType2>, TeamMemberType>
834ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin,
835 const iType2& arg_end) = delete;
836
837namespace Impl {
838
// Two-valued, bool-backed flags used by the team-level MD range machinery.
// The first enumerator of each has the value false, the second true.
// TeamMDRangeParThread and TeamMDRangeParVector are exposed by the
// *MDRange structs in this header as their par_thread / par_vector constants;
// TeamMDRangeThreadAndVector parameterizes the nest-level templates declared
// right after these enums.
839enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
840enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
841enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
842enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
843
// Forward declarations only; none of these three templates is defined in this
// section of the header.
// NOTE(review): HostBasedNestLevel / AcceleratorBasedNestLevel are presumably
// the host and device flavours that ThreadAndVectorNestLevel selects between
// based on ExecSpace - confirm against the definitions.
844template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
845struct HostBasedNestLevel;
846
847template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
848struct AcceleratorBasedNestLevel;
849
850// ThreadAndVectorNestLevel determines on which nested level parallelization
851// happens.
852// - Rank is Kokkos::Rank<TotalNestLevel, Iter>
853// - TotalNestLevel is the total number of loop nests
854// - Iter is whether to go forward or backward through ranks (i.e. the
855// iteration order for MDRangePolicy)
856// - ThreadAndVector determines whether both vector and thread parallelism is
857// in use
858template <typename Rank, typename ExecSpace,
859 TeamMDRangeThreadAndVector ThreadAndVector>
860struct ThreadAndVectorNestLevel;
861
// Empty tag passed as the reduction argument by the MD-range parallel_for
// overloads in this header, which have no reduction value of their own.
862struct NoReductionTag {};
863
// Common implementation entry point for the MD-range parallel_for and
// parallel_reduce overloads in this header; declared here, defined elsewhere.
// `val` is a forwarding reference: the parallel_reduce overloads pass their
// reduction variable, the parallel_for overloads pass a NoReductionTag
// temporary.
864template <typename Rank, typename TeamMDPolicy, typename Lambda,
865 typename ReductionValueType>
866KOKKOS_INLINE_FUNCTION void md_parallel_impl(TeamMDPolicy const& policy,
867 Lambda const& lambda,
868 ReductionValueType&& val);
869} // namespace Impl
870
871template <typename Rank, typename TeamHandle>
872struct TeamThreadMDRange;
873
874template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
875struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
876 using NestLevelType = int;
877 using BoundaryType = int;
878 using TeamHandleType = TeamHandle;
879 using ExecutionSpace = typename TeamHandleType::execution_space;
880 using ArrayLayout = typename ExecutionSpace::array_layout;
881
882 static constexpr NestLevelType total_nest_level =
883 Rank<N, OuterDir, InnerDir>::rank;
884 static constexpr Iterate iter = OuterDir;
885 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
886 static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
887
888 static constexpr Iterate direction =
889 OuterDir == Iterate::Default
890 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
891 : iter;
892
893 template <class... Args>
894 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
895 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
896 static_assert(sizeof...(Args) == total_nest_level);
897 }
898
899 TeamHandleType const& team;
900 BoundaryType boundaries[total_nest_level];
901};
902
903template <typename TeamHandle, typename... Args>
904TeamThreadMDRange(TeamHandle const&, Args&&...)
905 ->TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
906
907template <typename Rank, typename TeamHandle>
908struct ThreadVectorMDRange;
909
910template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
911struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
912 using NestLevelType = int;
913 using BoundaryType = int;
914 using TeamHandleType = TeamHandle;
915 using ExecutionSpace = typename TeamHandleType::execution_space;
916 using ArrayLayout = typename ExecutionSpace::array_layout;
917
918 static constexpr NestLevelType total_nest_level =
919 Rank<N, OuterDir, InnerDir>::rank;
920 static constexpr Iterate iter = OuterDir;
921 static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
922 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
923
924 static constexpr Iterate direction =
925 OuterDir == Iterate::Default
926 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
927 : iter;
928
929 template <class... Args>
930 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
931 Args&&... args)
932 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
933 static_assert(sizeof...(Args) == total_nest_level);
934 }
935
936 TeamHandleType const& team;
937 BoundaryType boundaries[total_nest_level];
938};
939
940template <typename TeamHandle, typename... Args>
941ThreadVectorMDRange(TeamHandle const&, Args&&...)
942 ->ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
943
944template <typename Rank, typename TeamHandle>
945struct TeamVectorMDRange;
946
947template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
948struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
949 using NestLevelType = int;
950 using BoundaryType = int;
951 using TeamHandleType = TeamHandle;
952 using ExecutionSpace = typename TeamHandleType::execution_space;
953 using ArrayLayout = typename ExecutionSpace::array_layout;
954
955 static constexpr NestLevelType total_nest_level =
956 Rank<N, OuterDir, InnerDir>::rank;
957 static constexpr Iterate iter = OuterDir;
958 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
959 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
960
961 static constexpr Iterate direction =
962 iter == Iterate::Default
963 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
964 : iter;
965
966 template <class... Args>
967 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
968 Args&&... args)
969 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
970 static_assert(sizeof...(Args) == total_nest_level);
971 }
972
973 TeamHandleType const& team;
974 BoundaryType boundaries[total_nest_level];
975};
976
977template <typename TeamHandle, typename... Args>
978TeamVectorMDRange(TeamHandle const&, Args&&...)
979 ->TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
980
981template <typename Rank, typename TeamHandle, typename Lambda,
982 typename ReducerValueType>
983KOKKOS_INLINE_FUNCTION void parallel_reduce(
984 TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
985 ReducerValueType& val) {
986 Impl::md_parallel_impl<Rank>(policy, lambda, val);
987}
988
989template <typename Rank, typename TeamHandle, typename Lambda>
990KOKKOS_INLINE_FUNCTION void parallel_for(
991 TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
992 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
993}
994
995template <typename Rank, typename TeamHandle, typename Lambda,
996 typename ReducerValueType>
997KOKKOS_INLINE_FUNCTION void parallel_reduce(
998 ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
999 ReducerValueType& val) {
1000 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1001}
1002
1003template <typename Rank, typename TeamHandle, typename Lambda>
1004KOKKOS_INLINE_FUNCTION void parallel_for(
1005 ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1006 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1007}
1008
1009template <typename Rank, typename TeamHandle, typename Lambda,
1010 typename ReducerValueType>
1011KOKKOS_INLINE_FUNCTION void parallel_reduce(
1012 TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1013 ReducerValueType& val) {
1014 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1015}
1016
1017template <typename Rank, typename TeamHandle, typename Lambda>
1018KOKKOS_INLINE_FUNCTION void parallel_for(
1019 TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1020 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1021}
1022
1023namespace Impl {
1024
/// Produces the name reported for a parallel construct: the user label when
/// it is non-empty, otherwise a default built from typeid names.
///
/// The third parameter selects the specialization: true when TagType is not
/// void (default name "<functor>/<tag>"), false when it is void (default
/// name "<functor>").
///
/// Lifetime: the label is stored by reference, so the string passed to the
/// constructor must outlive this object. Do not construct from a temporary.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void<TagType>::value>
struct ParallelConstructName;

/// Specialization for a non-void work tag.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // The default name is only built when it can actually be needed.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }
  /// Returns the label if it is non-empty, the default name otherwise.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};

/// Specialization for a void work tag.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // The default name is only built when it can actually be needed.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }
  /// Returns the label if it is non-empty, the default name otherwise.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};
1057
1058} // namespace Impl
1059
1060} // namespace Kokkos
1061
1062namespace Kokkos {
1063
1064namespace Impl {
1065
// Maps a pattern tag plus a pack of template arguments to the implementation
// class template instantiated with that pack:
//   ParallelForTag    -> ParallelFor<Args...>
//   ParallelReduceTag -> ParallelReduce<Args...>
//   ParallelScanTag   -> ParallelScan<Args...>
// The primary template is only declared, so any other tag is an error on use.
// NOTE(review): type_identity is not defined in this section; presumably it
// exposes its argument as a nested `type` - confirm.
1066template <class PatternTag, class... Args>
1067struct PatternImplSpecializationFromTag;
1068
1069template <class... Args>
1070struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1071 : type_identity<ParallelFor<Args...>> {};
1072
1073template <class... Args>
1074struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1075 : type_identity<ParallelReduce<Args...>> {};
1076
1077template <class... Args>
1078struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1079 : type_identity<ParallelScan<Args...>> {};
1080
// Reverse mapping: given an instantiation of ParallelFor, ParallelReduce or
// ParallelScan, yields the corresponding pattern tag type
// (ParallelForTag / ParallelReduceTag / ParallelScanTag).
// The primary template is only declared, so any other type is an error on use.
// NOTE(review): type_identity is not defined in this section; presumably it
// exposes its argument as a nested `type` - confirm.
1081template <class PatternImpl>
1082struct PatternTagFromImplSpecialization;
1083
1084template <class... Args>
1085struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1086 : type_identity<ParallelForTag> {};
1087
1088template <class... Args>
1089struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1090 : type_identity<ParallelReduceTag> {};
1091
1092template <class... Args>
1093struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1094 : type_identity<ParallelScanTag> {};
1095
1096} // end namespace Impl
1097
1098} // namespace Kokkos
1099#endif /* #define KOKKOS_EXECPOLICY_HPP */
Implementation of the ParallelFor operator that has a partial specialization for the device.
Implementation detail of parallel_reduce.
Implementation detail of parallel_scan.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
RangePolicy & set_chunk_size(int chunk_size)
set chunk_size to a discrete value
member_type chunk_size() const
return chunk_size
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
Execution policy for parallel work over a league of teams of threads.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
ScopeGuard Some user scope issues have been identified with some Kokkos::finalize calls; ScopeGuard a...
Parallel execution of a functor calls the functor once with each member of the execution policy.
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value, Type *const global_accum) const
Intra-team exclusive prefix sum with team_rank() ordering with intra-team non-deterministic ordering ...
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.