Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1#ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
2#define TPETRA_DETAILS_EXECUTIONSPACES_HPP
3
4#include <iostream>
5#include <sstream>
6#include <vector>
7
8#include <Kokkos_Core.hpp>
9
10#include <Teuchos_RCP.hpp>
11
14
30
31#define TPETRA_DETAILS_SPACES_THROW(x) \
32 { \
33 std::stringstream ss; \
34 ss << __FILE__ << ":" << __LINE__ << ": " << x; \
35 throw std::runtime_error(ss.str()); \
36 }
37
38namespace Tpetra {
39namespace Details {
40namespace Spaces {
41
47enum class Priority {
48 low = 0,
49 medium = 1,
50 high = 2,
51 NUM_LEVELS = 3 // not to be used as a priority
52};
53
54#if defined(KOKKOS_ENABLE_CUDA)
55inline void success_or_throw(cudaError_t err, const char *file,
56 const int line) {
57 if (err != cudaSuccess) {
58 std::stringstream ss;
59 ss << file << ":" << line << ": ";
60 ss << cudaGetErrorString(err);
61 throw std::runtime_error(ss.str());
62 }
63}
64#define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
65 Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
66#endif // KOKKOS_ENABLE_CUDA
67
74void lazy_init();
75
76#if defined(KOKKOS_ENABLE_CUDA)
77struct CudaInfo {
78 bool initialized_;
79 int lowPrio_;
80 int mediumPrio_; // same as CUDA default
81 int highPrio_;
82 cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
83
84 CudaInfo();
85 ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
86 CudaInfo(const CudaInfo &other) = delete;
87 CudaInfo(CudaInfo &&other) = delete;
88};
89extern CudaInfo cudaInfo;
90#endif // KOKKOS_ENABLE_CUDA
91
92// Tpetra's managed spaces
93#if defined(KOKKOS_ENABLE_CUDA)
94template <typename Space>
95using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
96template <typename Space>
97using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
98template <typename S1, typename S2>
99using BothCuda = std::enable_if_t<
100 std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
101template <typename S1, typename S2>
102using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
103 !std::is_same_v<S2, Kokkos::Cuda>,
104 bool>;
105#endif // KOKKOS_ENABLE_CUDA
106
107#if defined(KOKKOS_ENABLE_SERIAL)
109template <typename Space>
110using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
111#endif // KOKKOS_ENABLE_SERIAL
112
113#if defined(KOKKOS_ENABLE_OPENMP)
115template <typename Space>
116using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
117#endif // KOKKOS_ENABLE_OPENMP
118
119#if defined(KOKKOS_ENABLE_HIP)
121template <typename Space>
122using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
123#endif // KOKKOS_ENABLE_HIP
124
125#if defined(KOKKOS_ENABLE_SYCL)
127template <typename Space>
128using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
129#endif // KOKKOS_ENABLE_SYCL
130
136template <typename ExecSpace, Priority priority = Priority::medium
137#if defined(KOKKOS_ENABLE_CUDA)
138 ,
139 NotCuda<ExecSpace> = true
140#endif // KOKKOS_ENABLE_CUDA
141 >
142ExecSpace make_instance() {
143 return ExecSpace();
144}
145
152#if defined(KOKKOS_ENABLE_CUDA)
153template <typename ExecSpace, Priority priority = Priority::medium,
154 IsCuda<ExecSpace> = true>
155Kokkos::Cuda make_instance() {
156 lazy_init(); // CUDA priorities
157 cudaStream_t stream;
158 int prio;
159 switch (priority) {
160 case Priority::high:
161 prio = cudaInfo.highPrio_;
162 break;
163 case Priority::medium:
164 prio = cudaInfo.mediumPrio_;
165 break;
166 case Priority::low:
167 prio = cudaInfo.lowPrio_;
168 break;
169 default:
170 throw std::runtime_error("unexpected static Tpetra Space priority");
171 }
172 TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
173 cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, prio));
174 return Kokkos::Cuda(stream, true /*Kokkos will manage this stream*/);
175}
176#endif // KOKKOS_ENABLE_CUDA
177
183template <typename ExecSpace> ExecSpace make_instance(const Priority &prio) {
184 switch (prio) {
185 case Priority::high:
187 case Priority::medium:
189 case Priority::low:
191 default:
192 throw std::runtime_error("unexpected dynamic Tpetra Space priority");
193 }
194}
195
207template <typename ExecSpace> class InstanceLifetimeManager {
208public:
209 using execution_space = ExecSpace;
210 using rcp_type = Teuchos::RCP<const execution_space>;
211
218 template <Priority priority = Priority::medium>
219 rcp_type space_instance(int i = 0) {
221 "Tpetra::Details::Spaces::space_instance");
222
223 constexpr int p = static_cast<int>(priority);
224 static_assert(p < sizeof(instances) / sizeof(instances[0]),
225 "Spaces::Priority enum error");
226
227 if (i < 0) {
228 TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
229 }
231 TPETRA_DETAILS_SPACES_THROW(
232 "requested instance id "
234 << ") set by TPETRA_SPACES_ID_WARN_LIMT");
235 }
236
237 // make sure we can store an exec space at index i for priority
238 // not sure what happens in RCP(), so let's explicitly make it null
239 while (size_t(i) >= instances[p].size()) {
240 instances[p].push_back(Teuchos::ENull());
241 }
242
243 /* no exec space instance i of priority p exists.
244 It may have never existed, or all Tpetra objects referencing it have been
245 destructed.
246
247 Create a new RCP<ExecSpace> and internally store a weak
248 reference, so this space will be destructed when all strong references to
249 it are gone, but we can still refer to it as long as it lives to prevent
250 recreating
251 */
252 if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
253
254 // create a strong RCP to a space
255 rcp_type r = Teuchos::RCP<const execution_space>(
256 new ExecSpace(make_instance<ExecSpace, priority>()));
257
258 // store a weak RCP to the space
259 instances[p][i] = r.create_weak();
260
261 return r; // allow strong rcp to escape so internal weak one does not
262 // immediately go away
263 }
264
265 auto r = instances[p][i].create_strong();
266 return r;
267 }
268
273 for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
274 for (const rcp_type &rcp : instances[i]) {
275 if (rcp.is_valid_ptr() && !rcp.is_null()) {
276 // avoid throwing in dtor
277 std::cerr << __FILE__ << ":" << __LINE__
278 << " execution space instance survived to "
279 "~InstanceLifetimeManager. strong_count() = "
280 << rcp.strong_count()
281 << ". Did a Tpetra object live past Kokkos::finalize()?"
282 << std::endl;
283 }
284 }
285 }
286 }
287
288private:
289 // one vector of instances for each priority level
290 std::vector<rcp_type>
291 instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
292};
293
294#if defined(KOKKOS_ENABLE_CUDA)
295extern InstanceLifetimeManager<Kokkos::Cuda> cudaSpaces;
296#endif
297#if defined(KOKKOS_ENABLE_SERIAL)
298extern InstanceLifetimeManager<Kokkos::Serial> serialSpaces;
299#endif
300#if defined(KOKKOS_ENABLE_OPENMP)
301extern InstanceLifetimeManager<Kokkos::OpenMP> openMPSpaces;
302#endif
303#if defined(KOKKOS_ENABLE_HIP)
304extern InstanceLifetimeManager<Kokkos::HIP> HIPSpaces;
305#endif
306#if defined(KOKKOS_ENABLE_SYCL)
307extern InstanceLifetimeManager<Kokkos::Experimental::SYCL> SYCLSpaces;
308#endif
309
310#if defined(KOKKOS_ENABLE_CUDA)
311
315template <typename ExecSpace, Priority priority = Priority::medium,
316 IsCuda<ExecSpace> = true>
317Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
318 return cudaSpaces.space_instance<priority>(i);
319}
320#endif
321
322#if defined(KOKKOS_ENABLE_SERIAL)
326template <typename ExecSpace, Priority priority = Priority::medium,
327 IsSerial<ExecSpace> = true>
328Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
329 return serialSpaces.space_instance<priority>(i);
330}
331#endif
332
333#if defined(KOKKOS_ENABLE_OPENMP)
337template <typename ExecSpace, Priority priority = Priority::medium,
338 IsOpenMP<ExecSpace> = true>
339Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
340 return openMPSpaces.space_instance<priority>(i);
341}
342#endif
343
344#if defined(KOKKOS_ENABLE_HIP)
347template <typename ExecSpace, Priority priority = Priority::medium,
348 IsHIP<ExecSpace> = true>
349Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
350 return HIPSpaces.space_instance<priority>(i);
351}
352#endif
353#if defined(KOKKOS_ENABLE_SYCL)
357template <typename ExecSpace, Priority priority = Priority::medium,
358 IsSYCL<ExecSpace> = true>
359Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
360 return SYCLSpaces.space_instance<priority>(i);
361}
362#endif
363
369template <typename ExecSpace>
370Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
371 int i = 0) {
372 switch (priority) {
373 case Priority::high:
375 case Priority::medium:
377 case Priority::low:
379 default:
380 throw std::runtime_error(
381 "unexpected dynamic Tpetra Space priority in space_instance");
382 }
383}
384
398template <typename S1, typename S2
399#if defined(KOKKOS_ENABLE_CUDA)
400 ,
401 NotBothCuda<S1, S2> = true
402#endif
403 >
404void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
406 "Tpetra::Details::Spaces::exec_space_wait");
407 lazy_init();
408 waitee.fence(msg);
409}
410
411#if defined(KOKKOS_ENABLE_CUDA)
412template <typename S1, typename S2, BothCuda<S1, S2> = true>
413void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
415 "Tpetra::Details::Spaces::exec_space_wait");
416 lazy_init();
417
418 // if they are the same instance, no sync needed
419 if (waitee.impl_instance_id() !=
420 waiter
421 .impl_instance_id()) { // TODO: use instance operator== once available
422 /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
423 even if it overwrites the state of a shared event this means we only need
424 one event even if many exec_space_waits are in flight at the same time
425 */
426 TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
427 cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
428 TPETRA_DETAILS_SPACES_CUDA_RUNTIME(cudaStreamWaitEvent(
429 waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
430 }
431}
432#endif
433
434template <typename S1, typename S2>
435void exec_space_wait(const S1 &waitee, const S2 &waiter) {
437 "Tpetra::Details::Spaces::exec_space_wait");
438 lazy_init();
439 exec_space_wait("anonymous", waitee, waiter);
440}
441
442template <typename ExecutionSpace>
443constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
444 return false;
445}
446
447#if defined(KOKKOS_ENABLE_CUDA)
448template <>
449constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
450 return true;
451}
452#endif
453
454#if defined(KOKKOS_ENABLE_HIP)
455template <>
456constexpr KOKKOS_INLINE_FUNCTION bool
457is_gpu_exec_space<Kokkos::Experimental::HIP>() {
458 return true;
459}
460#endif
461
462#if defined(KOKKOS_ENABLE_SYCL)
463template <>
464constexpr KOKKOS_INLINE_FUNCTION bool
465is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
466 return true;
467}
468#endif
469
470} // namespace Spaces
471} // namespace Details
472} // namespace Tpetra
473
474#undef TPETRA_DETAILS_SPACES_THROW
475
476#endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void exec_space_wait(const char *msg, const S1 &waitee, const S2 &)
cause future work submitted to waiter to wait for the current work in waitee to finish
ExecSpace make_instance()
Construct a Kokkos execution space instance with the following priority.
Teuchos::RCP< const ExecSpace > space_instance(const Priority &priority, int i=0)
get a strong Teuchos::RCP to Tpetra-managed Kokkos execution space instance i
Priority
Priority interface for Tpetra's managed execution spaces.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
Provides reusable Kokkos execution space instances.
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP<const ExecSpace> to instance i.
Nonmember function that computes a residual Computes R = B - A * X.
Namespace Tpetra contains the class and methods constituting the Tpetra library.