Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40// clang-format off
41#ifndef TPETRA_DISTOBJECT_DEF_HPP
42#define TPETRA_DISTOBJECT_DEF_HPP
43
51
52#include "Tpetra_Distributor.hpp"
55#include "Tpetra_Details_checkGlobalError.hpp"
57#include "Tpetra_Util.hpp" // Details::createPrefix
58#include "Teuchos_CommHelpers.hpp"
59#include "Teuchos_TypeNameTraits.hpp"
60#include <typeinfo>
61#include <memory>
62#include <sstream>
63
64namespace Tpetra {
65
66 namespace { // (anonymous)
67 template<class DeviceType, class IndexType = size_t>
68 struct SumFunctor {
69 SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
70 viewToSum_ (viewToSum) {}
71 KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
72 lclSum += viewToSum_(i);
73 }
74 Kokkos::View<const size_t*, DeviceType> viewToSum_;
75 };
76
77 template<class DeviceType, class IndexType = size_t>
78 size_t
79 countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
80 {
81 using Kokkos::parallel_reduce;
82 typedef DeviceType DT;
83 typedef typename DT::execution_space DES;
84 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
85
86 const IndexType numOut = numImportPacketsPerLID.extent (0);
87 size_t totalImportPackets = 0;
88 parallel_reduce ("Count import packets",
89 range_type (0, numOut),
90 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
91 totalImportPackets);
92 return totalImportPackets;
93 }
94 } // namespace (anonymous)
95
96
97 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
99 DistObject (const Teuchos::RCP<const map_type>& map) :
100 map_ (map)
101 {
102#ifdef HAVE_TPETRA_TRANSFER_TIMERS
103 using Teuchos::RCP;
104 using Teuchos::Time;
105 using Teuchos::TimeMonitor;
106
107 RCP<Time> doXferTimer =
108 TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
109 if (doXferTimer.is_null ()) {
110 doXferTimer =
111 TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
112 }
113 doXferTimer_ = doXferTimer;
114
115 RCP<Time> copyAndPermuteTimer =
116 TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
117 if (copyAndPermuteTimer.is_null ()) {
118 copyAndPermuteTimer =
119 TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
120 }
121 copyAndPermuteTimer_ = copyAndPermuteTimer;
122
123 RCP<Time> packAndPrepareTimer =
124 TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
125 if (packAndPrepareTimer.is_null ()) {
126 packAndPrepareTimer =
127 TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
128 }
129 packAndPrepareTimer_ = packAndPrepareTimer;
130
131 RCP<Time> doPostsAndWaitsTimer =
132 TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
133 if (doPostsAndWaitsTimer.is_null ()) {
134 doPostsAndWaitsTimer =
135 TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
136 }
137 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
138
139 RCP<Time> unpackAndCombineTimer =
140 TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
141 if (unpackAndCombineTimer.is_null ()) {
142 unpackAndCombineTimer =
143 TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
144 }
145 unpackAndCombineTimer_ = unpackAndCombineTimer;
146#endif // HAVE_TPETRA_TRANSFER_TIMERS
147 }
148
149 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
150 std::string
152 description () const
153 {
154 using Teuchos::TypeNameTraits;
155
156 std::ostringstream os;
157 os << "\"Tpetra::DistObject\": {"
158 << "Packet: " << TypeNameTraits<packet_type>::name ()
159 << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
160 << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
161 << ", Node: " << TypeNameTraits<Node>::name ();
162 if (this->getObjectLabel () != "") {
163 os << "Label: \"" << this->getObjectLabel () << "\"";
164 }
165 os << "}";
166 return os.str ();
167 }
168
169 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
170 void
172 describe (Teuchos::FancyOStream &out,
173 const Teuchos::EVerbosityLevel verbLevel) const
174 {
175 using Teuchos::rcpFromRef;
176 using Teuchos::TypeNameTraits;
177 using std::endl;
178 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
179 Teuchos::VERB_LOW : verbLevel;
180 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
181 const int myRank = comm.is_null () ? 0 : comm->getRank ();
182 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
183
184 if (vl != Teuchos::VERB_NONE) {
185 Teuchos::OSTab tab0 (out);
186 if (myRank == 0) {
187 out << "\"Tpetra::DistObject\":" << endl;
188 }
189 Teuchos::OSTab tab1 (out);
190 if (myRank == 0) {
191 out << "Template parameters:" << endl;
192 {
193 Teuchos::OSTab tab2 (out);
194 out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
195 << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
196 << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
197 << "Node: " << TypeNameTraits<node_type>::name () << endl;
198 }
199 if (this->getObjectLabel () != "") {
200 out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
201 }
202 } // if myRank == 0
203
204 // Describe the Map.
205 {
206 if (myRank == 0) {
207 out << "Map:" << endl;
208 }
209 Teuchos::OSTab tab2 (out);
210 map_->describe (out, vl);
211 }
212
213 // At verbosity > VERB_LOW, each process prints something.
214 if (vl > Teuchos::VERB_LOW) {
215 for (int p = 0; p < numProcs; ++p) {
216 if (myRank == p) {
217 out << "Process " << myRank << ":" << endl;
218 Teuchos::OSTab tab2 (out);
219 out << "Export buffer size (in packets): "
220 << exports_.extent (0)
221 << endl
222 << "Import buffer size (in packets): "
223 << imports_.extent (0)
224 << endl;
225 }
226 if (! comm.is_null ()) {
227 comm->barrier (); // give output time to finish
228 comm->barrier ();
229 comm->barrier ();
230 }
231 } // for each process rank p
232 } // if vl > VERB_LOW
233 } // if vl != VERB_NONE
234 }
235
236 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
237 void
239 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
240 {
241 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
242 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
243 }
244
245 /* These are provided in base DistObject template
246 template<class DistObjectType>
247 void
248 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
249 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
250 typename DistObjectType::global_ordinal_type,
251 typename DistObjectType::node_type> >& newMap)
252 {
253 input->removeEmptyProcessesInPlace (newMap);
254 if (newMap.is_null ()) { // my process is excluded
255 input = Teuchos::null;
256 }
257 }
258
259 template<class DistObjectType>
260 void
261 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
262 {
263 using Teuchos::RCP;
264 typedef typename DistObjectType::local_ordinal_type LO;
265 typedef typename DistObjectType::global_ordinal_type GO;
266 typedef typename DistObjectType::node_type NT;
267 typedef Map<LO, GO, NT> map_type;
268
269 RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
270 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
271 }
272 */
273
274 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
275 void
277 doImport (const SrcDistObject& source,
279 const CombineMode CM,
280 const bool restrictedMode)
281 {
282 using Details::Behavior;
283 using std::endl;
284 const char modeString[] = "doImport (forward mode)";
285
286 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
287 // output to std::cerr on every MPI process. This is unwise for
288 // runs with large numbers of MPI processes.
289 const bool verbose = Behavior::verbose("DistObject");
290 std::unique_ptr<std::string> prefix;
291 if (verbose) {
292 prefix = this->createPrefix("DistObject", modeString);
293 std::ostringstream os;
294 os << *prefix << "Start" << endl;
295 std::cerr << os.str ();
296 }
297 this->beginImport(source, importer, CM, restrictedMode);
298 this->endImport(source, importer, CM, restrictedMode);
299 if (verbose) {
300 std::ostringstream os;
301 os << *prefix << "Done" << endl;
302 std::cerr << os.str ();
303 }
304 }
305
306 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
307 void
309 doExport (const SrcDistObject& source,
311 const CombineMode CM,
312 const bool restrictedMode)
313 {
314 using Details::Behavior;
315 using std::endl;
316 const char modeString[] = "doExport (forward mode)";
317
318 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
319 // output to std::cerr on every MPI process. This is unwise for
320 // runs with large numbers of MPI processes.
321 const bool verbose = Behavior::verbose("DistObject");
322 std::unique_ptr<std::string> prefix;
323 if (verbose) {
324 prefix = this->createPrefix("DistObject", modeString);
325 std::ostringstream os;
326 os << *prefix << "Start" << endl;
327 std::cerr << os.str ();
328 }
329 this->beginExport(source, exporter, CM, restrictedMode);
330 this->endExport(source, exporter, CM, restrictedMode);
331 if (verbose) {
332 std::ostringstream os;
333 os << *prefix << "Done" << endl;
334 std::cerr << os.str ();
335 }
336 }
337
338 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
339 void
341 doImport (const SrcDistObject& source,
343 const CombineMode CM,
344 const bool restrictedMode)
345 {
346 using Details::Behavior;
347 using std::endl;
348 const char modeString[] = "doImport (reverse mode)";
349
350 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
351 // output to std::cerr on every MPI process. This is unwise for
352 // runs with large numbers of MPI processes.
353 const bool verbose = Behavior::verbose("DistObject");
354 std::unique_ptr<std::string> prefix;
355 if (verbose) {
356 prefix = this->createPrefix("DistObject", modeString);
357 std::ostringstream os;
358 os << *prefix << "Start" << endl;
359 std::cerr << os.str ();
360 }
361 this->beginImport(source, exporter, CM, restrictedMode);
362 this->endImport(source, exporter, CM, restrictedMode);
363 if (verbose) {
364 std::ostringstream os;
365 os << *prefix << "Done" << endl;
366 std::cerr << os.str ();
367 }
368 }
369
370 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
371 void
373 doExport (const SrcDistObject& source,
375 const CombineMode CM,
376 const bool restrictedMode)
377 {
378 using Details::Behavior;
379 using std::endl;
380 const char modeString[] = "doExport (reverse mode)";
381
382 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
383 // output to std::cerr on every MPI process. This is unwise for
384 // runs with large numbers of MPI processes.
385 const bool verbose = Behavior::verbose("DistObject");
386 std::unique_ptr<std::string> prefix;
387 if (verbose) {
388 prefix = this->createPrefix("DistObject", modeString);
389 std::ostringstream os;
390 os << *prefix << "Start" << endl;
391 std::cerr << os.str ();
392 }
393 this->beginExport(source, importer, CM, restrictedMode);
394 this->endExport(source, importer, CM, restrictedMode);
395 if (verbose) {
396 std::ostringstream os;
397 os << *prefix << "Done" << endl;
398 std::cerr << os.str ();
399 }
400 }
401
402 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
403 void
405 beginImport(const SrcDistObject& source,
407 const CombineMode CM,
408 const bool restrictedMode)
409 {
410 using Details::Behavior;
411 using std::endl;
412 const char modeString[] = "beginImport (forward mode)";
413
414 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
415 // output to std::cerr on every MPI process. This is unwise for
416 // runs with large numbers of MPI processes.
417 const bool verbose = Behavior::verbose("DistObject");
418 std::unique_ptr<std::string> prefix;
419 if (verbose) {
420 prefix = this->createPrefix("DistObject", modeString);
421 std::ostringstream os;
422 os << *prefix << "Start" << endl;
423 std::cerr << os.str ();
424 }
425 this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
426 if (verbose) {
427 std::ostringstream os;
428 os << *prefix << "Done" << endl;
429 std::cerr << os.str ();
430 }
431 }
432
433 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
434 void
436 beginExport(const SrcDistObject& source,
438 const CombineMode CM,
439 const bool restrictedMode)
440 {
441 using Details::Behavior;
442 using std::endl;
443 const char modeString[] = "beginExport (forward mode)";
444
445 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
446 // output to std::cerr on every MPI process. This is unwise for
447 // runs with large numbers of MPI processes.
448 const bool verbose = Behavior::verbose("DistObject");
449 std::unique_ptr<std::string> prefix;
450 if (verbose) {
451 prefix = this->createPrefix("DistObject", modeString);
452 std::ostringstream os;
453 os << *prefix << "Start" << endl;
454 std::cerr << os.str ();
455 }
456 this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
457 if (verbose) {
458 std::ostringstream os;
459 os << *prefix << "Done" << endl;
460 std::cerr << os.str ();
461 }
462 }
463
464 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
465 void
467 beginImport(const SrcDistObject& source,
469 const CombineMode CM,
470 const bool restrictedMode)
471 {
472 using Details::Behavior;
473 using std::endl;
474 const char modeString[] = "beginImport (reverse mode)";
475
476 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
477 // output to std::cerr on every MPI process. This is unwise for
478 // runs with large numbers of MPI processes.
479 const bool verbose = Behavior::verbose("DistObject");
480 std::unique_ptr<std::string> prefix;
481 if (verbose) {
482 prefix = this->createPrefix("DistObject", modeString);
483 std::ostringstream os;
484 os << *prefix << "Start" << endl;
485 std::cerr << os.str ();
486 }
487 this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
488 if (verbose) {
489 std::ostringstream os;
490 os << *prefix << "Done" << endl;
491 std::cerr << os.str ();
492 }
493 }
494
495 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
496 void
498 beginExport(const SrcDistObject& source,
500 const CombineMode CM,
501 const bool restrictedMode)
502 {
503 using Details::Behavior;
504 using std::endl;
505 const char modeString[] = "beginExport (reverse mode)";
506
507 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
508 // output to std::cerr on every MPI process. This is unwise for
509 // runs with large numbers of MPI processes.
510 const bool verbose = Behavior::verbose("DistObject");
511 std::unique_ptr<std::string> prefix;
512 if (verbose) {
513 prefix = this->createPrefix("DistObject", modeString);
514 std::ostringstream os;
515 os << *prefix << "Start" << endl;
516 std::cerr << os.str ();
517 }
518 this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
519 if (verbose) {
520 std::ostringstream os;
521 os << *prefix << "Done" << endl;
522 std::cerr << os.str ();
523 }
524 }
525
526 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
527 void
529 endImport(const SrcDistObject& source,
531 const CombineMode CM,
532 const bool restrictedMode)
533 {
534 using Details::Behavior;
535 using std::endl;
536 const char modeString[] = "endImport (forward mode)";
537
538 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
539 // output to std::cerr on every MPI process. This is unwise for
540 // runs with large numbers of MPI processes.
541 const bool verbose = Behavior::verbose("DistObject");
542 std::unique_ptr<std::string> prefix;
543 if (verbose) {
544 prefix = this->createPrefix("DistObject", modeString);
545 std::ostringstream os;
546 os << *prefix << "Start" << endl;
547 std::cerr << os.str ();
548 }
549 this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
550 if (verbose) {
551 std::ostringstream os;
552 os << *prefix << "Done" << endl;
553 std::cerr << os.str ();
554 }
555 }
556
557 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
558 void
560 endExport(const SrcDistObject& source,
562 const CombineMode CM,
563 const bool restrictedMode)
564 {
565 using Details::Behavior;
566 using std::endl;
567 const char modeString[] = "endExport (forward mode)";
568
569 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
570 // output to std::cerr on every MPI process. This is unwise for
571 // runs with large numbers of MPI processes.
572 const bool verbose = Behavior::verbose("DistObject");
573 std::unique_ptr<std::string> prefix;
574 if (verbose) {
575 prefix = this->createPrefix("DistObject", modeString);
576 std::ostringstream os;
577 os << *prefix << "Start" << endl;
578 std::cerr << os.str ();
579 }
580 this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
581 if (verbose) {
582 std::ostringstream os;
583 os << *prefix << "Done" << endl;
584 std::cerr << os.str ();
585 }
586 }
587
588 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
589 void
590 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
591 endImport(const SrcDistObject& source,
592 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
593 const CombineMode CM,
594 const bool restrictedMode)
595 {
596 using Details::Behavior;
597 using std::endl;
598 const char modeString[] = "endImport (reverse mode)";
600 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
601 // output to std::cerr on every MPI process. This is unwise for
602 // runs with large numbers of MPI processes.
603 const bool verbose = Behavior::verbose("DistObject");
604 std::unique_ptr<std::string> prefix;
605 if (verbose) {
606 prefix = this->createPrefix("DistObject", modeString);
607 std::ostringstream os;
608 os << *prefix << "Start" << endl;
609 std::cerr << os.str ();
610 }
611 this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
612 if (verbose) {
613 std::ostringstream os;
614 os << *prefix << "Done" << endl;
615 std::cerr << os.str ();
617 }
618
619 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
620 void
621 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
622 endExport(const SrcDistObject& source,
623 const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
624 const CombineMode CM,
625 const bool restrictedMode)
626 {
627 using Details::Behavior;
628 using std::endl;
629 const char modeString[] = "endExport (reverse mode)";
630
631 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
632 // output to std::cerr on every MPI process. This is unwise for
633 // runs with large numbers of MPI processes.
634 const bool verbose = Behavior::verbose("DistObject");
635 std::unique_ptr<std::string> prefix;
636 if (verbose) {
637 prefix = this->createPrefix("DistObject", modeString);
638 std::ostringstream os;
639 os << *prefix << "Start" << endl;
640 std::cerr << os.str ();
641 }
642 this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
643 if (verbose) {
644 std::ostringstream os;
645 os << *prefix << "Done" << endl;
646 std::cerr << os.str ();
647 }
648 }
649
650 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
651 bool
653 transferArrived() const {
654 return distributorActor_.isReady();
655 }
656
657 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
658 bool
660 isDistributed () const {
661 return map_->isDistributed ();
662 }
663
// Default implementation of a size_t-returning member that always yields 0;
// per the inline comment, subclasses may override it.
// NOTE(review): the listing dropped the two lines between the return type
// and the body (numbers 666-667), i.e. the class qualifier and the function
// name/signature.  Presumably this is constantNumberOfPackets() const --
// confirm against the real header before relying on that name.
664 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
665 size_t
668 return 0; // default implementation; subclasses may override
669 }
670
671 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
672 void
674 doTransfer (const SrcDistObject& src,
675 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
676 const char modeString[],
677 const ReverseOption revOp,
678 const CombineMode CM,
679 bool restrictedMode)
680 {
681 beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
682 endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
683 }
684
685 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
686 bool
688 reallocImportsIfNeeded (const size_t newSize,
689 const bool verbose,
690 const std::string* prefix,
691 const bool /*remoteLIDsContiguous*/,
692 const CombineMode /*CM*/)
693 {
694 if (verbose) {
695 std::ostringstream os;
696 os << *prefix << "Realloc (if needed) imports_ from "
697 << imports_.extent (0) << " to " << newSize << std::endl;
698 std::cerr << os.str ();
699 }
701 const bool reallocated =
702 reallocDualViewIfNeeded (this->imports_, newSize, "imports");
703 if (verbose) {
704 std::ostringstream os;
705 os << *prefix << "Finished realloc'ing imports_" << std::endl;
706 std::cerr << os.str ();
707 }
708 return reallocated;
709 }
710
711 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
712 bool
714 reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
715 const size_t numImportLIDs)
716 {
717 using Details::Behavior;
720 using std::endl;
721 // If an array is already allocated, and if is at least
722 // tooBigFactor times bigger than it needs to be, free it and
723 // reallocate to the size we need, in order to save space.
724 // Otherwise, take subviews to reduce allocation size.
725 constexpr size_t tooBigFactor = 10;
726
727 const bool verbose = Behavior::verbose("DistObject");
728 std::unique_ptr<std::string> prefix;
729 if (verbose) {
730 prefix = this->createPrefix("DistObject",
731 "reallocArraysForNumPacketsPerLid");
732 std::ostringstream os;
733 os << *prefix
734 << "numExportLIDs: " << numExportLIDs
735 << ", numImportLIDs: " << numImportLIDs
736 << endl;
737 os << *prefix << "DualView status before:" << endl
738 << *prefix
739 << dualViewStatusToString (this->numExportPacketsPerLID_,
740 "numExportPacketsPerLID_")
741 << endl
742 << *prefix
743 << dualViewStatusToString (this->numImportPacketsPerLID_,
744 "numImportPacketsPerLID_")
745 << endl;
746 std::cerr << os.str ();
747 }
748
749 // Reallocate numExportPacketsPerLID_ if needed.
750 const bool firstReallocated =
751 reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
752 numExportLIDs,
753 "numExportPacketsPerLID",
754 tooBigFactor,
755 true); // need fence before, if realloc'ing
756
757 // If we reallocated above, then we fenced after that
758 // reallocation. This means that we don't need to fence again,
759 // before the next reallocation.
760 const bool needFenceBeforeNextAlloc = ! firstReallocated;
761 const bool secondReallocated =
762 reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
763 numImportLIDs,
764 "numImportPacketsPerLID",
765 tooBigFactor,
766 needFenceBeforeNextAlloc);
767
768 if (verbose) {
769 std::ostringstream os;
770 os << *prefix << "DualView status after:" << endl
771 << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
772 "numExportPacketsPerLID_")
773 << endl
774 << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
775 "numImportPacketsPerLID_")
776 << endl;
777 std::cerr << os.str ();
779
780 return firstReallocated || secondReallocated;
781 }
782
783 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
784 void
787 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
788 const char modeString[],
789 const ReverseOption revOp,
790 const CombineMode CM,
791 bool restrictedMode)
792 {
793 using Details::Behavior;
797 using Kokkos::Compat::getArrayView;
798 using Kokkos::Compat::getConstArrayView;
799 using Kokkos::Compat::getKokkosViewDeepCopy;
800 using Kokkos::Compat::create_const_view;
801 using std::endl;
804 const char funcName[] = "Tpetra::DistObject::doTransfer";
805
806 ProfilingRegion region_doTransfer(funcName);
807 const bool verbose = Behavior::verbose("DistObject");
808 std::shared_ptr<std::string> prefix;
809 if (verbose) {
810 std::ostringstream os;
811 prefix = this->createPrefix("DistObject", "doTransfer");
812 os << *prefix << "Source type: " << Teuchos::typeName(src)
813 << ", Target type: " << Teuchos::typeName(*this) << endl;
814 std::cerr << os.str();
815 }
816
817 // "Restricted Mode" does two things:
818 // 1) Skips copyAndPermute
819 // 2) Allows the "target" Map of the transfer to be a subset of
820 // the Map of *this, in a "locallyFitted" sense.
821 //
822 // This cannot be used if #2 is not true, OR there are permutes.
823 // Source Maps still need to match
824
825 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
826 // checks. These may communicate more.
827 const bool debug = Behavior::debug("DistObject");
828 if (debug) {
829 if (! restrictedMode && revOp == DoForward) {
830 const bool myMapSameAsTransferTgtMap =
831 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
832 TEUCHOS_TEST_FOR_EXCEPTION
833 (! myMapSameAsTransferTgtMap, std::invalid_argument,
834 "Tpetra::DistObject::" << modeString << ": For forward-mode "
835 "communication, the target DistObject's Map must be the same "
836 "(in the sense of Tpetra::Map::isSameAs) as the input "
837 "Export/Import object's target Map.");
838 }
839 else if (! restrictedMode && revOp == DoReverse) {
840 const bool myMapSameAsTransferSrcMap =
841 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
842 TEUCHOS_TEST_FOR_EXCEPTION
843 (! myMapSameAsTransferSrcMap, std::invalid_argument,
844 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
845 "communication, the target DistObject's Map must be the same "
846 "(in the sense of Tpetra::Map::isSameAs) as the input "
847 "Export/Import object's source Map.");
848 }
849 else if (restrictedMode && revOp == DoForward) {
850 const bool myMapLocallyFittedTransferTgtMap =
851 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
852 TEUCHOS_TEST_FOR_EXCEPTION
853 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
854 "Tpetra::DistObject::" << modeString << ": For forward-mode "
855 "communication using restricted mode, Export/Import object's "
856 "target Map must be locally fitted (in the sense of "
857 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
858 }
859 else { // if (restrictedMode && revOp == DoReverse)
860 const bool myMapLocallyFittedTransferSrcMap =
861 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
862 TEUCHOS_TEST_FOR_EXCEPTION
863 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
864 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
865 "communication using restricted mode, Export/Import object's "
866 "source Map must be locally fitted (in the sense of "
867 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
868 }
869
870 // SrcDistObject need not even _have_ Maps. However, if the
871 // source object is a DistObject, it has a Map, and we may
872 // compare that Map with the Transfer's Maps.
873 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
874 if (srcDistObj != nullptr) {
875 if (revOp == DoForward) {
876 const bool srcMapSameAsImportSrcMap =
877 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
878 TEUCHOS_TEST_FOR_EXCEPTION
879 (! srcMapSameAsImportSrcMap, std::invalid_argument,
880 "Tpetra::DistObject::" << modeString << ": For forward-mode "
881 "communication, the source DistObject's Map must be the same "
882 "as the input Export/Import object's source Map.");
883 }
884 else { // revOp == DoReverse
885 const bool srcMapSameAsImportTgtMap =
886 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
887 TEUCHOS_TEST_FOR_EXCEPTION
888 (! srcMapSameAsImportTgtMap, std::invalid_argument,
889 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
890 "communication, the source DistObject's Map must be the same "
891 "as the input Export/Import object's target Map.");
892 }
893 }
894 }
895
896 const size_t numSameIDs = transfer.getNumSameIDs ();
897 Distributor& distor = transfer.getDistributor ();
898 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
899
900 TEUCHOS_TEST_FOR_EXCEPTION
901 (debug && restrictedMode &&
902 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
903 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
904 std::invalid_argument,
905 "Tpetra::DistObject::" << modeString << ": Transfer object "
906 "cannot have permutes in restricted mode.");
907
908 // Do we need all communication buffers to live on host?
909 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
910 if (verbose) {
911 std::ostringstream os;
912 os << *prefix << "doTransfer: Use new interface; "
913 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
914 std::cerr << os.str ();
916
917 using const_lo_dv_type =
918 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
919 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
920 transfer.getPermuteToLIDs_dv () :
921 transfer.getPermuteFromLIDs_dv ();
922 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
923 transfer.getPermuteFromLIDs_dv () :
924 transfer.getPermuteToLIDs_dv ();
925 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
926 transfer.getRemoteLIDs_dv () :
927 transfer.getExportLIDs_dv ();
928 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
929 transfer.getExportLIDs_dv () :
930 transfer.getRemoteLIDs_dv ();
931 const bool canTryAliasing = (revOp == DoForward) ?
932 transfer.areRemoteLIDsContiguous() :
933 transfer.areExportLIDsContiguous();
934 // const bool canTryAliasing = false;
935
936 ProfilingRegion region_dTN(funcName);
937#ifdef HAVE_TPETRA_TRANSFER_TIMERS
938 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
939 // of Kokkos profiling.
940 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
941#endif // HAVE_TPETRA_TRANSFER_TIMERS
942
943 if (verbose) {
944 std::ostringstream os;
945 os << *prefix << "Input arguments:" << endl
946 << *prefix << " combineMode: " << combineModeToString (CM) << endl
947 << *prefix << " numSameIDs: " << numSameIDs << endl
948 << *prefix << " "
949 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
950 << *prefix << " "
951 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
952 << *prefix << " "
953 << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
954 << *prefix << " "
955 << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
956 << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
957 << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
958 std::cerr << os.str ();
959 }
960
961 {
962 ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
963 if (verbose) {
964 std::ostringstream os;
965 os << *prefix << "1. checkSizes" << endl;
966 std::cerr << os.str ();
967 }
968 const bool checkSizesResult = this->checkSizes (src);
969 TEUCHOS_TEST_FOR_EXCEPTION
970 (! checkSizesResult, std::invalid_argument,
971 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
972 "destination object is not a legal target for redistribution from the "
973 "source object. This probably means that they do not have the same "
974 "dimensions. For example, MultiVectors must have the same number of "
975 "rows and columns.");
976 }
978 // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
979 // that if CM == INSERT || CM == REPLACE, the target object could
980 // be write only. We don't optimize for that here.
981
982 if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
983 // There is at least one GID to copy or permute.
984 if (verbose) {
985 std::ostringstream os;
986 os << *prefix << "2. copyAndPermute" << endl;
987 std::cerr << os.str ();
988 }
989 ProfilingRegion region_cp
990 ("Tpetra::DistObject::doTransferNew::copyAndPermute");
991#ifdef HAVE_TPETRA_TRANSFER_TIMERS
992 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
993 // of Kokkos profiling.
994 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
995#endif // HAVE_TPETRA_TRANSFER_TIMERS
996
997 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
998 // There is at least one GID to copy or permute.
999 if (verbose) {
1000 std::ostringstream os;
1001 os << *prefix << "2. copyAndPermute" << endl;
1002 std::cerr << os.str ();
1003 }
1004 this->copyAndPermute (src, numSameIDs, permuteToLIDs,
1005 permuteFromLIDs, CM);
1006 if (verbose) {
1007 std::ostringstream os;
1008 os << *prefix << "After copyAndPermute:" << endl
1009 << *prefix << " "
1010 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1011 << endl
1012 << *prefix << " "
1013 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1014 << endl;
1015 std::cerr << os.str ();
1016 }
1017 }
1018 }
1019
1020 // The method may return zero even if the implementation actually
1021 // does have a constant number of packets per LID. However, if it
1022 // returns nonzero, we may use this information to avoid
1023 // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1024 // will set this to its final value.
1025 //
1026 // We only need this if CM != ZERO, but it has to be lifted out of
1027 // that scope because there are multiple tests for CM != ZERO.
1028 size_t constantNumPackets = this->constantNumberOfPackets ();
1029 if (verbose) {
1030 std::ostringstream os;
1031 os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1032 std::cerr << os.str ();
1033 }
1034
1035 // We only need to pack communication buffers if the combine mode
1036 // is not ZERO. A "ZERO combine mode" means that the results are
1037 // the same as if we had received all zeros, and added them to the
1038 // existing values. That means we don't need to communicate.
1039 if (CM != ZERO) {
1040 if (constantNumPackets == 0) {
1041 if (verbose) {
1042 std::ostringstream os;
1043 os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1044 << endl;
1045 std::cerr << os.str ();
1046 }
1047 // This only reallocates if necessary, that is, if the sizes
1048 // don't match.
1049 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1050 remoteLIDs.extent (0));
1051 }
1052
1053 if (verbose) {
1054 std::ostringstream os;
1055 os << *prefix << "4. packAndPrepare: before, "
1056 << dualViewStatusToString (this->exports_, "exports_")
1057 << endl;
1058 std::cerr << os.str ();
1059 }
1060
1061 doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1062 if (commOnHost) {
1063 this->exports_.sync_host();
1064 }
1065 else {
1066 this->exports_.sync_device();
1067 }
1068
1069 if (verbose) {
1070 std::ostringstream os;
1071 os << *prefix << "5.1. After packAndPrepare, "
1072 << dualViewStatusToString (this->exports_, "exports_")
1073 << endl;
1074 std::cerr << os.str ();
1075 }
1076 } // if (CM != ZERO)
1077
1078 // We only need to send data if the combine mode is not ZERO.
1079 if (CM != ZERO) {
1080 if (constantNumPackets != 0) {
1081 // There are a constant number of packets per element. We
1082 // already know (from the number of "remote" (incoming)
1083 // elements) how many incoming elements we expect, so we can
1084 // resize the buffer accordingly.
1085 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1086 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1087 }
1088
1089 // Do we need to do communication (via doPostsAndWaits)?
1090 bool needCommunication = true;
1091
1092 // This may be NULL. It will be used below.
1093 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1094
1095 if (revOp == DoReverse && ! this->isDistributed ()) {
1096 needCommunication = false;
1097 }
1098 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1099 // is distributed requires a cast to DistObject. If it's not a
1100 // DistObject, then I'm not quite sure what to do. Perhaps it
1101 // would be more appropriate for SrcDistObject to have an
1102 // isDistributed() method. For now, I'll just assume that we
1103 // need to do communication unless the cast succeeds and the
1104 // source is not distributed.
1105 else if (revOp == DoForward && srcDistObj != NULL &&
1106 ! srcDistObj->isDistributed ()) {
1107 needCommunication = false;
1108 }
1109
1110 if (! needCommunication) {
1111 if (verbose) {
1112 std::ostringstream os;
1113 os << *prefix << "Comm not needed; skipping" << endl;
1114 std::cerr << os.str ();
1115 }
1116 }
1117 else {
1118 ProfilingRegion region_dpw
1119 ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1120#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1121 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1122 // favor of Kokkos profiling.
1123 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1124#endif // HAVE_TPETRA_TRANSFER_TIMERS
1125
1126 if (verbose) {
1127 std::ostringstream os;
1128 os << *prefix << "7.0. "
1129 << (revOp == DoReverse ? "Reverse" : "Forward")
1130 << " mode" << endl;
1131 std::cerr << os.str ();
1132 }
1133
1134 doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1135 } // if (needCommunication)
1136 } // if (CM != ZERO)
1137 }
1138
// Second half of a split transfer. After the debug-mode Map checks, and
// unless CM == ZERO or no communication is needed, this waits on the
// Distributor plan (distributorActor_.doWaits) and then unpacks the received
// data with doUnpackAndCombine.
//
// src            Source object. Used for verbose type output and, if it can
//                be cast to this_type, for Map checks and isDistributed().
// transfer       Supplies the source/target Maps, the LID lists, and the
//                Distributor whose (forward or reverse) plan is waited on.
// modeString     Only used to build exception messages.
// revOp          DoForward or DoReverse; picks the plan direction and which
//                LID list plays the role of "remote" (incoming) LIDs.
// CM             Combine mode; ZERO skips the wait and the unpack entirely.
// restrictedMode Switches the debug Map checks from isSameAs to
//                isLocallyFitted; permutes are then rejected in debug mode.
//
// Throws std::invalid_argument through TEUCHOS_TEST_FOR_EXCEPTION when a
// debug-mode check fails.
1139 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1140 void
1142 endTransfer(const SrcDistObject& src,
1143 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1144 const char modeString[],
1145 const ReverseOption revOp,
1146 const CombineMode CM,
1147 bool restrictedMode)
1148 {
1149 using Details::Behavior;
1153 using Kokkos::Compat::getArrayView;
1154 using Kokkos::Compat::getConstArrayView;
1155 using Kokkos::Compat::getKokkosViewDeepCopy;
1156 using Kokkos::Compat::create_const_view;
1157 using std::endl;
1160 const char funcName[] = "Tpetra::DistObject::doTransfer";
1161
1162 ProfilingRegion region_doTransfer(funcName);
1163 const bool verbose = Behavior::verbose("DistObject");
     // prefix is only created (and only dereferenced) when verbose is true.
1164 std::shared_ptr<std::string> prefix;
1165 if (verbose) {
1166 std::ostringstream os;
1167 prefix = this->createPrefix("DistObject", "doTransfer");
1168 os << *prefix << "Source type: " << Teuchos::typeName(src)
1169 << ", Target type: " << Teuchos::typeName(*this) << endl;
1170 std::cerr << os.str();
1171 }
1172
1173 // "Restricted Mode" does two things:
1174 // 1) Skips copyAndPermute
1175 // 2) Allows the "target" Map of the transfer to be a subset of
1176 // the Map of *this, in a "locallyFitted" sense.
1177 //
1178 // This cannot be used if #2 is not true, OR there are permutes.
1179 // Source Maps still need to match
1180
1181 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1182 // checks. These may communicate more.
1183 const bool debug = Behavior::debug("DistObject");
1184 if (debug) {
     // Check this object's Map against the transfer's Map on the receiving
     // side: target Map for forward mode, source Map for reverse mode.
1185 if (! restrictedMode && revOp == DoForward) {
1186 const bool myMapSameAsTransferTgtMap =
1187 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1188 TEUCHOS_TEST_FOR_EXCEPTION
1189 (! myMapSameAsTransferTgtMap, std::invalid_argument,
1190 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1191 "communication, the target DistObject's Map must be the same "
1192 "(in the sense of Tpetra::Map::isSameAs) as the input "
1193 "Export/Import object's target Map.");
1194 }
1195 else if (! restrictedMode && revOp == DoReverse) {
1196 const bool myMapSameAsTransferSrcMap =
1197 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1198 TEUCHOS_TEST_FOR_EXCEPTION
1199 (! myMapSameAsTransferSrcMap, std::invalid_argument,
1200 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1201 "communication, the target DistObject's Map must be the same "
1202 "(in the sense of Tpetra::Map::isSameAs) as the input "
1203 "Export/Import object's source Map.");
1204 }
1205 else if (restrictedMode && revOp == DoForward) {
1206 const bool myMapLocallyFittedTransferTgtMap =
1207 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1208 TEUCHOS_TEST_FOR_EXCEPTION
1209 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1210 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1211 "communication using restricted mode, Export/Import object's "
1212 "target Map must be locally fitted (in the sense of "
1213 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1214 }
1215 else { // if (restrictedMode && revOp == DoReverse)
1216 const bool myMapLocallyFittedTransferSrcMap =
1217 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1218 TEUCHOS_TEST_FOR_EXCEPTION
1219 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1220 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1221 "communication using restricted mode, Export/Import object's "
1222 "source Map must be locally fitted (in the sense of "
1223 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1224 }
1225
1226 // SrcDistObject need not even _have_ Maps. However, if the
1227 // source object is a DistObject, it has a Map, and we may
1228 // compare that Map with the Transfer's Maps.
1229 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1230 if (srcDistObj != nullptr) {
1231 if (revOp == DoForward) {
1232 const bool srcMapSameAsImportSrcMap =
1233 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1234 TEUCHOS_TEST_FOR_EXCEPTION
1235 (! srcMapSameAsImportSrcMap, std::invalid_argument,
1236 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1237 "communication, the source DistObject's Map must be the same "
1238 "as the input Export/Import object's source Map.");
1239 }
1240 else { // revOp == DoReverse
1241 const bool srcMapSameAsImportTgtMap =
1242 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1243 TEUCHOS_TEST_FOR_EXCEPTION
1244 (! srcMapSameAsImportTgtMap, std::invalid_argument,
1245 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1246 "communication, the source DistObject's Map must be the same "
1247 "as the input Export/Import object's target Map.");
1248 }
1249 }
1250 }
1251
     // Reverse mode uses the reverse of the Distributor's plan.
1252 Distributor& distor = transfer.getDistributor ();
1253 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1254
1255 TEUCHOS_TEST_FOR_EXCEPTION
1256 (debug && restrictedMode &&
1257 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1258 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1259 std::invalid_argument,
1260 "Tpetra::DistObject::" << modeString << ": Transfer object "
1261 "cannot have permutes in restricted mode.");
1262
1263 // Do we need all communication buffers to live on host?
1264 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1265 if (verbose) {
1266 std::ostringstream os;
1267 os << *prefix << "doTransfer: Use new interface; "
1268 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1269 std::cerr << os.str ();
1270 }
1271
     // Only the incoming ("remote") LIDs are needed here: in reverse mode the
     // transfer's export LIDs take that role. The permute and export LID
     // lists are not used by this function, so they are not fetched.
1272 using const_lo_dv_type =
1273 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1280 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1281 transfer.getRemoteLIDs_dv () :
1282 transfer.getExportLIDs_dv ();
1286 const bool canTryAliasing = (revOp == DoForward) ?
1287 transfer.areRemoteLIDsContiguous() :
1288 transfer.areExportLIDsContiguous();
1289
1290 size_t constantNumPackets = this->constantNumberOfPackets ();
1291
1292 // We only need to send data if the combine mode is not ZERO.
1293 if (CM != ZERO) {
1294 if (constantNumPackets != 0) {
1295 // There are a constant number of packets per element. We
1296 // already know (from the number of "remote" (incoming)
1297 // elements) how many incoming elements we expect, so we can
1298 // resize the buffer accordingly.
     // NOTE(review): imports_ presumably already has this size from the
     // matching beginTransfer call, which would make this a no-op --
     // confirm, since the receives were posted into imports_ earlier.
1299 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1300 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1301 }
1302
1303 // Do we need to do communication (via doPostsAndWaits)?
1304 bool needCommunication = true;
1305
1306 // This may be null. It will be used below.
1307 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1308
1309 if (revOp == DoReverse && ! this->isDistributed ()) {
1310 needCommunication = false;
1311 }
1312 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1313 // is distributed requires a cast to DistObject. If it's not a
1314 // DistObject, then I'm not quite sure what to do. Perhaps it
1315 // would be more appropriate for SrcDistObject to have an
1316 // isDistributed() method. For now, I'll just assume that we
1317 // need to do communication unless the cast succeeds and the
1318 // source is not distributed.
1319 else if (revOp == DoForward && srcDistObj != nullptr &&
1320 ! srcDistObj->isDistributed ()) {
1321 needCommunication = false;
1322 }
1323
1324 if (! needCommunication) {
1325 if (verbose) {
1326 std::ostringstream os;
1327 os << *prefix << "Comm not needed; skipping" << endl;
1328 std::cerr << os.str ();
1329 }
1330 }
1331 else {
     // Wait on the plan before touching the received data.
1332 distributorActor_.doWaits(distributorPlan);
1333
1334 if (verbose) {
1335 std::ostringstream os;
1336 os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1337 std::cerr << os.str ();
1338 }
1339 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1340 } // if (needCommunication)
1341 } // if (CM != ZERO)
1342
1343 if (verbose) {
1344 std::ostringstream os;
1345 os << *prefix << "9. Done!" << endl;
1346 std::cerr << os.str ();
1347 }
1348
1349 if (verbose) {
1350 std::ostringstream os;
1351 os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1352 std::cerr << os.str ();
1353 }
1354 }
1355
// Post the sends and receives for one transfer using the given plan.
//
// constantNumPackets == 0 (variable packets per LID): first exchange the
// per-LID packet counts with a blocking doPostsAndWaits, total the incoming
// counts, resize imports_ via reallocImportsIfNeeded, and then post the data
// exchange with per-LID counts.
// constantNumPackets != 0: post the data exchange directly.
//
// In both cases the data exchange is only posted (doPosts); nothing in this
// function waits for it.
//
// distributorPlan     Plan handed to distributorActor_.
// constantNumPackets  Packets per LID, or 0 if that number varies.
// commOnHost          true: use host views of the buffers; false: device views.
// prefix              Verbose-output prefix; dereferenced only when verbose.
// canTryAliasing, CM  Forwarded to reallocImportsIfNeeded.
1356 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1357 void
1359 doPosts(const Details::DistributorPlan& distributorPlan,
1360 size_t constantNumPackets,
1361 bool commOnHost,
1362 std::shared_ptr<std::string> prefix,
1363 const bool canTryAliasing,
1364 const CombineMode CM)
1365 {
1368 using Kokkos::Compat::create_const_view;
1369 using std::endl;
1370
1371 const bool verbose = Details::Behavior::verbose("DistObject");
1372
1373 if (constantNumPackets == 0) { // variable num packets per LID
1374 if (verbose) {
1375 std::ostringstream os;
1376 os << *prefix << "7.1. Variable # packets / LID: first comm "
1377 << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1378 << endl;
1379 std::cerr << os.str ();
1380 }
1381 size_t totalImportPackets = 0;
1382 if (commOnHost) {
1383 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1384 this->numExportPacketsPerLID_.sync_host ();
1385 }
1386 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1387 this->numImportPacketsPerLID_.sync_host ();
1388 }
1389 this->numImportPacketsPerLID_.modify_host (); // out arg
1390 auto numExp_h =
1391 create_const_view (this->numExportPacketsPerLID_.view_host ());
1392 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1393
1394 // MPI communication happens here.
1395 if (verbose) {
1396 std::ostringstream os;
1397 os << *prefix << "Call doPostsAndWaits"
1398 << endl;
1399 std::cerr << os.str ();
1400 }
     // First comm: exchange the packet counts, one entry per LID.
1401 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1402
1403 if (verbose) {
1404 std::ostringstream os;
1405 os << *prefix << "Count totalImportPackets" << std::endl;
1406 std::cerr << os.str ();
1407 }
1408 using the_dev_type = typename decltype (numImp_h)::device_type;
1409 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1410 }
1411 else { // ! commOnHost
1412 this->numExportPacketsPerLID_.sync_device ();
1413 this->numImportPacketsPerLID_.sync_device ();
1414 this->numImportPacketsPerLID_.modify_device (); // out arg
1415 auto numExp_d = create_const_view
1416 (this->numExportPacketsPerLID_.view_device ());
1417 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1418
1419 // MPI communication happens here.
1420 if (verbose) {
1421 std::ostringstream os;
1422 os << *prefix << "Call doPostsAndWaits"
1423 << endl;
1424 std::cerr << os.str ();
1425 }
1426
     // First comm: exchange the packet counts, one entry per LID.
1427 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1428
1429 if (verbose) {
1430 std::ostringstream os;
1431 os << *prefix << "Count totalImportPackets" << std::endl;
1432 std::cerr << os.str ();
1433 }
1434 using the_dev_type = typename decltype (numImp_d)::device_type;
1435 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1436 }
1437
1438 if (verbose) {
1439 std::ostringstream os;
1440 os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1441 std::cerr << os.str ();
1442 }
     // Size the receive buffer now that the incoming total is known.
1443 this->reallocImportsIfNeeded (totalImportPackets, verbose,
1444 prefix.get (), canTryAliasing, CM);
1445 if (verbose) {
1446 std::ostringstream os;
1447 os << *prefix << "7.3. Second comm" << std::endl;
1448 std::cerr << os.str ();
1449 }
1450
1451 // mfh 04 Feb 2019: Distributor expects the "num packets per
1452 // LID" arrays on host, so that it can issue MPI sends and
1453 // receives correctly.
1454 this->numExportPacketsPerLID_.sync_host ();
1455 this->numImportPacketsPerLID_.sync_host ();
1456
1457 // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1458 // doReversePostsAndWaits currently want
1459 // numExportPacketsPerLID and numImportPacketsPerLID as
1460 // Teuchos::ArrayView, rather than as Kokkos::View.
1461 //
1462 // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1463 // device. The above syncs might.
1464 auto numExportPacketsPerLID_av =
1465 getArrayViewFromDualView (this->numExportPacketsPerLID_);
1466 auto numImportPacketsPerLID_av =
1467 getArrayViewFromDualView (this->numImportPacketsPerLID_);
1468
1469 // imports_ is for output only, so we don't need to sync it
1470 // before marking it as modified. However, in order to
1471 // prevent spurious debug-mode errors (e.g., "modified on
1472 // both device and host"), we first need to clear its
1473 // "modified" flags.
1474 this->imports_.clear_sync_state ();
1475
1476 if (verbose) {
1477 std::ostringstream os;
1478 os << *prefix << "Comm on "
1479 << (commOnHost ? "host" : "device")
1480 << "; call doPosts" << endl;
1481 std::cerr << os.str ();
1482 }
1483
1484 if (commOnHost) {
1485 this->imports_.modify_host ();
1486 distributorActor_.doPosts
1487 (distributorPlan,
1488 create_const_view (this->exports_.view_host ()),
1489 numExportPacketsPerLID_av,
1490 this->imports_.view_host (),
1491 numImportPacketsPerLID_av);
1492 }
1493 else { // pack on device
1494 Kokkos::fence(); // for UVM
1495 this->imports_.modify_device ();
1496 distributorActor_.doPosts
1497 (distributorPlan,
1498 create_const_view (this->exports_.view_device ()),
1499 numExportPacketsPerLID_av,
1500 this->imports_.view_device (),
1501 numImportPacketsPerLID_av);
1502 }
1503 }
1504 else { // constant number of packets per LID
1505 if (verbose) {
1506 std::ostringstream os;
     // Report the state of both buffers; the second entry must query
     // imports_ so that its label matches the object being described.
1507 os << *prefix << "7.1. Const # packets per LID: " << endl
1508 << *prefix << " "
1509 << dualViewStatusToString (this->exports_, "exports_")
1510 << endl
1511 << *prefix << " "
1512 << dualViewStatusToString (this->imports_, "imports_")
1513 << endl;
1514 std::cerr << os.str ();
1515 }
1516 // imports_ is for output only, so we don't need to sync it
1517 // before marking it as modified. However, in order to
1518 // prevent spurious debug-mode errors (e.g., "modified on
1519 // both device and host"), we first need to clear its
1520 // "modified" flags.
1521 this->imports_.clear_sync_state ();
1522
1523 if (verbose) {
1524 std::ostringstream os;
1525 os << *prefix << "7.2. Comm on "
1526 << (commOnHost ? "host" : "device")
1527 << "; call doPosts" << endl;
1528 std::cerr << os.str ();
1529 }
1530 if (commOnHost) {
1531 this->imports_.modify_host ();
1532 distributorActor_.doPosts
1533 (distributorPlan,
1534 create_const_view (this->exports_.view_host ()),
1535 constantNumPackets,
1536 this->imports_.view_host ());
1537 }
1538 else { // pack on device
1539 Kokkos::fence(); // for UVM
1540 this->imports_.modify_device ();
1541 distributorActor_.doPosts
1542 (distributorPlan,
1543 create_const_view (this->exports_.view_device ()),
1544 constantNumPackets,
1545 this->imports_.view_device ());
1546 } // commOnHost
1547 } // constant or variable num packets per LID
1548 }
1549
// Wrapper that calls packAndPrepare on the source object inside a profiling
// region (and a Teuchos timer when HAVE_TPETRA_TRANSFER_TIMERS is defined).
// packAndPrepare receives exports_ and numExportPacketsPerLID_ as its output
// buffers; constantNumPackets is passed by non-const reference.
//
// In debug mode an exception from packAndPrepare is caught locally and its
// message, together with the local success flag, is passed to
// Details::checkGlobalError with the Map's communicator.
// NOTE(review): presumably checkGlobalError reports the failure on all
// processes -- confirm against its definition.
//
// NOTE(review): the qualified-name line and the first parameter (the `src`
// used below) are not visible in this listing.
1550 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1551 void
1554 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1555 size_t& constantNumPackets,
1556 const execution_space &space)
1557 {
1559 using std::endl;
1560 const bool debug = Details::Behavior::debug("DistObject");
1561
1562 ProfilingRegion region_pp
1563 ("Tpetra::DistObject::doPackAndPrepare");
1564#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1565 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1566 // favor of Kokkos profiling.
1567 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1568#endif // HAVE_TPETRA_TRANSFER_TIMERS
1569
1570 // Ask the source to pack data. Also ask it whether there are
1571 // a constant number of packets per element
1572 // (constantNumPackets is an output argument). If there are,
1573 // constantNumPackets will come back nonzero. Otherwise, the
1574 // source will fill the numExportPacketsPerLID_ array.
1575
1576 // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1577 // Alternately, make packAndPrepare take a "commOnHost"
1578 // argument to tell it where to leave the data?
1579 //
1580 // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1581 // the freedom to pack and unpack either on host or device.
1582 // We should prefer sync'ing only on demand. Thus, we can
1583 // answer the above question: packAndPrepare should not
1584 // take a commOnHost argument, and doTransferNew should sync
1585 // where needed, if needed.
1586 if (debug) {
     // Debug path: record any exception locally instead of letting it
     // propagate, then hand the outcome to checkGlobalError.
1587 std::ostringstream lclErrStrm;
1588 bool lclSuccess = false;
1589 try {
1590 this->packAndPrepare (src, exportLIDs, this->exports_,
1591 this->numExportPacketsPerLID_,
1592 constantNumPackets, space);
1593 lclSuccess = true;
1594 }
1595 catch (std::exception& e) {
1596 lclErrStrm << "packAndPrepare threw an exception: "
1597 << endl << e.what();
1598 }
1599 catch (...) {
1600 lclErrStrm << "packAndPrepare threw an exception "
1601 "not a subclass of std::exception.";
1602 }
1603 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1604 "threw an exception in packAndPrepare on "
1605 "one or more processes in the DistObject's communicator.";
1606 auto comm = getMap()->getComm();
1607 Details::checkGlobalError(std::cerr, lclSuccess,
1608 lclErrStrm.str().c_str(),
1609 gblErrMsgHeader, *comm);
1610 }
1611 else {
     // Non-debug path: call directly; exceptions propagate to the caller.
1612 this->packAndPrepare (src, exportLIDs, this->exports_,
1613 this->numExportPacketsPerLID_,
1614 constantNumPackets, space);
1615 }
1616 }
1617
// Wrapper that calls unpackAndCombine inside a profiling region (and a
// Teuchos timer when HAVE_TPETRA_TRANSFER_TIMERS is defined).
// unpackAndCombine reads from imports_ and numImportPacketsPerLID_.
//
// remoteLIDs          LIDs passed through to unpackAndCombine.
// constantNumPackets  Passed through unchanged (by value here).
// CM                  Combine mode passed through to unpackAndCombine.
// space               Execution space instance passed through.
//
// In debug mode an exception from unpackAndCombine is caught locally and its
// message, together with the local success flag, is passed to
// Details::checkGlobalError with the Map's communicator.
// NOTE(review): presumably checkGlobalError reports the failure on all
// processes -- confirm against its definition.
1618 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1619 void
1621 doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1622 size_t constantNumPackets,
1623 CombineMode CM,
1624 const execution_space &space)
1625 {
1627 using std::endl;
1628 const bool debug = Details::Behavior::debug("DistObject");
1629
1630 ProfilingRegion region_uc
1631 ("Tpetra::DistObject::doUnpackAndCombine");
1632#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1633 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1634 // favor of Kokkos profiling.
1635 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1636#endif // HAVE_TPETRA_TRANSFER_TIMERS
1637
1638 if (debug) {
     // Debug path: record any exception locally instead of letting it
     // propagate, then hand the outcome to checkGlobalError.
1639 std::ostringstream lclErrStrm;
1640 bool lclSuccess = false;
1641 try {
1642 this->unpackAndCombine (remoteLIDs, this->imports_,
1643 this->numImportPacketsPerLID_,
1644 constantNumPackets, CM, space);
1645 lclSuccess = true;
1646 }
1647 catch (std::exception& e) {
1648 lclErrStrm << "doUnpackAndCombine threw an exception: "
1649 << endl << e.what();
1650 }
1651 catch (...) {
1652 lclErrStrm << "doUnpackAndCombine threw an exception "
1653 "not a subclass of std::exception.";
1654 }
1655 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1656 "threw an exception in unpackAndCombine on "
1657 "one or more processes in the DistObject's communicator.";
1658 auto comm = getMap()->getComm();
1659 Details::checkGlobalError(std::cerr, lclSuccess,
1660 lclErrStrm.str().c_str(),
1661 gblErrMsgHeader, *comm);
1662 }
1663 else {
     // Non-debug path: call directly; exceptions propagate to the caller.
1664 this->unpackAndCombine (remoteLIDs, this->imports_,
1665 this->numImportPacketsPerLID_,
1666 constantNumPackets, CM, space);
1667 }
1668 }
1669
// Default implementation with an empty body: it does nothing with any of
// its arguments (only CM is even named).
// NOTE(review): the qualified-name line is not visible in this listing;
// judging by the parameter list (source, count, two LID DualViews, combine
// mode) this is presumably the five-argument copyAndPermute -- confirm.
1670 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1671 void
1674 (const SrcDistObject&,
1675 const size_t,
1676 const Kokkos::DualView<
1677 const local_ordinal_type*,
1679 const Kokkos::DualView<
1680 const local_ordinal_type*,
1682 const CombineMode CM)
1683 {}
1684
1685// clang-format on
// Execution-space overload: fences `space`, runs the overload without an
// execution-space argument, then fences a default-constructed
// execution_space.
// NOTE(review): the qualified-name line is not visible in this listing; the
// call in the body suggests this is copyAndPermute -- confirm.
1686template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1688 const SrcDistObject &source, const size_t numSameIDs,
1689 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1690 &permuteToLIDs,
1691 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1692 &permuteFromLIDs,
1693 const CombineMode CM, const execution_space &space) {
1694 /*
1695 This is called if the derived class doesn't know how to copy and permute in
1696 an arbitrary execution space instance, but it was asked to anyway.
1697 Provide a safe illusion by actually doing the work in the default instance,
1698 and syncing the default instance with the provided instance.
1699 The caller expects
1700 1. any work in the provided instance to complete before this.
1701 2. This to complete before any following work in the provided instance.
1702 */
1703
1704 space.fence(); // TODO: Tpetra::Details::Spaces::exec_space_wait
1705 copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1706 CM); // default instance
1707 execution_space().fence(); // TODO:
1708 // Tpetra::Details::Spaces::exec_space_wait
1709}
1710// clang-format off
1711
1712
// Default implementation with an empty body: none of the arguments are
// named or touched, so the trailing size_t& is left unchanged.
// NOTE(review): the qualified-name line is not visible in this listing;
// judging by the parameter list (source, export LIDs, packet buffer,
// per-LID counts, size_t&) this is presumably the five-argument
// packAndPrepare -- confirm.
1713 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1714 void
1717 (const SrcDistObject&,
1718 const Kokkos::DualView<
1719 const local_ordinal_type*,
1721 Kokkos::DualView<
1722 packet_type*,
1724 Kokkos::DualView<
1725 size_t*,
1727 size_t&)
1728 {}
1729
1730// clang-format on
// Execution-space overload: fences `space`, runs the overload without an
// execution-space argument, then fences a default-constructed
// execution_space. constantNumPackets is forwarded by reference, so any
// value the inner call writes is visible to the caller.
// NOTE(review): the qualified-name line is not visible in this listing; the
// call in the body suggests this is packAndPrepare -- confirm.
1731template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1733 const SrcDistObject &source,
1734 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1735 &exportLIDs,
1736 Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1737 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1738 size_t &constantNumPackets, const execution_space &space) {
1739 /*
1740 This is called if the derived class doesn't know how to pack and prepare in
1741 an arbitrary execution space instance, but it was asked to anyway.
1742 Provide a safe illusion by actually doing the work in the default instance,
1743 and syncing the default instance with the provided instance.
1744
1745 The caller expects
1746 1. any work in the provided instance to complete before this.
1747 2. This to complete before any following work in the provided instance.
1748 */
1749
1750 // wait for any work from prior operations in the provided instance to
1751 // complete
1752 space.fence(); // TODO: Details::Spaces::exec_space_wait
1753
1754 // pack and prepare in the default instance.
1755 packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1756 constantNumPackets); // default instance
1757
1758 // wait for the default instance to complete before returning, so any
1759 // following work inserted into the provided instance will be done after this
1760 execution_space().fence(); // TODO: Details::Spaces::exec_space_wait
1761}
1762// clang-format off
1763
// Default implementation with an empty body: all five parameters are
// unnamed (their intended names are kept in the inline comments) and
// nothing is read or written.
// NOTE(review): the qualified-name line is not visible in this listing; the
// parameter comments suggest this is the five-argument unpackAndCombine --
// confirm.
1764 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1765 void
1768 (const Kokkos::DualView<
1769 const local_ordinal_type*,
1770 buffer_device_type>& /* importLIDs */,
1771 Kokkos::DualView<
1772 packet_type*,
1773 buffer_device_type> /* imports */,
1774 Kokkos::DualView<
1775 size_t*,
1776 buffer_device_type> /* numPacketsPerLID */,
1777 const size_t /* constantNumPackets */,
1778 const CombineMode /* combineMode */)
1779 {}
1780
1781// clang-format on
// Execution-space overload: fences `space`, runs the overload without an
// execution-space argument, then fences a default-constructed
// execution_space.
// NOTE(review): the qualified-name line is not visible in this listing; the
// call in the body suggests this is unpackAndCombine -- confirm.
1782template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1784 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1785 &importLIDs,
1786 Kokkos::DualView<packet_type *, buffer_device_type> imports,
1787 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1788 const size_t constantNumPackets, const CombineMode combineMode,
1789 const execution_space &space) {
1790 // Wait for any work in the provided space to complete
1791 space.fence(); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1792 // space);
1793 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1794 combineMode); // default instance
1795 // wait for unpack to finish in the default instance, since the caller
1796 // may be expecting sequential semantics in the `space` instance
1797 execution_space().fence(); // TODO: Details::Spaces::exec_space_wait(space,
1798 // execution_space());
1799}
1800// clang-format off
1801
// Write this object to `os` by wrapping the stream in a
// Teuchos::FancyOStream and calling describe() with Teuchos::VERB_DEFAULT.
// The stream is wrapped with rcpFromRef, i.e. from a reference to `os`.
// NOTE(review): the qualified-name line is not visible in this listing;
// presumably this is DistObject::print -- confirm.
1802template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1804 std::ostream &os) const {
1805 using std::endl;
1806 using Teuchos::FancyOStream;
1807 using Teuchos::getFancyOStream;
1808 using Teuchos::RCP;
1809 using Teuchos::rcpFromRef;
1810
1811 RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1812 this->describe(*out, Teuchos::VERB_DEFAULT);
1813}
1814
// Build the prefix string used by verbose output.
//
// className, methodName  Forwarded unchanged to Details::createPrefix.
//
// Returns whatever Details::createPrefix returns for this object's
// communicator (a raw pointer, null when the Map itself is null).
1815template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1816std::unique_ptr<std::string>
1817DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::createPrefix(
1818 const char className[], const char methodName[]) const {
1819 auto map = this->getMap();
     // A null Map has no communicator to ask for; use a null one instead.
1820 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1821 return Details::createPrefix(comm.getRawPtr(), className, methodName);
1822}
1823
// Calls input->removeEmptyProcessesInPlace(newMap) and then, if newMap is
// null (the calling process is excluded), resets `input` to null.
// `input` is dereferenced unconditionally, so it must be non-null on entry.
// NOTE(review): the line carrying the return type and function name is not
// visible in this listing; presumably this is the two-argument
// removeEmptyProcessesInPlace -- confirm.
1824template <class DistObjectType>
1826 Teuchos::RCP<DistObjectType> &input,
1827 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1828 typename DistObjectType::global_ordinal_type,
1829 typename DistObjectType::node_type>> &newMap) {
1830 input->removeEmptyProcessesInPlace(newMap);
1831 if (newMap.is_null()) { // my process is excluded
1832 input = Teuchos::null;
1833 }
1834}
1835
// Convenience overload: computes the new Map by calling
// removeEmptyProcesses() on input's current Map.
// `input` and its Map are dereferenced unconditionally.
// NOTE(review): one body line after the newMap computation is not visible
// in this listing; presumably it forwards (input, newMap) to the
// two-argument overload -- confirm.
1836template <class DistObjectType>
1837void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType> &input) {
1838 auto newMap = input->getMap()->removeEmptyProcesses();
1840}
1841
1842// Explicit instantiation macro for general DistObject: expands to an
// explicit instantiation of DistObject<SCALAR, LO, GO, NODE>.
1843#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1844 template class DistObject<SCALAR, LO, GO, NODE>;
1845
1846// Explicit instantiation macro for DistObject<char, ...>: expands to an
// explicit instantiation of DistObject<char, LO, GO, NODE>.
1847// The "SLGN" stuff above doesn't work for Packet=char.
1848#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1849 template class DistObject<char, LO, GO, NODE>;
1850
1851} // namespace Tpetra
1852
1853#endif // TPETRA_DISTOBJECT_DEF_HPP
1854// clang-format on
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
Base class for distributed Tpetra objects that support data redistribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine step.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
LocalOrdinal local_ordinal_type
The type of local indices.
typename ::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
typename device_type::execution_space execution_space
The Kokkos execution space.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (local index), and if so, how many packets per LID there are.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual std::string description() const
One-line description of this object.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
bool isDistributed() const
Whether this is a globally distributed object.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
A parallel distribution of indices over processes.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
CombineMode
Rule for combining data in an Import or Export.
@ ZERO
Replace old values with zero.