Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Map_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
44
45#ifndef TPETRA_MAP_DEF_HPP
46#define TPETRA_MAP_DEF_HPP
47
48#include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
50#include "Tpetra_Details_FixedHashTable.hpp"
53#include "Tpetra_Core.hpp"
54#include "Tpetra_Util.hpp"
55#include "Teuchos_as.hpp"
56#include "Teuchos_TypeNameTraits.hpp"
57#include "Teuchos_CommHelpers.hpp"
58#include "Tpetra_Details_mpiIsInitialized.hpp"
59#include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
61#include <memory>
62#include <sstream>
63#include <stdexcept>
64#include <typeinfo>
65
66namespace { // (anonymous)
67
68 void
69 checkMapInputArray (const char ctorName[],
70 const void* indexList,
71 const size_t indexListSize,
72 const Teuchos::Comm<int>* const comm)
73 {
75
76 const bool debug = Behavior::debug("Map");
77 if (debug) {
78 using Teuchos::outArg;
79 using Teuchos::REDUCE_MIN;
80 using Teuchos::reduceAll;
81 using std::endl;
82
83 const int myRank = comm == nullptr ? 0 : comm->getRank ();
84 const bool verbose = Behavior::verbose("Map");
85 std::ostringstream lclErrStrm;
86 int lclSuccess = 1;
87
88 if (indexListSize != 0 && indexList == nullptr) {
89 lclSuccess = 0;
90 if (verbose) {
91 lclErrStrm << "Proc " << myRank << ": indexList is null, "
92 "but indexListSize=" << indexListSize << " != 0." << endl;
93 }
94 }
95 int gblSuccess = 0; // output argument
96 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
97 if (gblSuccess != 1) {
98 std::ostringstream gblErrStrm;
99 gblErrStrm << "Tpetra::Map constructor " << ctorName <<
100 " detected a problem with the input array "
101 "(raw array, Teuchos::ArrayView, or Kokkos::View) "
102 "of global indices." << endl;
103 if (verbose) {
105 gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
106 }
107 TEUCHOS_TEST_FOR_EXCEPTION
108 (true, std::invalid_argument, gblErrStrm.str ());
109 }
110 }
111 }
112} // namespace (anonymous)
113
114namespace Tpetra {
115
116 template <class LocalOrdinal, class GlobalOrdinal, class Node>
118 Map () :
119 comm_ (new Teuchos::SerialComm<int> ()),
120 indexBase_ (0),
121 numGlobalElements_ (0),
122 numLocalElements_ (0),
123 minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
124 maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
125 minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
126 maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
127 firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
128 lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
129 uniform_ (false), // trivially
130 contiguous_ (false),
131 distributed_ (false), // no communicator yet
132 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
133 {
135 }
136
137
138 template <class LocalOrdinal, class GlobalOrdinal, class Node>
140 Map (const global_size_t numGlobalElements,
141 const global_ordinal_type indexBase,
142 const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
143 const LocalGlobal lOrG) :
144 comm_ (comm),
145 uniform_ (true),
146 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
147 {
148 using Teuchos::as;
149 using Teuchos::broadcast;
150 using Teuchos::outArg;
151 using Teuchos::reduceAll;
152 using Teuchos::REDUCE_MIN;
153 using Teuchos::REDUCE_MAX;
154 using Teuchos::typeName;
155 using std::endl;
156 using GO = global_ordinal_type;
157 using GST = global_size_t;
158 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
159 const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
160 const char exPfx[] =
161 "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
162
163 const bool debug = Details::Behavior::debug("Map");
164 const bool verbose = Details::Behavior::verbose("Map");
165 std::unique_ptr<std::string> prefix;
166 if (verbose) {
167 prefix = Details::createPrefix(
168 comm_.getRawPtr(), "Map", funcName);
169 std::ostringstream os;
170 os << *prefix << "Start" << endl;
171 std::cerr << os.str();
172 }
174
175 // In debug mode only, check whether numGlobalElements and
176 // indexBase are the same over all processes in the communicator.
177 if (debug) {
178 GST proc0NumGlobalElements = numGlobalElements;
179 broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
180 GST minNumGlobalElements = numGlobalElements;
181 GST maxNumGlobalElements = numGlobalElements;
182 reduceAll(*comm, REDUCE_MIN, numGlobalElements,
183 outArg(minNumGlobalElements));
184 reduceAll(*comm, REDUCE_MAX, numGlobalElements,
185 outArg(maxNumGlobalElements));
186 TEUCHOS_TEST_FOR_EXCEPTION
187 (minNumGlobalElements != maxNumGlobalElements ||
188 numGlobalElements != minNumGlobalElements,
189 std::invalid_argument, exPfx << "All processes must "
190 "provide the same number of global elements. Process 0 set "
191 "numGlobalElements="<< proc0NumGlobalElements << ". The "
192 "calling process " << comm->getRank() << " set "
193 "numGlobalElements=" << numGlobalElements << ". The min "
194 "and max values over all processes are "
195 << minNumGlobalElements << " resp. " << maxNumGlobalElements
196 << ".");
197
198 GO proc0IndexBase = indexBase;
199 broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
200 GO minIndexBase = indexBase;
201 GO maxIndexBase = indexBase;
202 reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
203 reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
204 TEUCHOS_TEST_FOR_EXCEPTION
205 (minIndexBase != maxIndexBase || indexBase != minIndexBase,
206 std::invalid_argument, exPfx << "All processes must "
207 "provide the same indexBase argument. Process 0 set "
208 "indexBase=" << proc0IndexBase << ". The calling process "
209 << comm->getRank() << " set indexBase=" << indexBase
210 << ". The min and max values over all processes are "
211 << minIndexBase << " resp. " << maxIndexBase << ".");
212 }
213
214 // Distribute the elements across the processes in the given
215 // communicator so that global IDs (GIDs) are
216 //
217 // - Nonoverlapping (only one process owns each GID)
218 // - Contiguous (the sequence of GIDs is nondecreasing, and no two
219 // adjacent GIDs differ by more than one)
220 // - As evenly distributed as possible (the numbers of GIDs on two
221 // different processes do not differ by more than one)
222
223 // All processes have the same numGlobalElements, but we still
224 // need to check that it is valid. numGlobalElements must be
225 // positive and not the "invalid" value (GSTI).
226 //
227 // This comparison looks funny, but it avoids compiler warnings
228 // for comparing unsigned integers (numGlobalElements_in is a
229 // GST, which is unsigned) while still working if we
230 // later decide to make GST signed.
231 TEUCHOS_TEST_FOR_EXCEPTION(
232 (numGlobalElements < 1 && numGlobalElements != 0),
233 std::invalid_argument, exPfx << "numGlobalElements (= "
234 << numGlobalElements << ") must be nonnegative.");
235
236 TEUCHOS_TEST_FOR_EXCEPTION
237 (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
238 "You provided numGlobalElements = Teuchos::OrdinalTraits<"
239 "Tpetra::global_size_t>::invalid(). This version of the "
240 "constructor requires a valid value of numGlobalElements. "
241 "You probably mistook this constructor for the \"contiguous "
242 "nonuniform\" constructor, which can compute the global "
243 "number of elements for you if you set numGlobalElements to "
244 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
245
246 size_t numLocalElements = 0; // will set below
247 if (lOrG == GloballyDistributed) {
248 // Compute numLocalElements:
249 //
250 // If numGlobalElements == numProcs * B + remainder,
251 // then Proc r gets B+1 elements if r < remainder,
252 // and B elements if r >= remainder.
253 //
254 // This strategy is valid for any value of numGlobalElements and
255 // numProcs, including the following border cases:
256 // - numProcs == 1
257 // - numLocalElements < numProcs
258 //
259 // In the former case, remainder == 0 && numGlobalElements ==
260 // numLocalElements. In the latter case, remainder ==
261 // numGlobalElements && numLocalElements is either 0 or 1.
262 const GST numProcs = static_cast<GST> (comm_->getSize ());
263 const GST myRank = static_cast<GST> (comm_->getRank ());
264 const GST quotient = numGlobalElements / numProcs;
265 const GST remainder = numGlobalElements - quotient * numProcs;
266
267 GO startIndex;
268 if (myRank < remainder) {
269 numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
270 // myRank was originally an int, so it should never overflow
271 // reasonable GO types.
272 startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
273 } else {
274 numLocalElements = as<size_t> (quotient);
275 startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
276 as<GO> (remainder);
277 }
278
279 minMyGID_ = indexBase + startIndex;
280 maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
281 minAllGID_ = indexBase;
282 maxAllGID_ = indexBase + numGlobalElements - 1;
283 distributed_ = (numProcs > 1);
284 }
285 else { // lOrG == LocallyReplicated
286 numLocalElements = as<size_t> (numGlobalElements);
287 minMyGID_ = indexBase;
288 maxMyGID_ = indexBase + numGlobalElements - 1;
289 distributed_ = false;
290 }
291
292 minAllGID_ = indexBase;
293 maxAllGID_ = indexBase + numGlobalElements - 1;
294 indexBase_ = indexBase;
295 numGlobalElements_ = numGlobalElements;
296 numLocalElements_ = numLocalElements;
297 firstContiguousGID_ = minMyGID_;
298 lastContiguousGID_ = maxMyGID_;
299 contiguous_ = true;
300
301 // Create the Directory on demand in getRemoteIndexList().
302 //setupDirectory ();
303
304 if (verbose) {
305 std::ostringstream os;
306 os << *prefix << "Done" << endl;
307 std::cerr << os.str();
308 }
309 }
310
311
312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
314 Map (const global_size_t numGlobalElements,
315 const size_t numLocalElements,
316 const global_ordinal_type indexBase,
317 const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
318 comm_ (comm),
319 uniform_ (false),
320 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
321 {
322 using Teuchos::as;
323 using Teuchos::broadcast;
324 using Teuchos::outArg;
325 using Teuchos::reduceAll;
326 using Teuchos::REDUCE_MIN;
327 using Teuchos::REDUCE_MAX;
328 using Teuchos::REDUCE_SUM;
329 using Teuchos::scan;
330 using std::endl;
331 using GO = global_ordinal_type;
332 using GST = global_size_t;
333 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
334 const char funcName[] =
335 "Map(gblNumInds,lclNumInds,indexBase,comm)";
336 const char exPfx[] =
337 "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
338 const char suffix[] =
339 ". Please report this bug to the Tpetra developers.";
340
341 const bool debug = Details::Behavior::debug("Map");
342 const bool verbose = Details::Behavior::verbose("Map");
343 std::unique_ptr<std::string> prefix;
344 if (verbose) {
345 prefix = Details::createPrefix(
346 comm_.getRawPtr(), "Map", funcName);
347 std::ostringstream os;
348 os << *prefix << "Start" << endl;
349 std::cerr << os.str();
350 }
352
353 // Global sum of numLocalElements over all processes.
354 // Keep this for later debug checks.
355 GST debugGlobalSum {};
356 if (debug) {
357 debugGlobalSum = initialNonuniformDebugCheck(exPfx,
358 numGlobalElements, numLocalElements, indexBase, comm);
359 }
360
361 // Distribute the elements across the nodes so that they are
362 // - non-overlapping
363 // - contiguous
364
365 // This differs from the first Map constructor (that only takes a
366 // global number of elements) in that the user has specified the
367 // number of local elements, so that the elements are not
368 // (necessarily) evenly distributed over the processes.
369
370 // Compute my local offset. This is an inclusive scan, so to get
371 // the final offset, we subtract off the input.
372 GO scanResult = 0;
373 scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
374 const GO myOffset = scanResult - numLocalElements;
375
376 if (numGlobalElements != GSTI) {
377 numGlobalElements_ = numGlobalElements; // Use the user's value.
378 }
379 else {
380 // Inclusive scan means that the last process has the final sum.
381 // Rather than doing a reduceAll to get the sum of
382 // numLocalElements, we can just have the last process broadcast
383 // its result. That saves us a round of log(numProcs) messages.
384 const int numProcs = comm->getSize ();
385 GST globalSum = scanResult;
386 if (numProcs > 1) {
387 broadcast (*comm, numProcs - 1, outArg (globalSum));
388 }
389 numGlobalElements_ = globalSum;
390
391 if (debug) {
392 // No need for an all-reduce here; both come from collectives.
393 TEUCHOS_TEST_FOR_EXCEPTION
394 (globalSum != debugGlobalSum, std::logic_error, exPfx <<
395 "globalSum = " << globalSum << " != debugGlobalSum = " <<
396 debugGlobalSum << suffix);
397 }
398 }
399 numLocalElements_ = numLocalElements;
400 indexBase_ = indexBase;
401 minAllGID_ = (numGlobalElements_ == 0) ?
402 std::numeric_limits<GO>::max () :
403 indexBase;
404 maxAllGID_ = (numGlobalElements_ == 0) ?
405 std::numeric_limits<GO>::lowest () :
406 indexBase + GO(numGlobalElements_) - GO(1);
407 minMyGID_ = (numLocalElements_ == 0) ?
408 std::numeric_limits<GO>::max () :
409 indexBase + GO(myOffset);
410 maxMyGID_ = (numLocalElements_ == 0) ?
411 std::numeric_limits<GO>::lowest () :
412 indexBase + myOffset + GO(numLocalElements) - GO(1);
413 firstContiguousGID_ = minMyGID_;
414 lastContiguousGID_ = maxMyGID_;
415 contiguous_ = true;
416 distributed_ = checkIsDist ();
417
418 // Create the Directory on demand in getRemoteIndexList().
419 //setupDirectory ();
420
421 if (verbose) {
422 std::ostringstream os;
423 os << *prefix << "Done" << endl;
424 std::cerr << os.str();
425 }
426 }
427
428 template <class LocalOrdinal, class GlobalOrdinal, class Node>
432 const char errorMessagePrefix[],
433 const global_size_t numGlobalElements,
434 const size_t numLocalElements,
435 const global_ordinal_type indexBase,
436 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
437 {
438 const bool debug = Details::Behavior::debug("Map");
439 if (! debug) {
440 return global_size_t(0);
441 }
442
443 using Teuchos::broadcast;
444 using Teuchos::outArg;
445 using Teuchos::ptr;
446 using Teuchos::REDUCE_MAX;
447 using Teuchos::REDUCE_MIN;
448 using Teuchos::REDUCE_SUM;
449 using Teuchos::reduceAll;
450 using GO = global_ordinal_type;
451 using GST = global_size_t;
452 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
453
454 // The user has specified the distribution of indices over the
455 // processes. The distribution is not necessarily contiguous or
456 // equally shared over the processes.
457 //
458 // We assume that the number of local elements can be stored in a
459 // size_t. The instance member numLocalElements_ is a size_t, so
460 // this variable and that should have the same type.
461
462 GST debugGlobalSum = 0; // Will be global sum of numLocalElements
463 reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
464 outArg (debugGlobalSum));
465 // In debug mode only, check whether numGlobalElements and
466 // indexBase are the same over all processes in the communicator.
467 {
468 GST proc0NumGlobalElements = numGlobalElements;
469 broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
470 GST minNumGlobalElements = numGlobalElements;
471 GST maxNumGlobalElements = numGlobalElements;
472 reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
473 outArg (minNumGlobalElements));
474 reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
475 outArg (maxNumGlobalElements));
476 TEUCHOS_TEST_FOR_EXCEPTION
477 (minNumGlobalElements != maxNumGlobalElements ||
478 numGlobalElements != minNumGlobalElements,
479 std::invalid_argument, errorMessagePrefix << "All processes "
480 "must provide the same number of global elements, even if "
481 "that argument is "
482 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
483 "(which signals that the Map should compute the global "
484 "number of elements). Process 0 set numGlobalElements"
485 "=" << proc0NumGlobalElements << ". The calling process "
486 << comm->getRank() << " set numGlobalElements=" <<
487 numGlobalElements << ". The min and max values over all "
488 "processes are " << minNumGlobalElements << " resp. " <<
489 maxNumGlobalElements << ".");
490
491 GO proc0IndexBase = indexBase;
492 broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
493 GO minIndexBase = indexBase;
494 GO maxIndexBase = indexBase;
495 reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
496 reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
497 TEUCHOS_TEST_FOR_EXCEPTION
498 (minIndexBase != maxIndexBase || indexBase != minIndexBase,
499 std::invalid_argument, errorMessagePrefix <<
500 "All processes must provide the same indexBase argument. "
501 "Process 0 set indexBase = " << proc0IndexBase << ". The "
502 "calling process " << comm->getRank() << " set indexBase="
503 << indexBase << ". The min and max values over all "
504 "processes are " << minIndexBase << " resp. " << maxIndexBase
505 << ".");
506
507 // Make sure that the sum of numLocalElements over all processes
508 // equals numGlobalElements.
509 TEUCHOS_TEST_FOR_EXCEPTION
510 (numGlobalElements != GSTI &&
511 debugGlobalSum != numGlobalElements, std::invalid_argument,
512 errorMessagePrefix << "The sum of each process' number of "
513 "indices over all processes, " << debugGlobalSum << ", != "
514 << "numGlobalElements=" << numGlobalElements << ". If you "
515 "would like this constructor to compute numGlobalElements "
516 "for you, you may set numGlobalElements="
517 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
518 "on input. Please note that this is NOT necessarily -1.");
519 }
520 return debugGlobalSum;
521 }
522
523 template <class LocalOrdinal, class GlobalOrdinal, class Node>
524 void
527 const char errorMessagePrefix[],
528 const global_size_t numGlobalElements,
529 const Kokkos::View<const global_ordinal_type*,
530 Kokkos::LayoutLeft,
531 Kokkos::HostSpace,
532 Kokkos::MemoryUnmanaged>& entryList_host,
533 const global_ordinal_type indexBase,
534 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
535 {
536 using Kokkos::LayoutLeft;
537 using Kokkos::subview;
538 using Kokkos::View;
539 using Kokkos::view_alloc;
540 using Kokkos::WithoutInitializing;
541 using Teuchos::as;
542 using Teuchos::broadcast;
543 using Teuchos::outArg;
544 using Teuchos::ptr;
545 using Teuchos::REDUCE_MAX;
546 using Teuchos::REDUCE_MIN;
547 using Teuchos::REDUCE_SUM;
548 using Teuchos::reduceAll;
549 using LO = local_ordinal_type;
550 using GO = global_ordinal_type;
551 using GST = global_size_t;
552 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
553
554 // Make sure that Kokkos has been initialized (Github Issue #513).
555 TEUCHOS_TEST_FOR_EXCEPTION
556 (! Kokkos::is_initialized (), std::runtime_error,
557 errorMessagePrefix << "The Kokkos execution space "
558 << Teuchos::TypeNameTraits<execution_space>::name()
559 << " has not been initialized. "
560 "Please initialize it before creating a Map.")
561
562 // The user has specified the distribution of indices over the
563 // processes, via the input array of global indices on each
564 // process. The distribution is not necessarily contiguous or
565 // equally shared over the processes.
566
567 // The length of the input array on this process is the number of
568 // local indices to associate with this process, even though the
569 // input array contains global indices. We assume that the number
570 // of local indices on a process can be stored in a size_t;
571 // numLocalElements_ is a size_t, so this variable and that should
572 // have the same type.
573 const size_t numLocalElements(entryList_host.size());
574
575 initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
576 numLocalElements, indexBase, comm);
577
578 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
579 // reduction is redundant, since the directory Map will have to do
580 // the same thing. Thus, we could do the scan and broadcast for
581 // the directory Map here, and give the computed offsets to the
582 // directory Map's constructor. However, a reduction costs less
583 // than a scan and broadcast, so this still saves time if users of
584 // this Map don't ever need the Directory (i.e., if they never
585 // call getRemoteIndexList on this Map).
586 if (numGlobalElements != GSTI) {
587 numGlobalElements_ = numGlobalElements; // Use the user's value.
588 }
589 else { // The user wants us to compute the sum.
590 reduceAll(*comm, REDUCE_SUM,
591 static_cast<GST>(numLocalElements),
592 outArg(numGlobalElements_));
593 }
594
595 // mfh 20 Feb 2013: We've never quite done the right thing for
596 // duplicate GIDs here. Duplicate GIDs have always been counted
597 // distinctly in numLocalElements_, and thus should get a
598 // different LID. However, we've always used std::map or a hash
599 // table for the GID -> LID lookup table, so distinct GIDs always
600 // map to the same LID. Furthermore, the order of the input GID
601 // list matters, so it's not desirable to sort for determining
602 // uniqueness.
603 //
604 // I've chosen for now to write this code as if the input GID list
605 // contains no duplicates. If this is not desired, we could use
606 // the lookup table itself to determine uniqueness: If we haven't
607 // seen the GID before, it gets a new LID and it's added to the
608 // LID -> GID and GID -> LID tables. If we have seen the GID
609 // before, it doesn't get added to either table. I would
610 // implement this, but it would cost more to do the double lookups
611 // in the table (one to check, and one to insert).
612 //
613 // More importantly, since we build the GID -> LID table in (a
614 // thread-) parallel (way), the order in which duplicate GIDs may
615 // get inserted is not defined. This would make the assignment of
616 // LID to GID nondeterministic.
617
618 numLocalElements_ = numLocalElements;
619 indexBase_ = indexBase;
620
621 minMyGID_ = indexBase_;
622 maxMyGID_ = indexBase_;
623
624 // NOTE (mfh 27 May 2015): While finding the initial contiguous
625 // GID range requires looking at all the GIDs in the range,
626 // dismissing an interval of GIDs only requires looking at the
627 // first and last GIDs. Thus, we could do binary search backwards
628 // from the end in order to catch the common case of a contiguous
629 // interval followed by noncontiguous entries. On the other hand,
630 // we could just expose this case explicitly as yet another Map
631 // constructor, and avoid the trouble of detecting it.
632 if (numLocalElements_ > 0) {
633 // Find contiguous GID range, with the restriction that the
634 // beginning of the range starts with the first entry. While
635 // doing so, fill in the LID -> GID table.
636 typename decltype (lgMap_)::non_const_type lgMap
637 (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
638 auto lgMap_host =
639 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
640
641 // The input array entryList_host is already on host, so we
642 // don't need to take a host view of it.
643 // auto entryList_host =
644 // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
645 // Kokkos::deep_copy (entryList_host, entryList);
646
647 firstContiguousGID_ = entryList_host[0];
648 lastContiguousGID_ = firstContiguousGID_+1;
649
650 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
651 // anyway, so we have to look at them all. The logical way to
652 // find the first noncontiguous entry would thus be to "reduce,"
653 // where the local reduction result is whether entryList[i] + 1
654 // == entryList[i+1].
655
656 lgMap_host[0] = firstContiguousGID_;
657 size_t i = 1;
658 for ( ; i < numLocalElements_; ++i) {
659 const GO curGid = entryList_host[i];
660 const LO curLid = as<LO> (i);
661
662 if (lastContiguousGID_ != curGid) break;
663
664 // Add the entry to the LID->GID table only after we know that
665 // the current GID is in the initial contiguous sequence, so
666 // that we don't repeat adding it in the first iteration of
667 // the loop below over the remaining noncontiguous GIDs.
668 lgMap_host[curLid] = curGid;
669 ++lastContiguousGID_;
670 }
671 --lastContiguousGID_;
672
673 // [firstContiguousGID_, lastContigousGID_] is the initial
674 // sequence of contiguous GIDs. We can start the min and max
675 // GID using this range.
676 minMyGID_ = firstContiguousGID_;
677 maxMyGID_ = lastContiguousGID_;
678
679 // Compute the GID -> LID lookup table, _not_ including the
680 // initial sequence of contiguous GIDs.
681 {
682 const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
683 auto nonContigGids_host = subview (entryList_host, ncRange);
684 TEUCHOS_TEST_FOR_EXCEPTION
685 (static_cast<size_t> (nonContigGids_host.extent (0)) !=
686 static_cast<size_t> (entryList_host.extent (0) - i),
687 std::logic_error, "Tpetra::Map noncontiguous constructor: "
688 "nonContigGids_host.extent(0) = "
689 << nonContigGids_host.extent (0)
690 << " != entryList_host.extent(0) - i = "
691 << (entryList_host.extent (0) - i) << " = "
692 << entryList_host.extent (0) << " - " << i
693 << ". Please report this bug to the Tpetra developers.");
694
695 // FixedHashTable's constructor expects an owned device View,
696 // so we must deep-copy the subview of the input indices.
697 View<GO*, LayoutLeft, device_type>
698 nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
699 nonContigGids_host.size ());
700 // DEEP_COPY REVIEW - HOST-TO-DEVICE
701 Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
702 Kokkos::fence(); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
703
704 glMap_ = global_to_local_table_type(nonContigGids,
705 firstContiguousGID_,
706 lastContiguousGID_,
707 static_cast<LO> (i));
708 // Make host version - when memory spaces match these just do trivial assignment
709 glMapHost_ = global_to_local_table_host_type(glMap_);
710 }
711
712 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
713 // table above, we have to look at all the (noncontiguous) input
714 // indices anyway. Thus, why not have the constructor compute
715 // and return the min and max?
716
717 for ( ; i < numLocalElements_; ++i) {
718 const GO curGid = entryList_host[i];
719 const LO curLid = static_cast<LO> (i);
720 lgMap_host[curLid] = curGid; // LID -> GID table
721
722 // While iterating through entryList, we compute its
723 // (process-local) min and max elements.
724 if (curGid < minMyGID_) {
725 minMyGID_ = curGid;
726 }
727 if (curGid > maxMyGID_) {
728 maxMyGID_ = curGid;
729 }
730 }
731
732 // We filled lgMap on host above; now sync back to device.
733 // DEEP_COPY REVIEW - HOST-TO-DEVICE
734 Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
735
736 // "Commit" the local-to-global lookup table we filled in above.
737 lgMap_ = lgMap;
738 // We've already created this, so use it.
739 lgMapHost_ = lgMap_host;
740 }
741 else {
742 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
743 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
744 // This insures tests for GIDs in the range
745 // [firstContiguousGID_, lastContiguousGID_] fail for processes
746 // with no local elements.
747 firstContiguousGID_ = indexBase_+1;
748 lastContiguousGID_ = indexBase_;
749 // glMap_ was default constructed, so it's already empty.
750 }
751
752 // Compute the min and max of all processes' GIDs. If
753 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
754 // are both indexBase_. This is wrong, but fixing it would
755 // require either a fancy sparse all-reduce, or a custom reduction
756 // operator that ignores invalid values ("invalid" means
757 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
758 //
759 // Also, while we're at it, use the same all-reduce to figure out
760 // if the Map is distributed. "Distributed" means that there is
761 // at least one process with a number of local elements less than
762 // the number of global elements.
763 //
764 // We're computing the min and max of all processes' GIDs using a
765 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
766 // and y are signed). (This lets us combine the min and max into
767 // a single all-reduce.) If each process sets localDist=1 if its
768 // number of local elements is strictly less than the number of
769 // global elements, and localDist=0 otherwise, then a MAX
770 // all-reduce on localDist tells us if the Map is distributed (1
771 // if yes, 0 if no). Thus, we can append localDist onto the end
772 // of the data and get the global result from the all-reduce.
773 if (std::numeric_limits<GO>::is_signed) {
774 // Does my process NOT own all the elements?
775 const GO localDist =
776 (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
777
778 GO minMaxInput[3];
779 minMaxInput[0] = -minMyGID_;
780 minMaxInput[1] = maxMyGID_;
781 minMaxInput[2] = localDist;
782
783 GO minMaxOutput[3];
784 minMaxOutput[0] = 0;
785 minMaxOutput[1] = 0;
786 minMaxOutput[2] = 0;
787 reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
788 minAllGID_ = -minMaxOutput[0];
789 maxAllGID_ = minMaxOutput[1];
790 const GO globalDist = minMaxOutput[2];
791 distributed_ = (comm_->getSize () > 1 && globalDist == 1);
792 }
793 else { // unsigned; use two reductions
794 // This is always correct, no matter the signedness of GO.
795 reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
796 reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
797 distributed_ = checkIsDist ();
798 }
799
800 contiguous_ = false; // "Contiguous" is conservative.
801
802 TEUCHOS_TEST_FOR_EXCEPTION(
803 minAllGID_ < indexBase_,
804 std::invalid_argument,
805 "Tpetra::Map constructor (noncontiguous): "
806 "Minimum global ID = " << minAllGID_ << " over all process(es) is "
807 "less than the given indexBase = " << indexBase_ << ".");
808
809 // Create the Directory on demand in getRemoteIndexList().
810 //setupDirectory ();
811 }
812
813 template <class LocalOrdinal, class GlobalOrdinal, class Node>
815 Map (const global_size_t numGlobalElements,
816 const GlobalOrdinal indexList[],
817 const LocalOrdinal indexListSize,
818 const GlobalOrdinal indexBase,
819 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
820 comm_ (comm),
821 uniform_ (false),
822 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
823 {
824 using std::endl;
825 const char funcName[] =
826 "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
827
828 const bool verbose = Details::Behavior::verbose("Map");
829 std::unique_ptr<std::string> prefix;
830 if (verbose) {
831 prefix = Details::createPrefix(
832 comm_.getRawPtr(), "Map", funcName);
833 std::ostringstream os;
834 os << *prefix << "Start" << endl;
835 std::cerr << os.str();
836 }
838 checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
839 indexList, static_cast<size_t> (indexListSize),
840 comm.getRawPtr ());
841 // Not quite sure if I trust all code to behave correctly if the
842 // pointer is nonnull but the array length is nonzero, so I'll
843 // make sure the raw pointer is null if the length is zero.
844 const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
845 Kokkos::View<const GlobalOrdinal*,
846 Kokkos::LayoutLeft,
847 Kokkos::HostSpace,
848 Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
849 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
850 indexBase, comm);
851 if (verbose) {
852 std::ostringstream os;
853 os << *prefix << "Done" << endl;
854 std::cerr << os.str();
855 }
856 }
857
858 template <class LocalOrdinal, class GlobalOrdinal, class Node>
860 Map (const global_size_t numGlobalElements,
861 const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
862 const GlobalOrdinal indexBase,
863 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
864 comm_ (comm),
865 uniform_ (false),
866 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
867 {
868 using std::endl;
869 const char funcName[] =
870 "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
871
872 const bool verbose = Details::Behavior::verbose("Map");
873 std::unique_ptr<std::string> prefix;
874 if (verbose) {
875 prefix = Details::createPrefix(
876 comm_.getRawPtr(), "Map", funcName);
877 std::ostringstream os;
878 os << *prefix << "Start" << endl;
879 std::cerr << os.str();
880 }
882 const size_t numLclInds = static_cast<size_t> (entryList.size ());
883 checkMapInputArray ("(GST, ArrayView, GO, comm)",
884 entryList.getRawPtr (), numLclInds,
885 comm.getRawPtr ());
886 // Not quite sure if I trust both ArrayView and View to behave
887 // correctly if the pointer is nonnull but the array length is
888 // nonzero, so I'll make sure it's null if the length is zero.
889 const GlobalOrdinal* const indsRaw =
890 numLclInds == 0 ? NULL : entryList.getRawPtr ();
891 Kokkos::View<const GlobalOrdinal*,
892 Kokkos::LayoutLeft,
893 Kokkos::HostSpace,
894 Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
895 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
896 indexBase, comm);
897 if (verbose) {
898 std::ostringstream os;
899 os << *prefix << "Done" << endl;
900 std::cerr << os.str();
901 }
902 }
903
904 template <class LocalOrdinal, class GlobalOrdinal, class Node>
906 Map (const global_size_t numGlobalElements,
907 const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
908 const GlobalOrdinal indexBase,
909 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
910 comm_ (comm),
911 uniform_ (false),
912 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
913 {
914 using Kokkos::LayoutLeft;
915 using Kokkos::subview;
916 using Kokkos::View;
917 using Kokkos::view_alloc;
918 using Kokkos::WithoutInitializing;
919 using Teuchos::arcp;
920 using Teuchos::ArrayView;
921 using Teuchos::as;
922 using Teuchos::broadcast;
923 using Teuchos::outArg;
924 using Teuchos::ptr;
925 using Teuchos::REDUCE_MAX;
926 using Teuchos::REDUCE_MIN;
927 using Teuchos::REDUCE_SUM;
928 using Teuchos::reduceAll;
929 using Teuchos::typeName;
930 using std::endl;
931 using LO = local_ordinal_type;
932 using GO = global_ordinal_type;
933 using GST = global_size_t;
934 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
935 const char funcName[] =
936 "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
937
938 const bool verbose = Details::Behavior::verbose("Map");
939 std::unique_ptr<std::string> prefix;
940 if (verbose) {
941 prefix = Details::createPrefix(
942 comm_.getRawPtr(), "Map", funcName);
943 std::ostringstream os;
944 os << *prefix << "Start" << endl;
945 std::cerr << os.str();
946 }
948 checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
949 entryList.data (),
950 static_cast<size_t> (entryList.extent (0)),
951 comm.getRawPtr ());
952
953 // The user has specified the distribution of indices over the
954 // processes, via the input array of global indices on each
955 // process. The distribution is not necessarily contiguous or
956 // equally shared over the processes.
957
958 // The length of the input array on this process is the number of
959 // local indices to associate with this process, even though the
960 // input array contains global indices. We assume that the number
961 // of local indices on a process can be stored in a size_t;
962 // numLocalElements_ is a size_t, so this variable and that should
963 // have the same type.
964 const size_t numLocalElements(entryList.size());
965
966 initialNonuniformDebugCheck(funcName, numGlobalElements,
967 numLocalElements, indexBase, comm);
968
969 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
970 // reduction is redundant, since the directory Map will have to do
971 // the same thing. Thus, we could do the scan and broadcast for
972 // the directory Map here, and give the computed offsets to the
973 // directory Map's constructor. However, a reduction costs less
974 // than a scan and broadcast, so this still saves time if users of
975 // this Map don't ever need the Directory (i.e., if they never
976 // call getRemoteIndexList on this Map).
977 if (numGlobalElements != GSTI) {
978 numGlobalElements_ = numGlobalElements; // Use the user's value.
979 }
980 else { // The user wants us to compute the sum.
981 reduceAll(*comm, REDUCE_SUM,
982 static_cast<GST>(numLocalElements),
983 outArg(numGlobalElements_));
984 }
985
986 // mfh 20 Feb 2013: We've never quite done the right thing for
987 // duplicate GIDs here. Duplicate GIDs have always been counted
988 // distinctly in numLocalElements_, and thus should get a
989 // different LID. However, we've always used std::map or a hash
990 // table for the GID -> LID lookup table, so distinct GIDs always
991 // map to the same LID. Furthermore, the order of the input GID
992 // list matters, so it's not desirable to sort for determining
993 // uniqueness.
994 //
995 // I've chosen for now to write this code as if the input GID list
996 // contains no duplicates. If this is not desired, we could use
997 // the lookup table itself to determine uniqueness: If we haven't
998 // seen the GID before, it gets a new LID and it's added to the
999 // LID -> GID and GID -> LID tables. If we have seen the GID
1000 // before, it doesn't get added to either table. I would
1001 // implement this, but it would cost more to do the double lookups
1002 // in the table (one to check, and one to insert).
1003 //
1004 // More importantly, since we build the GID -> LID table in (a
1005 // thread-) parallel (way), the order in which duplicate GIDs may
1006 // get inserted is not defined. This would make the assignment of
1007 // LID to GID nondeterministic.
1008
1009 numLocalElements_ = numLocalElements;
1010 indexBase_ = indexBase;
1011
1012 minMyGID_ = indexBase_;
1013 maxMyGID_ = indexBase_;
1014
1015 // NOTE (mfh 27 May 2015): While finding the initial contiguous
1016 // GID range requires looking at all the GIDs in the range,
1017 // dismissing an interval of GIDs only requires looking at the
1018 // first and last GIDs. Thus, we could do binary search backwards
1019 // from the end in order to catch the common case of a contiguous
1020 // interval followed by noncontiguous entries. On the other hand,
1021 // we could just expose this case explicitly as yet another Map
1022 // constructor, and avoid the trouble of detecting it.
1023 if (numLocalElements_ > 0) {
1024 // Find contiguous GID range, with the restriction that the
1025 // beginning of the range starts with the first entry. While
1026 // doing so, fill in the LID -> GID table.
1027 typename decltype (lgMap_)::non_const_type lgMap
1028 (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1029 auto lgMap_host =
1030 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1031
1032 using array_layout =
1033 typename View<const GO*, device_type>::array_layout;
1034 View<GO*, array_layout, Kokkos::HostSpace> entryList_host
1035 (view_alloc ("entryList_host", WithoutInitializing),
1036 entryList.extent(0));
1037 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1038 Kokkos::deep_copy (execution_space(), entryList_host, entryList);
1039 Kokkos::fence(); // UVM follows
1040 firstContiguousGID_ = entryList_host[0];
1041 lastContiguousGID_ = firstContiguousGID_+1;
1042
1043 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
1044 // anyway, so we have to look at them all. The logical way to
1045 // find the first noncontiguous entry would thus be to "reduce,"
1046 // where the local reduction result is whether entryList[i] + 1
1047 // == entryList[i+1].
1048
1049 lgMap_host[0] = firstContiguousGID_;
1050 size_t i = 1;
1051 for ( ; i < numLocalElements_; ++i) {
1052 const GO curGid = entryList_host[i];
1053 const LO curLid = as<LO> (i);
1054
1055 if (lastContiguousGID_ != curGid) break;
1056
1057 // Add the entry to the LID->GID table only after we know that
1058 // the current GID is in the initial contiguous sequence, so
1059 // that we don't repeat adding it in the first iteration of
1060 // the loop below over the remaining noncontiguous GIDs.
1061 lgMap_host[curLid] = curGid;
1062 ++lastContiguousGID_;
1063 }
1064 --lastContiguousGID_;
1065
1066 // [firstContiguousGID_, lastContigousGID_] is the initial
1067 // sequence of contiguous GIDs. We can start the min and max
1068 // GID using this range.
1069 minMyGID_ = firstContiguousGID_;
1070 maxMyGID_ = lastContiguousGID_;
1071
1072 // Compute the GID -> LID lookup table, _not_ including the
1073 // initial sequence of contiguous GIDs.
1074 {
1075 const std::pair<size_t, size_t> ncRange (i, entryList.extent (0));
1076 auto nonContigGids = subview (entryList, ncRange);
1077 TEUCHOS_TEST_FOR_EXCEPTION
1078 (static_cast<size_t> (nonContigGids.extent (0)) !=
1079 static_cast<size_t> (entryList.extent (0) - i),
1080 std::logic_error, "Tpetra::Map noncontiguous constructor: "
1081 "nonContigGids.extent(0) = "
1082 << nonContigGids.extent (0)
1083 << " != entryList.extent(0) - i = "
1084 << (entryList.extent (0) - i) << " = "
1085 << entryList.extent (0) << " - " << i
1086 << ". Please report this bug to the Tpetra developers.");
1087
1088 glMap_ = global_to_local_table_type(nonContigGids,
1089 firstContiguousGID_,
1090 lastContiguousGID_,
1091 static_cast<LO> (i));
1092 // Make host version - when memory spaces match these just do trivial assignment
1093 glMapHost_ = global_to_local_table_host_type(glMap_);
1094 }
1095
1096 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
1097 // table above, we have to look at all the (noncontiguous) input
1098 // indices anyway. Thus, why not have the constructor compute
1099 // and return the min and max?
1100
1101 for ( ; i < numLocalElements_; ++i) {
1102 const GO curGid = entryList_host[i];
1103 const LO curLid = static_cast<LO> (i);
1104 lgMap_host[curLid] = curGid; // LID -> GID table
1105
1106 // While iterating through entryList, we compute its
1107 // (process-local) min and max elements.
1108 if (curGid < minMyGID_) {
1109 minMyGID_ = curGid;
1110 }
1111 if (curGid > maxMyGID_) {
1112 maxMyGID_ = curGid;
1113 }
1114 }
1115
1116 // We filled lgMap on host above; now sync back to device.
1117 // DEEP_COPY REVIEW - HOST-TO-DEVICE
1118 Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
1119
1120 // "Commit" the local-to-global lookup table we filled in above.
1121 lgMap_ = lgMap;
1122 // We've already created this, so use it.
1123 lgMapHost_ = lgMap_host;
1124 }
1125 else {
1126 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1127 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1128 // This insures tests for GIDs in the range
1129 // [firstContiguousGID_, lastContiguousGID_] fail for processes
1130 // with no local elements.
1131 firstContiguousGID_ = indexBase_+1;
1132 lastContiguousGID_ = indexBase_;
1133 // glMap_ was default constructed, so it's already empty.
1134 }
1135
1136 // Compute the min and max of all processes' GIDs. If
1137 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1138 // are both indexBase_. This is wrong, but fixing it would
1139 // require either a fancy sparse all-reduce, or a custom reduction
1140 // operator that ignores invalid values ("invalid" means
1141 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1142 //
1143 // Also, while we're at it, use the same all-reduce to figure out
1144 // if the Map is distributed. "Distributed" means that there is
1145 // at least one process with a number of local elements less than
1146 // the number of global elements.
1147 //
1148 // We're computing the min and max of all processes' GIDs using a
1149 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1150 // and y are signed). (This lets us combine the min and max into
1151 // a single all-reduce.) If each process sets localDist=1 if its
1152 // number of local elements is strictly less than the number of
1153 // global elements, and localDist=0 otherwise, then a MAX
1154 // all-reduce on localDist tells us if the Map is distributed (1
1155 // if yes, 0 if no). Thus, we can append localDist onto the end
1156 // of the data and get the global result from the all-reduce.
1157 if (std::numeric_limits<GO>::is_signed) {
1158 // Does my process NOT own all the elements?
1159 const GO localDist =
1160 (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1161
1162 GO minMaxInput[3];
1163 minMaxInput[0] = -minMyGID_;
1164 minMaxInput[1] = maxMyGID_;
1165 minMaxInput[2] = localDist;
1166
1167 GO minMaxOutput[3];
1168 minMaxOutput[0] = 0;
1169 minMaxOutput[1] = 0;
1170 minMaxOutput[2] = 0;
1171 reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1172 minAllGID_ = -minMaxOutput[0];
1173 maxAllGID_ = minMaxOutput[1];
1174 const GO globalDist = minMaxOutput[2];
1175 distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1176 }
1177 else { // unsigned; use two reductions
1178 // This is always correct, no matter the signedness of GO.
1179 reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1180 reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1181 distributed_ = checkIsDist ();
1182 }
1183
1184 contiguous_ = false; // "Contiguous" is conservative.
1185
1186 TEUCHOS_TEST_FOR_EXCEPTION(
1187 minAllGID_ < indexBase_,
1188 std::invalid_argument,
1189 "Tpetra::Map constructor (noncontiguous): "
1190 "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1191 "less than the given indexBase = " << indexBase_ << ".");
1192
1193 // Create the Directory on demand in getRemoteIndexList().
1194 //setupDirectory ();
1195
1196 if (verbose) {
1197 std::ostringstream os;
1198 os << *prefix << "Done" << endl;
1199 std::cerr << os.str();
1200 }
1201 }
1202
1203
1204 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1206 {
1207 if (! Kokkos::is_initialized ()) {
1208 std::ostringstream os;
1209 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1210 "Kokkos::finalize() has been called. This is user error! There are "
1211 "two likely causes: " << std::endl <<
1212 " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1213 << std::endl <<
1214 " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1215 "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1216 "or Tpetra::finalize()." << std::endl << std::endl <<
1217 "Don't do either of these! Please refer to GitHib Issue #2372."
1218 << std::endl;
1219 ::Tpetra::Details::printOnce (std::cerr, os.str (),
1220 this->getComm ().getRawPtr ());
1221 }
1222 else {
1226
1227 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1228 if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1229 mpiIsInitialized () && mpiIsFinalized ()) {
1230 // Tpetra itself does not require MPI, even if building with
1231 // MPI. It is legal to create Tpetra objects that do not use
1232 // MPI, even in an MPI program. However, calling Tpetra stuff
1233 // after MPI_Finalize() has been called is a bad idea, since
1234 // some Tpetra defaults may use MPI if available.
1235 std::ostringstream os;
1236 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1237 "MPI_Finalize() has been called. This is user error! There are "
1238 "two likely causes: " << std::endl <<
1239 " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1240 << std::endl <<
1241 " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1242 "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1243 "Tpetra::finalize()." << std::endl << std::endl <<
1244 "Don't do either of these! Please refer to GitHib Issue #2372."
1245 << std::endl;
1246 ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1247 }
1248 }
1249 // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1250 // because Tpetra does not yet require Tpetra::initialize /
1251 // Tpetra::finalize.
1252 }
1253
1254 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1255 bool
1257 {
1258 TEUCHOS_TEST_FOR_EXCEPTION(
1259 getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1260 "getComm() returns null. Please report this bug to the Tpetra "
1261 "developers.");
1262
1263 // This is a collective operation, if it hasn't been called before.
1264 setupDirectory ();
1265 return directory_->isOneToOne (*this);
1266 }
1267
1268 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1269 LocalOrdinal
1271 getLocalElement (GlobalOrdinal globalIndex) const
1272 {
1273 if (isContiguous ()) {
1274 if (globalIndex < getMinGlobalIndex () ||
1275 globalIndex > getMaxGlobalIndex ()) {
1276 return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1277 }
1278 return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1279 }
1280 else if (globalIndex >= firstContiguousGID_ &&
1281 globalIndex <= lastContiguousGID_) {
1282 return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1283 }
1284 else {
1285 // If the given global index is not in the table, this returns
1286 // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1287 // glMapHost_ is Host and does not assume UVM
1288 return glMapHost_.get (globalIndex);
1289 }
1290 }
1291
1292 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1293 GlobalOrdinal
1295 getGlobalElement (LocalOrdinal localIndex) const
1296 {
1297 if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1298 return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1299 }
1300 if (isContiguous ()) {
1301 return getMinGlobalIndex () + localIndex;
1302 }
1303 else {
1304 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1305 // involvement. As a result, it is thread safe.
1306 //
1307 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1308 return lgMapHost_[localIndex];
1309 }
1310 }
1311
1312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1313 bool
1315 isNodeLocalElement (LocalOrdinal localIndex) const
1316 {
1317 if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1318 return false;
1319 } else {
1320 return true;
1321 }
1322 }
1323
1324 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1325 bool
1327 isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1328 return this->getLocalElement (globalIndex) !=
1329 Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1330 }
1331
1332 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1334 return uniform_;
1335 }
1336
1337 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1339 return contiguous_;
1340 }
1341
1342
1343 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1346 getLocalMap () const
1347 {
1348 return local_map_type (glMap_, lgMap_, getIndexBase (),
1350 firstContiguousGID_, lastContiguousGID_,
1352 }
1353
1354 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1355 bool
1358 {
1359 using Teuchos::outArg;
1360 using Teuchos::REDUCE_MIN;
1361 using Teuchos::reduceAll;
1362 //
1363 // Tests that avoid the Boolean all-reduce below by using
1364 // globally consistent quantities.
1365 //
1366 if (this == &map) {
1367 // Pointer equality on one process always implies pointer
1368 // equality on all processes, since Map is immutable.
1369 return true;
1370 }
1371 else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1372 // The two communicators have different numbers of processes.
1373 // It's not correct to call isCompatible() in that case. This
1374 // may result in the all-reduce hanging below.
1375 return false;
1376 }
1377 else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1378 // Two Maps are definitely NOT compatible if they have different
1379 // global numbers of indices.
1380 return false;
1381 }
1382 else if (isContiguous () && isUniform () &&
1383 map.isContiguous () && map.isUniform ()) {
1384 // Contiguous uniform Maps with the same number of processes in
1385 // their communicators, and with the same global numbers of
1386 // indices, are always compatible.
1387 return true;
1388 }
1389 else if (! isContiguous () && ! map.isContiguous () &&
1390 lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1391 lgMap_.data () == map.lgMap_.data ()) {
1392 // Noncontiguous Maps whose global index lists are nonempty and
1393 // have the same pointer must be the same (and therefore
1394 // contiguous).
1395 //
1396 // Nonempty is important. For example, consider a communicator
1397 // with two processes, and two Maps that share this
1398 // communicator, with zero global indices on the first process,
1399 // and different nonzero numbers of global indices on the second
1400 // process. In that case, on the first process, the pointers
1401 // would both be NULL.
1402 return true;
1403 }
1404
1405 TEUCHOS_TEST_FOR_EXCEPTION(
1406 getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1407 "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1408 "checked that this condition is true above, but it's false here. "
1409 "Please report this bug to the Tpetra developers.");
1410
1411 // Do both Maps have the same number of indices on each process?
1412 const int locallyCompat =
1413 (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1414
1415 int globallyCompat = 0;
1416 reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1417 return (globallyCompat == 1);
1418 }
1419
1420 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1421 bool
1424 {
1425 using Teuchos::ArrayView;
1426 using GO = global_ordinal_type;
1427 using size_type = typename ArrayView<const GO>::size_type;
1428
1429 // If both Maps are contiguous, we can compare their GID ranges
1430 // easily by looking at the min and max GID on this process.
1431 // Otherwise, we'll compare their GID lists. If only one Map is
1432 // contiguous, then we only have to call getLocalElementList() on
1433 // the noncontiguous Map. (It's best to avoid calling it on a
1434 // contiguous Map, since it results in unnecessary storage that
1435 // persists for the lifetime of the Map.)
1436
1437 if (this == &map) {
1438 // Pointer equality on one process always implies pointer
1439 // equality on all processes, since Map is immutable.
1440 return true;
1441 }
1442 else if (getLocalNumElements () != map.getLocalNumElements ()) {
1443 return false;
1444 }
1445 else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1446 getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1447 return false;
1448 }
1449 else {
1450 if (isContiguous ()) {
1451 if (map.isContiguous ()) {
1452 return true; // min and max match, so the ranges match.
1453 }
1454 else { // *this is contiguous, but map is not contiguous
1455 TEUCHOS_TEST_FOR_EXCEPTION(
1456 ! this->isContiguous () || map.isContiguous (), std::logic_error,
1457 "Tpetra::Map::locallySameAs: BUG");
1458 ArrayView<const GO> rhsElts = map.getLocalElementList ();
1459 const GO minLhsGid = this->getMinGlobalIndex ();
1460 const size_type numRhsElts = rhsElts.size ();
1461 for (size_type k = 0; k < numRhsElts; ++k) {
1462 const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1463 if (curLhsGid != rhsElts[k]) {
1464 return false; // stop on first mismatch
1465 }
1466 }
1467 return true;
1468 }
1469 }
1470 else if (map.isContiguous ()) { // *this is not contiguous, but map is
1471 TEUCHOS_TEST_FOR_EXCEPTION(
1472 this->isContiguous () || ! map.isContiguous (), std::logic_error,
1473 "Tpetra::Map::locallySameAs: BUG");
1474 ArrayView<const GO> lhsElts = this->getLocalElementList ();
1475 const GO minRhsGid = map.getMinGlobalIndex ();
1476 const size_type numLhsElts = lhsElts.size ();
1477 for (size_type k = 0; k < numLhsElts; ++k) {
1478 const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1479 if (curRhsGid != lhsElts[k]) {
1480 return false; // stop on first mismatch
1481 }
1482 }
1483 return true;
1484 }
1485 else if (this->lgMap_.data () == map.lgMap_.data ()) {
1486 // Pointers to LID->GID "map" (actually just an array) are the
1487 // same, and the number of GIDs are the same.
1488 return this->getLocalNumElements () == map.getLocalNumElements ();
1489 }
1490 else { // we actually have to compare the GIDs
1491 if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1492 return false; // We already checked above, but check just in case
1493 }
1494 else {
1495 ArrayView<const GO> lhsElts = getLocalElementList ();
1496 ArrayView<const GO> rhsElts = map.getLocalElementList ();
1497
1498 // std::equal requires that the latter range is as large as
1499 // the former. We know the ranges have equal length, because
1500 // they have the same number of local entries.
1501 return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1502 }
1503 }
1504 }
1505 }
1506
1507 template <class LocalOrdinal,class GlobalOrdinal, class Node>
1508 bool
1511 {
1512 if (this == &map)
1513 return true;
1514
1515 // We are going to check if lmap1 is fitted into lmap2:
1516 // Is lmap1 (map) a subset of lmap2 (this)?
1517 // And do the first lmap1.getLocalNumElements() global elements
1518 // of lmap1,lmap2 owned on each process exactly match?
1519 auto lmap1 = map.getLocalMap();
1520 auto lmap2 = this->getLocalMap();
1521
1522 auto numLocalElements1 = lmap1.getLocalNumElements();
1523 auto numLocalElements2 = lmap2.getLocalNumElements();
1524
1525 if (numLocalElements1 > numLocalElements2) {
1526 // There are more indices in the first map on this process than in second map.
1527 return false;
1528 }
1529
1530 if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1531 // When both Maps are contiguous, just check the interval inclusion.
1532 return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1533 (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1534 }
1535
1536 if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1537 lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1538 // The second map does not include the first map bounds, and thus some of
1539 // the first map global indices are not in the second map.
1540 return false;
1541 }
1542
1543 using LO = local_ordinal_type;
1544 using range_type =
1545 Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1546
1547 // Check all elements.
1548 LO numDiff = 0;
1549 Kokkos::parallel_reduce(
1550 "isLocallyFitted",
1551 range_type(0, numLocalElements1),
1552 KOKKOS_LAMBDA (const LO i, LO& diff) {
1553 diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1554 }, numDiff);
1555
1556 return (numDiff == 0);
1557 }
1558
1559 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1560 bool
1563 {
1564 using Teuchos::outArg;
1565 using Teuchos::REDUCE_MIN;
1566 using Teuchos::reduceAll;
1567 //
1568 // Tests that avoid the Boolean all-reduce below by using
1569 // globally consistent quantities.
1570 //
1571 if (this == &map) {
1572 // Pointer equality on one process always implies pointer
1573 // equality on all processes, since Map is immutable.
1574 return true;
1575 }
1576 else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1577 // The two communicators have different numbers of processes.
1578 // It's not correct to call isSameAs() in that case. This
1579 // may result in the all-reduce hanging below.
1580 return false;
1581 }
1582 else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1583 // Two Maps are definitely NOT the same if they have different
1584 // global numbers of indices.
1585 return false;
1586 }
1587 else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1589 getIndexBase () != map.getIndexBase ()) {
1590 // If the global min or max global index doesn't match, or if
1591 // the index base doesn't match, then the Maps aren't the same.
1592 return false;
1593 }
1594 else if (isDistributed () != map.isDistributed ()) {
1595 // One Map is distributed and the other is not, which means that
1596 // the Maps aren't the same.
1597 return false;
1598 }
1599 else if (isContiguous () && isUniform () &&
1600 map.isContiguous () && map.isUniform ()) {
1601 // Contiguous uniform Maps with the same number of processes in
1602 // their communicators, with the same global numbers of indices,
1603 // and with matching index bases and ranges, must be the same.
1604 return true;
1605 }
1606
1607 // The two communicators must have the same number of processes,
1608 // with process ranks occurring in the same order. This uses
1609 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1610 // "Operations that access communicators are local and their
1611 // execution does not require interprocess communication."
1612 // However, just to be sure, I'll put this call after the above
1613 // tests that don't communicate.
1614 if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1615 return false;
1616 }
1617
1618 // If we get this far, we need to check local properties and then
1619 // communicate local sameness across all processes.
1620 const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1621
1622 // Return true if and only if all processes report local sameness.
1623 int isSame_gbl = 0;
1624 reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1625 return isSame_gbl == 1;
1626 }
1627
1628 namespace { // (anonymous)
1629 template <class LO, class GO, class DT>
1630 class FillLgMap {
1631 public:
1632 FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1633 const GO startGid) :
1634 lgMap_ (lgMap), startGid_ (startGid)
1635 {
1636 Kokkos::RangePolicy<LO, typename DT::execution_space>
1637 range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1638 Kokkos::parallel_for (range, *this);
1639 }
1640
1641 KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1642 lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1643 }
1644
1645 private:
1646 const Kokkos::View<GO*, DT> lgMap_;
1647 const GO startGid_;
1648 };
1649
1650 } // namespace (anonymous)
1651
1652
1653 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1654 typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1656 {
1657 using std::endl;
1658 using LO = local_ordinal_type;
1659 using GO = global_ordinal_type;
1660 using const_lg_view_type = decltype(lgMap_);
1661 using lg_view_type = typename const_lg_view_type::non_const_type;
1662 const bool debug = Details::Behavior::debug("Map");
1663 const bool verbose = Details::Behavior::verbose("Map");
1664
1665 std::unique_ptr<std::string> prefix;
1666 if (verbose) {
1667 prefix = Details::createPrefix(
1668 comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1669 std::ostringstream os;
1670 os << *prefix << "Start" << endl;
1671 std::cerr << os.str();
1672 }
1673
1674 // If the local-to-global mapping doesn't exist yet, and if we
1675 // have local entries, then create and fill the local-to-global
1676 // mapping.
1677 const bool needToCreateLocalToGlobalMapping =
1678 lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1679
1680 if (needToCreateLocalToGlobalMapping) {
1681 if (verbose) {
1682 std::ostringstream os;
1683 os << *prefix << "Need to create lgMap" << endl;
1684 std::cerr << os.str();
1685 }
1686 if (debug) {
1687 // The local-to-global mapping should have been set up already
1688 // for a noncontiguous map.
1689 TEUCHOS_TEST_FOR_EXCEPTION
1690 (! isContiguous(), std::logic_error,
1691 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1692 "mapping (lgMap_) should have been set up already for a "
1693 "noncontiguous Map. Please report this bug to the Tpetra "
1694 "developers.");
1695 }
1696 const LO numElts = static_cast<LO> (getLocalNumElements ());
1697
1698 using Kokkos::view_alloc;
1699 using Kokkos::WithoutInitializing;
1700 lg_view_type lgMap ("lgMap", numElts);
1701 if (verbose) {
1702 std::ostringstream os;
1703 os << *prefix << "Fill lgMap" << endl;
1704 std::cerr << os.str();
1705 }
1706 FillLgMap<LO, GO, no_uvm_device_type> fillIt (lgMap, minMyGID_);
1707
1708 if (verbose) {
1709 std::ostringstream os;
1710 os << *prefix << "Copy lgMap to lgMapHost" << endl;
1711 std::cerr << os.str();
1712 }
1713
1714 auto lgMapHost =
1715 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1716 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1717 auto exec_instance = execution_space();
1718 Kokkos::deep_copy (exec_instance, lgMapHost, lgMap);
1719
1720 // There's a non-trivial chance we'll grab this on the host,
1721 // so let's make sure the copy finishes
1722 exec_instance.fence();
1723
1724 // "Commit" the local-to-global lookup table we filled in above.
1725 lgMap_ = lgMap;
1726 lgMapHost_ = lgMapHost;
1727 }
1728
1729 if (verbose) {
1730 std::ostringstream os;
1731 os << *prefix << "Done" << endl;
1732 std::cerr << os.str();
1733 }
1734 return lgMapHost_;
1735 }
1736
1737 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1738 Teuchos::ArrayView<const GlobalOrdinal>
1740 {
1741 using GO = global_ordinal_type;
1742
1743 // If the local-to-global mapping doesn't exist yet, and if we
1744 // have local entries, then create and fill the local-to-global
1745 // mapping.
1746 (void) this->getMyGlobalIndices ();
1747
1748 // This does NOT assume UVM; lgMapHost_ is a host pointer.
1749 const GO* lgMapHostRawPtr = lgMapHost_.data ();
1750 // The third argument forces ArrayView not to try to track memory
1751 // in a debug build. We have to use it because the memory does
1752 // not belong to a Teuchos memory management class.
1753 return Teuchos::ArrayView<const GO>(
1754 lgMapHostRawPtr,
1755 lgMapHost_.extent (0),
1756 Teuchos::RCP_DISABLE_NODE_LOOKUP);
1757 }
1758
1759 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1761 return distributed_;
1762 }
1763
1764 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1766 using Teuchos::TypeNameTraits;
1767 std::ostringstream os;
1768
1769 os << "Tpetra::Map: {"
1770 << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1771 << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1772 << ", NodeType: " << TypeNameTraits<Node>::name ();
1773 if (this->getObjectLabel () != "") {
1774 os << ", Label: \"" << this->getObjectLabel () << "\"";
1775 }
1776 os << ", Global number of entries: " << getGlobalNumElements ()
1777 << ", Number of processes: " << getComm ()->getSize ()
1778 << ", Uniform: " << (isUniform () ? "true" : "false")
1779 << ", Contiguous: " << (isContiguous () ? "true" : "false")
1780 << ", Distributed: " << (isDistributed () ? "true" : "false")
1781 << "}";
1782 return os.str ();
1783 }
1784
1789 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1790 std::string
1792 localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1793 {
1794 using LO = local_ordinal_type;
1795 using std::endl;
1796
1797 // This preserves current behavior of Map.
1798 if (vl < Teuchos::VERB_HIGH) {
1799 return std::string ();
1800 }
1801 auto outStringP = Teuchos::rcp (new std::ostringstream ());
1802 Teuchos::RCP<Teuchos::FancyOStream> outp =
1803 Teuchos::getFancyOStream (outStringP);
1804 Teuchos::FancyOStream& out = *outp;
1805
1806 auto comm = this->getComm ();
1807 const int myRank = comm->getRank ();
1808 const int numProcs = comm->getSize ();
1809 out << "Process " << myRank << " of " << numProcs << ":" << endl;
1810 Teuchos::OSTab tab1 (out);
1811
1812 const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1813 out << "My number of entries: " << numEnt << endl
1814 << "My minimum global index: " << this->getMinGlobalIndex () << endl
1815 << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1816
1817 if (vl == Teuchos::VERB_EXTREME) {
1818 out << "My global indices: [";
1819 const LO minLclInd = this->getMinLocalIndex ();
1820 for (LO k = 0; k < numEnt; ++k) {
1821 out << minLclInd + this->getGlobalElement (k);
1822 if (k + 1 < numEnt) {
1823 out << ", ";
1824 }
1825 }
1826 out << "]" << endl;
1827 }
1828
1829 out.flush (); // make sure the ostringstream got everything
1830 return outStringP->str ();
1831 }
1832
1833 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1834 void
1836 describe (Teuchos::FancyOStream &out,
1837 const Teuchos::EVerbosityLevel verbLevel) const
1838 {
1839 using Teuchos::TypeNameTraits;
1840 using Teuchos::VERB_DEFAULT;
1841 using Teuchos::VERB_NONE;
1842 using Teuchos::VERB_LOW;
1843 using Teuchos::VERB_HIGH;
1844 using std::endl;
1845 using LO = local_ordinal_type;
1846 using GO = global_ordinal_type;
1847 const Teuchos::EVerbosityLevel vl =
1848 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1849
1850 if (vl == VERB_NONE) {
1851 return; // don't print anything
1852 }
1853 // If this Map's Comm is null, then the Map does not participate
1854 // in collective operations with the other processes. In that
1855 // case, it is not even legal to call this method. The reasonable
1856 // thing to do in that case is nothing.
1857 auto comm = this->getComm ();
1858 if (comm.is_null ()) {
1859 return;
1860 }
1861 const int myRank = comm->getRank ();
1862 const int numProcs = comm->getSize ();
1863
1864 // Only Process 0 should touch the output stream, but this method
1865 // in general may need to do communication. Thus, we may need to
1866 // preserve the current tab level across multiple "if (myRank ==
1867 // 0) { ... }" inner scopes. This is why we sometimes create
1868 // OSTab instances by pointer, instead of by value. We only need
1869 // to create them by pointer if the tab level must persist through
1870 // multiple inner scopes.
1871 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1872
1873 if (myRank == 0) {
1874 // At every verbosity level but VERB_NONE, Process 0 prints.
1875 // By convention, describe() always begins with a tab before
1876 // printing.
1877 tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1878 out << "\"Tpetra::Map\":" << endl;
1879 tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1880 {
1881 out << "Template parameters:" << endl;
1882 Teuchos::OSTab tab2 (out);
1883 out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1884 << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1885 << "Node: " << TypeNameTraits<Node>::name () << endl;
1886 }
1887 const std::string label = this->getObjectLabel ();
1888 if (label != "") {
1889 out << "Label: \"" << label << "\"" << endl;
1890 }
1891 out << "Global number of entries: " << getGlobalNumElements () << endl
1892 << "Minimum global index: " << getMinAllGlobalIndex () << endl
1893 << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1894 << "Index base: " << getIndexBase () << endl
1895 << "Number of processes: " << numProcs << endl
1896 << "Uniform: " << (isUniform () ? "true" : "false") << endl
1897 << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1898 << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1899 }
1900
1901 // This is collective over the Map's communicator.
1902 if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1903 const std::string lclStr = this->localDescribeToString (vl);
1904 Tpetra::Details::gathervPrint (out, lclStr, *comm);
1905 }
1906 }
1907
1908 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1909 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1911 replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1912 {
1913 using Teuchos::RCP;
1914 using Teuchos::rcp;
1915 using GST = global_size_t;
1916 using LO = local_ordinal_type;
1917 using GO = global_ordinal_type;
1919
1920 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1921 // the Map by calling its ordinary public constructor, using the
1922 // original Map's data. This only involves O(1) all-reduces over
1923 // the new communicator, which in the common case only includes a
1924 // small number of processes.
1925
1926 // Create the Map to return.
1927 if (newComm.is_null () || newComm->getSize () < 1) {
1928 return Teuchos::null; // my process does not participate in the new Map
1929 }
1930 else if (newComm->getSize () == 1) {
1931 // The case where the new communicator has only one process is
1932 // easy. We don't have to communicate to get all the
1933 // information we need. Use the default comm to create the new
1934 // Map, then fill in all the fields directly.
1935 RCP<map_type> newMap (new map_type ());
1936
1937 newMap->comm_ = newComm;
1938 // mfh 07 Oct 2016: Preserve original behavior, even though the
1939 // original index base may no longer be the globally min global
1940 // index. See #616 for why this doesn't matter so much anymore.
1941 newMap->indexBase_ = this->indexBase_;
1942 newMap->numGlobalElements_ = this->numLocalElements_;
1943 newMap->numLocalElements_ = this->numLocalElements_;
1944 newMap->minMyGID_ = this->minMyGID_;
1945 newMap->maxMyGID_ = this->maxMyGID_;
1946 newMap->minAllGID_ = this->minMyGID_;
1947 newMap->maxAllGID_ = this->maxMyGID_;
1948 newMap->firstContiguousGID_ = this->firstContiguousGID_;
1949 newMap->lastContiguousGID_ = this->lastContiguousGID_;
1950 // Since the new communicator has only one process, neither
1951 // uniformity nor contiguity have changed.
1952 newMap->uniform_ = this->uniform_;
1953 newMap->contiguous_ = this->contiguous_;
1954 // The new communicator only has one process, so the new Map is
1955 // not distributed.
1956 newMap->distributed_ = false;
1957 newMap->lgMap_ = this->lgMap_;
1958 newMap->lgMapHost_ = this->lgMapHost_;
1959 newMap->glMap_ = this->glMap_;
1960 newMap->glMapHost_ = this->glMapHost_;
1961 // It's OK not to initialize the new Map's Directory.
1962 // This is initialized lazily, on first call to getRemoteIndexList.
1963
1964 return newMap;
1965 }
1966 else { // newComm->getSize() != 1
1967 // Even if the original Map is contiguous, the new Map might not
1968 // be, especially if the excluded processes have ranks != 0 or
1969 // newComm->getSize()-1. The common case for this method is to
1970 // exclude many (possibly even all but one) processes, so it
1971 // likely doesn't pay to do the global communication (over the
1972 // original communicator) to figure out whether we can optimize
1973 // the result Map. Thus, we just set up the result Map as
1974 // noncontiguous.
1975 //
1976 // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1977 // the global-to-local table, etc. Optimize this code path to
1978 // avoid unnecessary local work.
1979
1980 // Make Map (re)compute the global number of elements.
1981 const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1982 // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1983 // to use the noncontiguous Map constructor, since the new Map
1984 // might not be contiguous. Even if the old Map was contiguous,
1985 // some process in the "middle" might have been excluded. If we
1986 // want to avoid local work, we either have to do the setup by
1987 // hand, or write a new Map constructor.
1988#if 1
1989 // The disabled code here throws the following exception in
1990 // Map's replaceCommWithSubset test:
1991 //
1992 // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
1993 // 10:
1994 // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1995 // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1996
1997 auto lgMap = this->getMyGlobalIndices ();
1998 using size_type =
1999 typename std::decay<decltype (lgMap.extent (0)) >::type;
2000 const size_type lclNumInds =
2001 static_cast<size_type> (this->getLocalNumElements ());
2002 using Teuchos::TypeNameTraits;
2003 TEUCHOS_TEST_FOR_EXCEPTION
2004 (lgMap.extent (0) != lclNumInds, std::logic_error,
2005 "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2006 "has length " << lgMap.extent (0) << " (of type " <<
2007 TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2008 " = " << this->getLocalNumElements () << ". The latter, upon being "
2009 "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2010 "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2011 "developers.");
2012#else
2013 Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2014#endif // 1
2015
2016 const GO indexBase = this->getIndexBase ();
2017 // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2018 auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2019 return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2020 }
2021 }
2022
2023 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2024 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2026 removeEmptyProcesses () const
2027 {
2028 using Teuchos::Comm;
2029 using Teuchos::null;
2030 using Teuchos::outArg;
2031 using Teuchos::RCP;
2032 using Teuchos::rcp;
2033 using Teuchos::REDUCE_MIN;
2034 using Teuchos::reduceAll;
2035
2036 // Create the new communicator. split() returns a valid
2037 // communicator on all processes. On processes where color == 0,
2038 // ignore the result. Passing key == 0 tells MPI to order the
2039 // processes in the new communicator by their rank in the old
2040 // communicator.
2041 const int color = (numLocalElements_ == 0) ? 0 : 1;
2042 // MPI_Comm_split must be called collectively over the original
2043 // communicator. We can't just call it on processes with color
2044 // one, even though we will ignore its result on processes with
2045 // color zero.
2046 RCP<const Comm<int> > newComm = comm_->split (color, 0);
2047 if (color == 0) {
2048 newComm = null;
2049 }
2050
2051 // Create the Map to return.
2052 if (newComm.is_null ()) {
2053 return null; // my process does not participate in the new Map
2054 } else {
2055 RCP<Map> map = rcp (new Map ());
2056
2057 map->comm_ = newComm;
2058 map->indexBase_ = indexBase_;
2059 map->numGlobalElements_ = numGlobalElements_;
2060 map->numLocalElements_ = numLocalElements_;
2061 map->minMyGID_ = minMyGID_;
2062 map->maxMyGID_ = maxMyGID_;
2063 map->minAllGID_ = minAllGID_;
2064 map->maxAllGID_ = maxAllGID_;
2065 map->firstContiguousGID_= firstContiguousGID_;
2066 map->lastContiguousGID_ = lastContiguousGID_;
2067
2068 // Uniformity and contiguity have not changed. The directory
2069 // has changed, but we've taken care of that above.
2070 map->uniform_ = uniform_;
2071 map->contiguous_ = contiguous_;
2072
2073 // If the original Map was NOT distributed, then the new Map
2074 // cannot be distributed.
2075 //
2076 // If the number of processes in the new communicator is 1, then
2077 // the new Map is not distributed.
2078 //
2079 // Otherwise, we have to check the new Map using an all-reduce
2080 // (over the new communicator). For example, the original Map
2081 // may have had some processes with zero elements, and all other
2082 // processes with the same number of elements as in the whole
2083 // Map. That Map is technically distributed, because of the
2084 // processes with zero elements. Removing those processes would
2085 // make the new Map locally replicated.
2086 if (! distributed_ || newComm->getSize () == 1) {
2087 map->distributed_ = false;
2088 } else {
2089 const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2090 int allProcsOwnAllGids = 0;
2091 reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2092 map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2093 }
2094
2095 map->lgMap_ = lgMap_;
2096 map->lgMapHost_ = lgMapHost_;
2097 map->glMap_ = glMap_;
2098 map->glMapHost_ = glMapHost_;
2099
2100 // Map's default constructor creates an uninitialized Directory.
2101 // The Directory will be initialized on demand in
2102 // getRemoteIndexList().
2103 //
2104 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2105 // directory more efficiently than just recreating it. If
2106 // directory recreation proves a bottleneck, we can always
2107 // revisit this. On the other hand, Directory creation is only
2108 // collective over the new, presumably much smaller
2109 // communicator, so it may not be worth the effort to optimize.
2110
2111 return map;
2112 }
2113 }
2114
2115 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2116 void
2117 Map<LocalOrdinal,GlobalOrdinal,Node>::setupDirectory () const
2118 {
2119 TEUCHOS_TEST_FOR_EXCEPTION(
2120 directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2121 "The Directory is null. "
2122 "Please report this bug to the Tpetra developers.");
2123
2124 // Only create the Directory if it hasn't been created yet.
2125 // This is a collective operation.
2126 if (! directory_->initialized ()) {
2127 directory_->initialize (*this);
2128 }
2129 }
2130
2131 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2134 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2135 const Teuchos::ArrayView<int>& PIDs,
2136 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2137 {
2138 using Tpetra::Details::OrdinalTraits;
2140 using std::endl;
2141 using size_type = Teuchos::ArrayView<int>::size_type;
2142
2143 const bool verbose = Details::Behavior::verbose("Map");
2144 const size_t maxNumToPrint = verbose ?
2146 std::unique_ptr<std::string> prefix;
2147 if (verbose) {
2148 prefix = Details::createPrefix(comm_.getRawPtr(),
2149 "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2150 std::ostringstream os;
2151 os << *prefix << "Start: ";
2152 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2153 os << endl;
2154 std::cerr << os.str();
2155 }
2156
2157 // Empty Maps (i.e., containing no indices on any processes in the
2158 // Map's communicator) are perfectly valid. In that case, if the
2159 // input GID list is nonempty, we fill the output arrays with
2160 // invalid values, and return IDNotPresent to notify the caller.
2161 // It's perfectly valid to give getRemoteIndexList GIDs that the
2162 // Map doesn't own. SubmapImport test 2 needs this functionality.
2163 if (getGlobalNumElements () == 0) {
2164 if (GIDs.size () == 0) {
2165 if (verbose) {
2166 std::ostringstream os;
2167 os << *prefix << "Done; both Map & input are empty" << endl;
2168 std::cerr << os.str();
2169 }
2170 return AllIDsPresent; // trivially
2171 }
2172 else {
2173 if (verbose) {
2174 std::ostringstream os;
2175 os << *prefix << "Done: Map is empty on all processes, "
2176 "so all output PIDs & LIDs are invalid (-1)." << endl;
2177 std::cerr << os.str();
2178 }
2179 for (size_type k = 0; k < PIDs.size (); ++k) {
2180 PIDs[k] = OrdinalTraits<int>::invalid ();
2181 }
2182 for (size_type k = 0; k < LIDs.size (); ++k) {
2183 LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2184 }
2185 return IDNotPresent;
2186 }
2187 }
2188
2189 // getRemoteIndexList must be called collectively, and Directory
2190 // initialization is collective too, so it's OK to initialize the
2191 // Directory on demand.
2192
2193 if (verbose) {
2194 std::ostringstream os;
2195 os << *prefix << "Call setupDirectory" << endl;
2196 std::cerr << os.str();
2197 }
2198 setupDirectory();
2199 if (verbose) {
2200 std::ostringstream os;
2201 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2202 std::cerr << os.str();
2203 }
2204 const Tpetra::LookupStatus retVal =
2205 directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2206 if (verbose) {
2207 std::ostringstream os;
2208 os << *prefix << "Done; getDirectoryEntries returned "
2209 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2210 << "; ";
2211 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2212 os << ", ";
2213 verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2214 os << endl;
2215 std::cerr << os.str();
2216 }
2217 return retVal;
2218 }
2219
2220 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2223 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2224 const Teuchos::ArrayView<int> & PIDs) const
2225 {
2227 using std::endl;
2228
2229 const bool verbose = Details::Behavior::verbose("Map");
2230 const size_t maxNumToPrint = verbose ?
2232 std::unique_ptr<std::string> prefix;
2233 if (verbose) {
2234 prefix = Details::createPrefix(comm_.getRawPtr(),
2235 "Map", "getRemoteIndexList(GIDs,PIDs)");
2236 std::ostringstream os;
2237 os << *prefix << "Start: ";
2238 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2239 os << endl;
2240 std::cerr << os.str();
2241 }
2242
2243 if (getGlobalNumElements () == 0) {
2244 if (GIDs.size () == 0) {
2245 if (verbose) {
2246 std::ostringstream os;
2247 os << *prefix << "Done; both Map & input are empty" << endl;
2248 std::cerr << os.str();
2249 }
2250 return AllIDsPresent; // trivially
2251 }
2252 else {
2253 if (verbose) {
2254 std::ostringstream os;
2255 os << *prefix << "Done: Map is empty on all processes, "
2256 "so all output PIDs are invalid (-1)." << endl;
2257 std::cerr << os.str();
2258 }
2259 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2260 PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2261 }
2262 return IDNotPresent;
2263 }
2264 }
2265
2266 // getRemoteIndexList must be called collectively, and Directory
2267 // initialization is collective too, so it's OK to initialize the
2268 // Directory on demand.
2269
2270 if (verbose) {
2271 std::ostringstream os;
2272 os << *prefix << "Call setupDirectory" << endl;
2273 std::cerr << os.str();
2274 }
2275 setupDirectory();
2276 if (verbose) {
2277 std::ostringstream os;
2278 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2279 std::cerr << os.str();
2280 }
2281 const Tpetra::LookupStatus retVal =
2282 directory_->getDirectoryEntries(*this, GIDs, PIDs);
2283 if (verbose) {
2284 std::ostringstream os;
2285 os << *prefix << "Done; getDirectoryEntries returned "
2286 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2287 << "; ";
2288 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2289 os << endl;
2290 std::cerr << os.str();
2291 }
2292 return retVal;
2293 }
2294
2295 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2296 Teuchos::RCP<const Teuchos::Comm<int> >
2298 return comm_;
2299 }
2300
2301 template <class LocalOrdinal,class GlobalOrdinal, class Node>
2302 bool
2304 checkIsDist() const
2305 {
2306 using Teuchos::as;
2307 using Teuchos::outArg;
2308 using Teuchos::REDUCE_MIN;
2309 using Teuchos::reduceAll;
2310 using std::endl;
2311
2312 const bool verbose = Details::Behavior::verbose("Map");
2313 std::unique_ptr<std::string> prefix;
2314 if (verbose) {
2315 prefix = Details::createPrefix(
2316 comm_.getRawPtr(), "Map", "checkIsDist");
2317 std::ostringstream os;
2318 os << *prefix << "Start" << endl;
2319 std::cerr << os.str();
2320 }
2321
2322 bool global = false;
2323 if (comm_->getSize () > 1) {
2324 // The communicator has more than one process, but that doesn't
2325 // necessarily mean the Map is distributed.
2326 int localRep = 0;
2327 if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2328 // The number of local elements on this process equals the
2329 // number of global elements.
2330 //
2331 // NOTE (mfh 22 Nov 2011) Does this still work if there were
2332 // duplicates in the global ID list on input (the third Map
2333 // constructor), so that the number of local elements (which
2334 // are not duplicated) on this process could be less than the
2335 // number of global elements, even if this process owns all
2336 // the elements?
2337 localRep = 1;
2338 }
2339 int allLocalRep;
2340 reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2341 if (allLocalRep != 1) {
2342 // At least one process does not own all the elements.
2343 // This makes the Map a distributed Map.
2344 global = true;
2345 }
2346 }
2347 // If the communicator has only one process, then the Map is not
2348 // distributed.
2349
2350 if (verbose) {
2351 std::ostringstream os;
2352 os << *prefix << "Done; global=" << (global ? "true" : "false")
2353 << endl;
2354 std::cerr << os.str();
2355 }
2356 return global;
2357 }
2358
2359} // namespace Tpetra
2360
2361template <class LocalOrdinal, class GlobalOrdinal>
2362Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2363Tpetra::createLocalMap (const size_t numElements,
2364 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2365{
2366 typedef LocalOrdinal LO;
2367 typedef GlobalOrdinal GO;
2368 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2369 return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2370}
2371
2372template <class LocalOrdinal, class GlobalOrdinal>
2373Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2374Tpetra::createUniformContigMap (const global_size_t numElements,
2375 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2376{
2377 typedef LocalOrdinal LO;
2378 typedef GlobalOrdinal GO;
2379 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2380 return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2381}
2382
2383template <class LocalOrdinal, class GlobalOrdinal, class Node>
2384Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2385Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2386 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2387)
2388{
2389 using Teuchos::rcp;
2391 const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2392
2393 return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2394}
2395
2396template <class LocalOrdinal, class GlobalOrdinal, class Node>
2397Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2398Tpetra::createLocalMapWithNode (const size_t numElements,
2399 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2400)
2401{
2403 using Teuchos::rcp;
2405 const GlobalOrdinal indexBase = 0;
2406 const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2407
2408 return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2409}
2410
2411template <class LocalOrdinal, class GlobalOrdinal, class Node>
2412Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2414 const size_t localNumElements,
2415 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2416)
2417{
2418 using Teuchos::rcp;
2420 const GlobalOrdinal indexBase = 0;
2421
2422 return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2423}
2424
2425template <class LocalOrdinal, class GlobalOrdinal>
2426Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2428 const size_t localNumElements,
2429 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2430{
2431 typedef LocalOrdinal LO;
2432 typedef GlobalOrdinal GO;
2433 using NT = typename Tpetra::Map<LO, GO>::node_type;
2434
2435 return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2436}
2437
2438template <class LocalOrdinal, class GlobalOrdinal>
2439Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2440Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2441 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2442{
2443 typedef LocalOrdinal LO;
2444 typedef GlobalOrdinal GO;
2445 using NT = typename Tpetra::Map<LO, GO>::node_type;
2446
2447 return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2448}
2449
2450template <class LocalOrdinal, class GlobalOrdinal, class Node>
2451Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2452Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2453 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2454)
2455{
2456 using Teuchos::rcp;
2458 using GST = Tpetra::global_size_t;
2459 const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2460 // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2461 // shouldn't be zero, given that the index base is supposed to equal
2462 // the globally min global index?
2463 const GlobalOrdinal indexBase = 0;
2464
2465 return rcp (new map_type (INV, elementList, indexBase, comm));
2466}
2467
2468template<class LO, class GO, class NT>
2469Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2470Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2471{
2472 using Details::verbosePrintArray;
2473 using Teuchos::Array;
2474 using Teuchos::ArrayView;
2475 using Teuchos::as;
2476 using Teuchos::rcp;
2477 using std::cerr;
2478 using std::endl;
2479 using map_type = Tpetra::Map<LO, GO, NT>;
2480 using GST = global_size_t;
2481
2482 const bool verbose = Details::Behavior::verbose("Map");
2483 std::unique_ptr<std::string> prefix;
2484 if (verbose) {
2485 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2486 prefix = Details::createPrefix(
2487 comm.getRawPtr(), "createOneToOne(Map)");
2488 std::ostringstream os;
2489 os << *prefix << "Start" << endl;
2490 cerr << os.str();
2491 }
2492 const size_t maxNumToPrint = verbose ?
2493 Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2494 const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2495 const int myRank = M->getComm ()->getRank ();
2496
2497 // Bypasses for special cases where either M is known to be
2498 // one-to-one, or the one-to-one version of M is easy to compute.
2499 // This is why we take M as an RCP, not as a const reference -- so
2500 // that we can return M itself if it is 1-to-1.
2501 if (! M->isDistributed ()) {
2502 // For a locally replicated Map, we assume that users want to push
2503 // all the GIDs to Process 0.
2504
2505 // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2506 // you think it should return, in this special case of a locally
2507 // replicated contiguous Map.
2508 const GST numGlobalEntries = M->getGlobalNumElements ();
2509 if (M->isContiguous()) {
2510 const size_t numLocalEntries =
2511 (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2512 if (verbose) {
2513 std::ostringstream os;
2514 os << *prefix << "Input is locally replicated & contiguous; "
2515 "numLocalEntries=" << numLocalEntries << endl;
2516 cerr << os.str ();
2517 }
2518 auto retMap =
2519 rcp(new map_type(numGlobalEntries, numLocalEntries,
2520 M->getIndexBase(), M->getComm()));
2521 if (verbose) {
2522 std::ostringstream os;
2523 os << *prefix << "Done" << endl;
2524 cerr << os.str ();
2525 }
2526 return retMap;
2527 }
2528 else {
2529 if (verbose) {
2530 std::ostringstream os;
2531 os << *prefix << "Input is locally replicated & noncontiguous"
2532 << endl;
2533 cerr << os.str ();
2534 }
2535 ArrayView<const GO> myGids =
2536 (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2537 auto retMap =
2538 rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2539 M->getComm()));
2540 if (verbose) {
2541 std::ostringstream os;
2542 os << *prefix << "Done" << endl;
2543 cerr << os.str ();
2544 }
2545 return retMap;
2546 }
2547 }
2548 else if (M->isContiguous ()) {
2549 if (verbose) {
2550 std::ostringstream os;
2551 os << *prefix << "Input is distributed & contiguous" << endl;
2552 cerr << os.str ();
2553 }
2554 // Contiguous, distributed Maps are one-to-one by construction.
2555 // (Locally replicated Maps can be contiguous.)
2556 return M;
2557 }
2558 else {
2559 if (verbose) {
2560 std::ostringstream os;
2561 os << *prefix << "Input is distributed & noncontiguous" << endl;
2562 cerr << os.str ();
2563 }
2565 const size_t numMyElems = M->getLocalNumElements ();
2566 ArrayView<const GO> myElems = M->getLocalElementList ();
2567 Array<int> owner_procs_vec (numMyElems);
2568
2569 if (verbose) {
2570 std::ostringstream os;
2571 os << *prefix << "Call Directory::getDirectoryEntries: ";
2572 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2573 os << endl;
2574 cerr << os.str();
2575 }
2576 directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2577 if (verbose) {
2578 std::ostringstream os;
2579 os << *prefix << "getDirectoryEntries result: ";
2580 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2581 os << endl;
2582 cerr << os.str();
2583 }
2584
2585 Array<GO> myOwned_vec (numMyElems);
2586 size_t numMyOwnedElems = 0;
2587 for (size_t i = 0; i < numMyElems; ++i) {
2588 const GO GID = myElems[i];
2589 const int owner = owner_procs_vec[i];
2590
2591 if (myRank == owner) {
2592 myOwned_vec[numMyOwnedElems++] = GID;
2593 }
2594 }
2595 myOwned_vec.resize (numMyOwnedElems);
2596
2597 if (verbose) {
2598 std::ostringstream os;
2599 os << *prefix << "Create Map: ";
2600 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2601 os << endl;
2602 cerr << os.str();
2603 }
2604 auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2605 M->getIndexBase(), M->getComm()));
2606 if (verbose) {
2607 std::ostringstream os;
2608 os << *prefix << "Done" << endl;
2609 cerr << os.str();
2610 }
2611 return retMap;
2612 }
2613}
2614
2615template<class LocalOrdinal, class GlobalOrdinal, class Node>
2616Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2619{
2620 using Details::Behavior;
2621 using Details::verbosePrintArray;
2622 using Teuchos::Array;
2623 using Teuchos::ArrayView;
2624 using Teuchos::RCP;
2625 using Teuchos::rcp;
2626 using Teuchos::toString;
2627 using std::cerr;
2628 using std::endl;
2629 using LO = LocalOrdinal;
2630 using GO = GlobalOrdinal;
2631 using map_type = Tpetra::Map<LO, GO, Node>;
2632
2633 const bool verbose = Behavior::verbose("Map");
2634 std::unique_ptr<std::string> prefix;
2635 if (verbose) {
2636 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2637 prefix = Details::createPrefix(
2638 comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2639 std::ostringstream os;
2640 os << *prefix << "Start" << endl;
2641 cerr << os.str();
2642 }
2643 const size_t maxNumToPrint = verbose ?
2644 Behavior::verbosePrintCountThreshold() : size_t(0);
2645
2646 // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2647 // Maps (which are 1-to-1 by construction).
2648
2650 if (verbose) {
2651 std::ostringstream os;
2652 os << *prefix << "Initialize Directory" << endl;
2653 cerr << os.str ();
2654 }
2655 directory.initialize (*M, tie_break);
2656 if (verbose) {
2657 std::ostringstream os;
2658 os << *prefix << "Done initializing Directory" << endl;
2659 cerr << os.str ();
2660 }
2661 size_t numMyElems = M->getLocalNumElements ();
2662 ArrayView<const GO> myElems = M->getLocalElementList ();
2663 Array<int> owner_procs_vec (numMyElems);
2664 if (verbose) {
2665 std::ostringstream os;
2666 os << *prefix << "Call Directory::getDirectoryEntries: ";
2667 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2668 os << endl;
2669 cerr << os.str();
2670 }
2671 directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2672 if (verbose) {
2673 std::ostringstream os;
2674 os << *prefix << "getDirectoryEntries result: ";
2675 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2676 os << endl;
2677 cerr << os.str();
2678 }
2679
2680 const int myRank = M->getComm()->getRank();
2681 Array<GO> myOwned_vec (numMyElems);
2682 size_t numMyOwnedElems = 0;
2683 for (size_t i = 0; i < numMyElems; ++i) {
2684 const GO GID = myElems[i];
2685 const int owner = owner_procs_vec[i];
2686 if (myRank == owner) {
2687 myOwned_vec[numMyOwnedElems++] = GID;
2688 }
2689 }
2690 myOwned_vec.resize (numMyOwnedElems);
2691
2692 // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2693 // valid for the new Map. Why can't we reuse it?
2694 const global_size_t GINV =
2695 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2696 if (verbose) {
2697 std::ostringstream os;
2698 os << *prefix << "Create Map: ";
2699 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2700 os << endl;
2701 cerr << os.str();
2702 }
2703 RCP<const map_type> retMap
2704 (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2705 M->getComm ()));
2706 if (verbose) {
2707 std::ostringstream os;
2708 os << *prefix << "Done" << endl;
2709 cerr << os.str();
2710 }
2711 return retMap;
2712}
2713
2714//
2715// Explicit instantiation macro
2716//
2717// Must be expanded from within the Tpetra namespace!
2718//
2719
2721
2722#define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2723 \
2724 template class Map< LO , GO , NODE >; \
2725 \
2726 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2727 createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2728 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2729 \
2730 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2731 createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2732 const size_t localNumElements, \
2733 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2734 \
2735 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2736 createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2737 const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2738 \
2739 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2740 createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2741 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2742 \
2743 template Teuchos::RCP<const Map<LO,GO,NODE> > \
2744 createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2745 \
2746 template Teuchos::RCP<const Map<LO,GO,NODE> > \
2747 createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2748 const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2749
2750
2752#define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2753 template Teuchos::RCP< const Map<LO,GO> > \
2754 createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2755 \
2756 template Teuchos::RCP< const Map<LO,GO> > \
2757 createContigMap<LO,GO>( global_size_t, size_t, \
2758 const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2759 \
2760 template Teuchos::RCP< const Map<LO,GO> > \
2761 createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2762 const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2763 \
2764 template Teuchos::RCP< const Map<LO,GO> > \
2765 createUniformContigMap<LO,GO>( const global_size_t, \
2766 const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2767
2768#endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
void initialize(const map_type &map)
Initialize the Directory with its Map.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
Map(const global_size_t numGlobalElements, const global_ordinal_type indexBase, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const LocalGlobal lg=GloballyDistributed)
Constructor with contiguous uniform distribution.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LocalOrdinal local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
::Tpetra::Details::LocalMap< local_ordinal_type, global_ordinal_type, device_type > local_map_type
Type of the "local" Map.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void printOnce(std::ostream &out, const std::string &s, const Teuchos::Comm< int > *comm)
Print on one process of the given communicator, or at least try to do so (if MPI is not initialized).
bool teuchosCommIsAnMpiComm(const Teuchos::Comm< int > &)
Is the given Comm a Teuchos::MpiComm<int> instance?
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool mpiIsInitialized()
Has MPI_Init been called (on this process)?
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
bool mpiIsFinalized()
Has MPI_Finalize been called (on this process)?
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...