Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_CRSGRAPH_DEF_HPP
41#define TPETRA_CRSGRAPH_DEF_HPP
42
45
50#include "Tpetra_Details_getGraphDiagOffsets.hpp"
51#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
52#include "Tpetra_Details_makeColMap.hpp"
56#include "Tpetra_Distributor.hpp"
57#include "Teuchos_SerialDenseMatrix.hpp"
58#include "Tpetra_Vector.hpp"
61#include "Tpetra_Details_packCrsGraph.hpp"
62#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
63#include "Tpetra_Details_CrsPadding.hpp"
64#include "Tpetra_Util.hpp"
65#include <algorithm>
66#include <limits>
67#include <map>
68#include <sstream>
69#include <string>
70#include <type_traits>
71#include <utility>
72#include <vector>
73
74namespace Tpetra {
75 namespace Details {
76 namespace Impl {
77
78 template<class MapIter>
79 void
80 verbosePrintMap(std::ostream& out,
81 MapIter beg,
82 MapIter end,
83 const size_t numEnt,
84 const char mapName[])
85 {
86 using ::Tpetra::Details::Behavior;
88
89 out << mapName << ": {";
90 const size_t maxNumToPrint =
92 if (maxNumToPrint == 0) {
93 if (numEnt != 0) {
94 out << "...";
95 }
96 }
97 else {
98 const size_t numToPrint = numEnt > maxNumToPrint ?
99 maxNumToPrint : numEnt;
100 size_t count = 0;
101 for (MapIter it = beg; it != end; ++it) {
102 out << "(" << (*it).first << ", ";
103 verbosePrintArray(out, (*it).second, "gblColInds",
104 maxNumToPrint);
105 out << ")";
106 if (count + size_t(1) < numToPrint) {
107 out << ", ";
108 }
109 ++count;
110 }
111 if (count < numEnt) {
112 out << ", ...";
113 }
114 }
115 out << "}";
116 }
117
118 template<class LO, class GO, class Node>
119 Teuchos::ArrayView<GO>
120 getRowGraphGlobalRow(
121 std::vector<GO>& gblColIndsStorage,
122 const RowGraph<LO, GO, Node>& graph,
123 const GO gblRowInd)
124 {
125 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
126 if (gblColIndsStorage.size() < origNumEnt) {
127 gblColIndsStorage.resize(origNumEnt);
128 }
129 typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
130 origNumEnt);
131 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
132 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
133 return retval;
134 }
135
136 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
137 class ConvertColumnIndicesFromGlobalToLocal {
138 public:
139 ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
140 const ::Kokkos::View<const GO*, DT>& gblColInds,
141 const ::Kokkos::View<const OffsetType*, DT>& ptr,
142 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
144 lclColInds_ (lclColInds),
145 gblColInds_ (gblColInds),
146 ptr_ (ptr),
147 lclColMap_ (lclColMap),
148 numRowEnt_ (numRowEnt)
149 {}
150
151 KOKKOS_FUNCTION void
152 operator () (const LO& lclRow, OffsetType& curNumBad) const
153 {
154 const OffsetType offset = ptr_(lclRow);
155 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
156 // of entries in a row to LO, as long as the row doesn't have
157 // too many duplicate entries.
158 const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
159 for (LO j = 0; j < numEnt; ++j) {
160 const GO gid = gblColInds_(offset + j);
161 const LO lid = lclColMap_.getLocalElement (gid);
162 lclColInds_(offset + j) = lid;
163 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
164 ++curNumBad;
165 }
166 }
167 }
168
169 static OffsetType
170 run (const ::Kokkos::View<LO*, DT>& lclColInds,
171 const ::Kokkos::View<const GO*, DT>& gblColInds,
172 const ::Kokkos::View<const OffsetType*, DT>& ptr,
173 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
174 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
175 {
176 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
177 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
178
179 const LO lclNumRows = ptr.extent (0) == 0 ?
180 static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
181 OffsetType numBad = 0;
182 // Count of "bad" column indices is a reduction over rows.
183 ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
184 functor_type (lclColInds, gblColInds, ptr,
185 lclColMap, numRowEnt),
186 numBad);
187 return numBad;
188 }
189
190 private:
191 ::Kokkos::View<LO*, DT> lclColInds_;
192 ::Kokkos::View<const GO*, DT> gblColInds_;
193 ::Kokkos::View<const OffsetType*, DT> ptr_;
194 ::Tpetra::Details::LocalMap<LO, GO, DT> lclColMap_;
195 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
196 };
197
198 } // namespace Impl
199
214 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
215 OffsetType
216 convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
217 const Kokkos::View<const GO*, DT>& gblColInds,
218 const Kokkos::View<const OffsetType*, DT>& ptr,
219 const LocalMap<LO, GO, DT>& lclColMap,
220 const Kokkos::View<const NumEntType*, DT>& numRowEnt)
221 {
222 using Impl::ConvertColumnIndicesFromGlobalToLocal;
223 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
224 return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
225 }
226
227 template<class ViewType, class LO>
228 class MaxDifference {
229 public:
230 MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
231
232 KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
233 dst = 0;
234 }
235
236 KOKKOS_INLINE_FUNCTION void
237 join (LO& dst, const LO& src) const
238 {
239 dst = (src > dst) ? src : dst;
240 }
241
242 KOKKOS_INLINE_FUNCTION void
243 operator () (const LO lclRow, LO& maxNumEnt) const
244 {
245 const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
246 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
247 }
248 private:
249 typename ViewType::const_type ptr_;
250 };
251
252 template<class ViewType, class LO>
253 typename ViewType::non_const_value_type
254 maxDifference (const char kernelLabel[],
255 const ViewType& ptr,
256 const LO lclNumRows)
257 {
258 if (lclNumRows == 0) {
259 // mfh 07 May 2018: Weirdly, I need this special case,
260 // otherwise I get the wrong answer.
261 return static_cast<LO> (0);
262 }
263 else {
264 using execution_space = typename ViewType::execution_space;
265 using range_type = Kokkos::RangePolicy<execution_space, LO>;
266 LO theMaxNumEnt {0};
267 Kokkos::parallel_reduce (kernelLabel,
268 range_type (0, lclNumRows),
269 MaxDifference<ViewType, LO> (ptr),
270 theMaxNumEnt);
271 return theMaxNumEnt;
272 }
273 }
274
275 } // namespace Details
276
// Report whether debug mode is enabled for the "CrsGraph" label, as
// answered by Details::Behavior::debug.
// NOTE(review): the embedded listing numbers jump from 278 to 280, so one
// line of this definition is absent here (presumably the class-scope
// qualifier) -- confirm against the upstream file.
277 template <class LocalOrdinal, class GlobalOrdinal, class Node>
278 bool
280 getDebug() {
281 return Details::Behavior::debug("CrsGraph");
282 }
283
// Report whether verbose mode is enabled for the "CrsGraph" label, as
// answered by Details::Behavior::verbose.
// NOTE(review): the embedded listing numbers jump from 285 to 287, so one
// line of this definition is absent here (presumably the class-scope
// qualifier) -- confirm against the upstream file.
284 template <class LocalOrdinal, class GlobalOrdinal, class Node>
285 bool
287 getVerbose() {
288 return Details::Behavior::verbose("CrsGraph");
289 }
290
// Constructor taking a row Map and a single allocation hint for all rows.
//
// rowMap: stored as rowMap_ and passed to the dist_object_type base.
// maxNumEntriesPerRow: stored as numAllocForAllRows_; must not equal
//   Teuchos::OrdinalTraits<size_t>::invalid(), else std::invalid_argument
//   is reported through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
// params: forwarded to resumeFill.
//
// NOTE(review): listing number 309 (between resumeFill and the closing
// brace) is absent here -- confirm the missing statement upstream.
291 template <class LocalOrdinal, class GlobalOrdinal, class Node>
292 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
293 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
294 const size_t maxNumEntriesPerRow,
295 const Teuchos::RCP<Teuchos::ParameterList>& params) :
296 dist_object_type (rowMap)
297 , rowMap_ (rowMap)
298 , numAllocForAllRows_ (maxNumEntriesPerRow)
299 {
// Not referenced by name below; presumably consumed by the
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC macro -- TODO confirm.
300 const char tfecfFuncName[] =
301 "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
302 staticAssertions ();
303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
304 (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
305 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
306 "a valid size_t value, which in this case means it must not be "
307 "Teuchos::OrdinalTraits<size_t>::invalid().");
308 resumeFill (params);
310 }
311
// Constructor taking a row Map, a column Map, and a single allocation
// hint for all rows.
//
// rowMap / colMap: stored as rowMap_ / colMap_; rowMap is also passed to
//   the dist_object_type base.
// maxNumEntriesPerRow: stored as numAllocForAllRows_; must not equal
//   Teuchos::OrdinalTraits<size_t>::invalid(), else std::invalid_argument
//   is reported through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
// params: forwarded to resumeFill.
//
// NOTE(review): listing number 332 (between resumeFill and the closing
// brace) is absent here -- confirm the missing statement upstream.
312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
313 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
314 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
315 const Teuchos::RCP<const map_type>& colMap,
316 const size_t maxNumEntriesPerRow,
317 const Teuchos::RCP<Teuchos::ParameterList>& params) :
318 dist_object_type (rowMap)
319 , rowMap_ (rowMap)
320 , colMap_ (colMap)
321 , numAllocForAllRows_ (maxNumEntriesPerRow)
322 {
323 const char tfecfFuncName[] =
324 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
325 staticAssertions ();
326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
327 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
328 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
329 "a valid size_t value, which in this case means it must not be "
330 "Teuchos::OrdinalTraits<size_t>::invalid().");
331 resumeFill (params);
333 }
334
335
// Constructor taking a row Map and a per-row allocation hint given as a
// Teuchos::ArrayView.
//
// rowMap: stored as rowMap_; a null rowMap is treated as having zero
//   local rows for the length check below.
// numEntPerRow: one count per local row.  Its length must equal the local
//   number of rows, else std::invalid_argument is reported.  The counts
//   are deep-copied into k_numAllocPerRow_.
// params: forwarded to resumeFill.
//
// NOTE(review): listing numbers 343 (in the initializer list) and 386
// (before the closing brace) are absent here -- confirm upstream.
336 template <class LocalOrdinal, class GlobalOrdinal, class Node>
337 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
338 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
339 const Teuchos::ArrayView<const size_t>& numEntPerRow,
340 const Teuchos::RCP<Teuchos::ParameterList>& params) :
341 dist_object_type (rowMap)
342 , rowMap_ (rowMap)
344 {
345 const char tfecfFuncName[] =
346 "CrsGraph(rowMap,numEntPerRow,params): ";
347 staticAssertions ();
348
349 const size_t lclNumRows = rowMap.is_null () ?
350 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
351 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
352 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
353 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
354 << " != the local number of rows " << lclNumRows << " as specified by "
355 "the input row Map.");
356
// The per-entry validity scan only runs when debug_ is set.
357 if (debug_) {
358 for (size_t r = 0; r < lclNumRows; ++r) {
359 const size_t curRowCount = numEntPerRow[r];
360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
361 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
362 std::invalid_argument, "numEntPerRow(" << r << ") "
363 "specifies an invalid number of entries "
364 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
365 }
366 }
367
368 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
369 // The latter is a const View, so we have to copy into a nonconst
370 // View first, then assign.
371 typedef decltype (k_numAllocPerRow_) out_view_type;
372 typedef typename out_view_type::non_const_type nc_view_type;
// Unmanaged HostSpace View type used to wrap the caller's raw pointer.
373 typedef Kokkos::View<const size_t*,
374 typename nc_view_type::array_layout,
375 Kokkos::HostSpace,
376 Kokkos::MemoryUnmanaged> in_view_type;
377 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
378 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
379 lclNumRows);
380 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
381 using exec_space = typename nc_view_type::execution_space;
382 Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
383 k_numAllocPerRow_ = numAllocPerRowOut;
384
385 resumeFill (params);
387 }
388
389
390
// Constructor taking a row Map and a per-row allocation hint given as a
// Kokkos::DualView.
//
// rowMap: stored as rowMap_; a null rowMap is treated as having zero
//   local rows for the length check below.
// numEntPerRow: one count per local row.  Its h_view member is assigned
//   to k_numAllocPerRow_ in the initializer list (no element-wise copy is
//   visible here).  Its extent must equal the local number of rows, else
//   std::invalid_argument is reported.
// params: forwarded to resumeFill.
//
// NOTE(review): listing numbers 399 (in the initializer list) and 425
// (before the closing brace) are absent here -- confirm upstream.
391 template <class LocalOrdinal, class GlobalOrdinal, class Node>
392 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
393 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
394 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
395 const Teuchos::RCP<Teuchos::ParameterList>& params) :
396 dist_object_type (rowMap)
397 , rowMap_ (rowMap)
398 , k_numAllocPerRow_ (numEntPerRow.h_view)
400 {
401 const char tfecfFuncName[] =
402 "CrsGraph(rowMap,numEntPerRow,params): ";
403 staticAssertions ();
404
405 const size_t lclNumRows = rowMap.is_null () ?
406 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
407 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
408 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
409 std::invalid_argument, "numEntPerRow has length " <<
410 numEntPerRow.extent (0) << " != the local number of rows " <<
411 lclNumRows << " as specified by " "the input row Map.");
412
// The per-entry validity scan only runs when debug_ is set.
413 if (debug_) {
414 for (size_t r = 0; r < lclNumRows; ++r) {
415 const size_t curRowCount = numEntPerRow.h_view(r);
416 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
417 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
418 std::invalid_argument, "numEntPerRow(" << r << ") "
419 "specifies an invalid number of entries "
420 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
421 }
422 }
423
424 resumeFill (params);
426 }
427
428
// Constructor taking a row Map, a column Map, and a per-row allocation
// hint given as a Kokkos::DualView.
//
// rowMap / colMap: stored as rowMap_ / colMap_; a null rowMap is treated
//   as having zero local rows for the length check below.
// numEntPerRow: one count per local row.  Its h_view member is assigned
//   to k_numAllocPerRow_ in the initializer list.  Its extent must equal
//   the local number of rows, else std::invalid_argument is reported.
// params: forwarded to resumeFill.
//
// NOTE(review): listing numbers 439 (in the initializer list) and 465
// (before the closing brace) are absent here -- confirm upstream.
429 template <class LocalOrdinal, class GlobalOrdinal, class Node>
430 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
431 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
432 const Teuchos::RCP<const map_type>& colMap,
433 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
434 const Teuchos::RCP<Teuchos::ParameterList>& params) :
435 dist_object_type (rowMap)
436 , rowMap_ (rowMap)
437 , colMap_ (colMap)
438 , k_numAllocPerRow_ (numEntPerRow.h_view)
440 {
441 const char tfecfFuncName[] =
442 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
443 staticAssertions ();
444
445 const size_t lclNumRows = rowMap.is_null () ?
446 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
448 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
449 std::invalid_argument, "numEntPerRow has length " <<
450 numEntPerRow.extent (0) << " != the local number of rows " <<
451 lclNumRows << " as specified by " "the input row Map.");
452
// The per-entry validity scan only runs when debug_ is set.
453 if (debug_) {
454 for (size_t r = 0; r < lclNumRows; ++r) {
455 const size_t curRowCount = numEntPerRow.h_view(r);
456 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
457 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
458 std::invalid_argument, "numEntPerRow(" << r << ") "
459 "specifies an invalid number of entries "
460 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
461 }
462 }
463
464 resumeFill (params);
466 }
467
468
// Constructor taking a row Map, a column Map, and a per-row allocation
// hint given as a Teuchos::ArrayView.
//
// rowMap / colMap: stored as rowMap_ / colMap_; a null rowMap is treated
//   as having zero local rows for the length check below.
// numEntPerRow: one count per local row.  Its length must equal the local
//   number of rows, else std::invalid_argument is reported.  The counts
//   are deep-copied into k_numAllocPerRow_.
// params: forwarded to resumeFill.
//
// NOTE(review): listing numbers 478 (in the initializer list) and 521
// (before the closing brace) are absent here -- confirm upstream.
469 template <class LocalOrdinal, class GlobalOrdinal, class Node>
470 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
471 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
472 const Teuchos::RCP<const map_type>& colMap,
473 const Teuchos::ArrayView<const size_t>& numEntPerRow,
474 const Teuchos::RCP<Teuchos::ParameterList>& params) :
475 dist_object_type (rowMap)
476 , rowMap_ (rowMap)
477 , colMap_ (colMap)
479 {
480 const char tfecfFuncName[] =
481 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
482 staticAssertions ();
483
484 const size_t lclNumRows = rowMap.is_null () ?
485 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
487 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
488 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
489 << " != the local number of rows " << lclNumRows << " as specified by "
490 "the input row Map.");
491
// The per-entry validity scan only runs when debug_ is set.
492 if (debug_) {
493 for (size_t r = 0; r < lclNumRows; ++r) {
494 const size_t curRowCount = numEntPerRow[r];
495 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
496 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
497 std::invalid_argument, "numEntPerRow(" << r << ") "
498 "specifies an invalid number of entries "
499 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
500 }
501 }
502
503 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
504 // The latter is a const View, so we have to copy into a nonconst
505 // View first, then assign.
506 typedef decltype (k_numAllocPerRow_) out_view_type;
507 typedef typename out_view_type::non_const_type nc_view_type;
// Unmanaged HostSpace View type used to wrap the caller's raw pointer.
508 typedef Kokkos::View<const size_t*,
509 typename nc_view_type::array_layout,
510 Kokkos::HostSpace,
511 Kokkos::MemoryUnmanaged> in_view_type;
512 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
513 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
514 lclNumRows);
515 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
516 using exec_space = typename nc_view_type::execution_space;
517 Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
518 k_numAllocPerRow_ = numAllocPerRowOut;
519
520 resumeFill (params);
522 }
523
524
// Constructor that builds a graph over the leading rows of an existing
// graph, using subviews / sub-ranges of originalGraph's row pointers and
// index storage.
//
// originalGraph: source graph; its colMap_, storageStatus_ and index
//   state flags are copied in the initializer list.
// rowMap: row Map of the new graph; dereferenced without a null check.
// params: not referenced in the lines visible here.
//
// NOTE(review): listing numbers 533 (in the initializer list) and 557
// (before the closing brace) are absent here -- confirm upstream.
525 template <class LocalOrdinal, class GlobalOrdinal, class Node>
526 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
527 CrsGraph (CrsGraph<local_ordinal_type, global_ordinal_type, node_type>& originalGraph,
528 const Teuchos::RCP<const map_type>& rowMap,
529 const Teuchos::RCP<Teuchos::ParameterList>& params) :
530 dist_object_type (rowMap)
531 , rowMap_(rowMap)
532 , colMap_(originalGraph.colMap_)
534 , storageStatus_(originalGraph.storageStatus_)
535 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
536 , indicesAreLocal_(originalGraph.indicesAreLocal_)
537 , indicesAreSorted_(originalGraph.indicesAreSorted_)
538 {
539 staticAssertions();
540
// NOTE(review): the row count is stored in an int; other code in this
// file holds getLocalNumElements() in a size_t, so this looks like a
// narrowing conversion -- confirm it is intended.
541 int numRows = rowMap->getLocalNumElements();
// The packed row pointer at index numRows gives the number of entries
// in the first numRows rows of originalGraph.
542 size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
// Half-open range [0, numRows+1): the row offsets needed for numRows rows.
543 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
544
545
546 this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
547 this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
548
// Take the first numNonZeros indices from whichever index storage
// (local or global) the original graph flags as in use.
549 if (indicesAreLocal_) {
550 lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
551 lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
552 }
553 else {
554 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
555 }
556
558 }
559
// Constructor taking row and column Maps plus Kokkos Views of row
// pointers and local column indices.
//
// The graph is marked STORAGE_1D_PACKED, allocated, and locally indexed.
// params: if non-null and it has a "sorted" parameter set to false, the
//   indices are marked unsorted; otherwise they are marked sorted.
// rowPointers / columnIndices: handed to setAllIndices.
//
// NOTE(review): listing numbers 570 (in the initializer list) and 584
// (before the closing brace) are absent here -- confirm upstream.
560 template <class LocalOrdinal, class GlobalOrdinal, class Node>
561 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
562 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
563 const Teuchos::RCP<const map_type>& colMap,
564 const typename local_graph_device_type::row_map_type& rowPointers,
565 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
566 const Teuchos::RCP<Teuchos::ParameterList>& params) :
567 dist_object_type (rowMap)
568 , rowMap_(rowMap)
569 , colMap_(colMap)
571 , storageStatus_(Details::STORAGE_1D_PACKED)
572 , indicesAreAllocated_(true)
573 , indicesAreLocal_(true)
574 {
575 staticAssertions ();
// Sorted is the default; only an explicit "sorted" == false changes it.
576 if (! params.is_null() && params->isParameter("sorted") &&
577 ! params->get<bool>("sorted")) {
578 indicesAreSorted_ = false;
579 }
580 else {
581 indicesAreSorted_ = true;
582 }
583 setAllIndices (rowPointers, columnIndices);
585 }
586
// Constructor taking row and column Maps plus Teuchos::ArrayRCP arrays of
// row pointers and local column indices.
//
// The graph is marked STORAGE_1D_PACKED, allocated, and locally indexed.
// params: if non-null and it has a "sorted" parameter set to false, the
//   indices are marked unsorted; otherwise they are marked sorted.
// rowPointers / columnIndices: handed to setAllIndices.
//
// NOTE(review): listing numbers 597 (in the initializer list) and 611
// (before the closing brace) are absent here -- confirm upstream.
587 template <class LocalOrdinal, class GlobalOrdinal, class Node>
588 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
589 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
590 const Teuchos::RCP<const map_type>& colMap,
591 const Teuchos::ArrayRCP<size_t>& rowPointers,
592 const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
593 const Teuchos::RCP<Teuchos::ParameterList>& params) :
594 dist_object_type (rowMap)
595 , rowMap_ (rowMap)
596 , colMap_ (colMap)
598 , storageStatus_ (Details::STORAGE_1D_PACKED)
599 , indicesAreAllocated_ (true)
600 , indicesAreLocal_ (true)
601 {
602 staticAssertions ();
// Sorted is the default; only an explicit "sorted" == false changes it.
603 if (! params.is_null() && params->isParameter("sorted") &&
604 ! params->get<bool>("sorted")) {
605 indicesAreSorted_ = false;
606 }
607 else {
608 indicesAreSorted_ = true;
609 }
610 setAllIndices (rowPointers, columnIndices);
612 }
613
614 template <class LocalOrdinal, class GlobalOrdinal, class Node>
615 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
616 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
617 const Teuchos::RCP<const map_type>& colMap,
618 const local_graph_device_type& k_local_graph_,
619 const Teuchos::RCP<Teuchos::ParameterList>& params)
620 : CrsGraph (k_local_graph_,
621 rowMap,
622 colMap,
623 Teuchos::null,
624 Teuchos::null,
625 params)
626 {}
627
// Constructor taking a local graph plus row, column, domain, and range
// Maps.  On return the graph is marked fill complete.
//
// k_local_graph_: its entries and row_map become the graph's packed local
//   indices and row pointers; its numRows() must equal the row Map's
//   local element count, else std::runtime_error is reported.
// rowMap: dereferenced without a null check.
// colMap: must be nonnull, else std::runtime_error is reported.
// domainMap / rangeMap: a null Map is replaced by rowMap_.
// params: optional.  "sorted" == false marks indices unsorted (default
//   sorted); "compute global constants" (default true) controls the call
//   to computeGlobalConstants.
//
// NOTE(review): listing numbers 639 (in the initializer list) and 683
// (between the lclIndsPacked_wdv assignment and setRowPtrs) are absent
// here -- confirm upstream.
628 template <class LocalOrdinal, class GlobalOrdinal, class Node>
629 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
630 CrsGraph (const local_graph_device_type& k_local_graph_,
631 const Teuchos::RCP<const map_type>& rowMap,
632 const Teuchos::RCP<const map_type>& colMap,
633 const Teuchos::RCP<const map_type>& domainMap,
634 const Teuchos::RCP<const map_type>& rangeMap,
635 const Teuchos::RCP<Teuchos::ParameterList>& params)
636 : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
637 , rowMap_ (rowMap)
638 , colMap_ (colMap)
640 , storageStatus_ (Details::STORAGE_1D_PACKED)
641 , indicesAreAllocated_ (true)
642 , indicesAreLocal_ (true)
643 {
644 staticAssertions();
// Unlike the other constructors in this file, this name has no trailing
// ": "; the messages below start with ": " instead.
645 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
646
647 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
648 colMap.is_null (), std::runtime_error,
649 ": The input column Map must be nonnull.");
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
651 k_local_graph_.numRows () != rowMap->getLocalNumElements (),
652 std::runtime_error,
653 ": The input row Map and the input local graph need to have the same "
654 "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
655 << " row(s), but the local graph claims " << k_local_graph_.numRows ()
656 << " row(s).");
657
658 // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
659 // rowMap_->getLocalNumElements(), but it doesn't have to.
660 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
661 // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
662 // ": The input row Map and the input local graph need to have the same "
663 // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
664 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
// Internal consistency check: neither unpacked local nor global index
// storage may hold anything at this point.
665 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
666 lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
667 ": cannot have 1D data structures allocated.");
668
// Sorted is the default; only an explicit "sorted" == false changes it.
669 if(! params.is_null() && params->isParameter("sorted") &&
670 ! params->get<bool>("sorted")) {
671 indicesAreSorted_ = false;
672 }
673 else {
674 indicesAreSorted_ = true;
675 }
676
677 setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
678 rangeMap .is_null() ? rowMap_ : rangeMap);
679 Teuchos::Array<int> remotePIDs (0); // unused output argument
680 this->makeImportExport (remotePIDs, false);
681
682 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
684 this->setRowPtrs(k_local_graph_.row_map);
685
686 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
687
// A null params means "compute global constants"; otherwise the list's
// "compute global constants" entry decides, defaulting to true.
688 const bool callComputeGlobalConstants = params.get () == nullptr ||
689 params->get ("compute global constants", true);
690
691 if (callComputeGlobalConstants) {
692 this->computeGlobalConstants ();
693 }
694 this->fillComplete_ = true;
695 this->checkInternalState ();
696 }
697
// Constructor taking a local graph, all four Maps, and precomputed
// Import and Export objects.  On return the graph is marked fill
// complete.
//
// lclGraph: its entries and row_map become the graph's packed local
//   indices and row pointers.
// rowMap / colMap: stored as rowMap_ / colMap_; colMap must be nonnull,
//   else std::runtime_error is reported.
// domainMap / rangeMap: a null Map is replaced by rowMap.
// importer / exporter: stored as importer_ / exporter_ as given.
// params: optional.  "sorted" == false marks indices unsorted (default
//   sorted); "compute global constants" (default true) controls the call
//   to computeGlobalConstants.
//
// NOTE(review): listing numbers 715 (in the initializer list), 729
// (before setRowPtrs) and 749 (before the closing brace) are absent
// here -- confirm upstream.
698 template <class LocalOrdinal, class GlobalOrdinal, class Node>
699 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
700 CrsGraph (const local_graph_device_type& lclGraph,
701 const Teuchos::RCP<const map_type>& rowMap,
702 const Teuchos::RCP<const map_type>& colMap,
703 const Teuchos::RCP<const map_type>& domainMap,
704 const Teuchos::RCP<const map_type>& rangeMap,
705 const Teuchos::RCP<const import_type>& importer,
706 const Teuchos::RCP<const export_type>& exporter,
707 const Teuchos::RCP<Teuchos::ParameterList>& params) :
708 DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
709 rowMap_ (rowMap),
710 colMap_ (colMap),
711 rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
712 domainMap_ (domainMap.is_null () ? rowMap : domainMap),
713 importer_ (importer),
714 exporter_ (exporter),
716 storageStatus_ (Details::STORAGE_1D_PACKED),
717 indicesAreAllocated_ (true),
718 indicesAreLocal_ (true)
719 {
720 staticAssertions();
721 const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
722 "Map,Map,Map,Map,Import,Export,params): ";
723
724 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
725 (colMap.is_null (), std::runtime_error,
726 "The input column Map must be nonnull.");
727
728 lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
730 setRowPtrs(lclGraph.row_map);
731
732 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
733
// Sorted is the default; only an explicit "sorted" == false changes it.
734 if (! params.is_null() && params->isParameter("sorted") &&
735 ! params->get<bool>("sorted")) {
736 indicesAreSorted_ = false;
737 }
738 else {
739 indicesAreSorted_ = true;
740 }
741
// A null params means "compute global constants"; otherwise the list's
// "compute global constants" entry decides, defaulting to true.
742 const bool callComputeGlobalConstants =
743 params.get () == nullptr ||
744 params->get ("compute global constants", true);
745 if (callComputeGlobalConstants) {
746 this->computeGlobalConstants ();
747 }
748 fillComplete_ = true;
750 }
751
// Build and return a parameter list named "Tpetra::CrsGraph" holding an
// "Import" and an "Export" sublist.  Both sublists are set from the same
// importSublist object, passed by value (dereferenced), so they are
// independent copies inside the returned list.
//
// NOTE(review): rowMap_ is dereferenced without a null check when the
// Distributor is created -- confirm callers guarantee a nonnull row Map.
// NOTE(review): listing number 754 (between the return type and the
// function name) is absent here, presumably the class-scope qualifier
// -- confirm upstream.
752 template <class LocalOrdinal, class GlobalOrdinal, class Node>
753 Teuchos::RCP<const Teuchos::ParameterList>
755 getValidParameters () const
756 {
757 using Teuchos::RCP;
758 using Teuchos::ParameterList;
759 using Teuchos::parameterList;
760
761 RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
762
763 // Make a sublist for the Import.
764 RCP<ParameterList> importSublist = parameterList ("Import");
765
766 // FIXME (mfh 02 Apr 2012) We should really have the Import and
767 // Export objects fill in these lists. However, we don't want to
768 // create an Import or Export unless we need them. For now, we
769 // know that the Import and Export just pass the list directly to
770 // their Distributor, so we can create a Distributor here
771 // (Distributor's constructor is a lightweight operation) and have
772 // it fill in the list.
773
774 // Fill in Distributor default parameters by creating a
775 // Distributor and asking it to do the work.
// The Distributor object itself is not used after construction.
776 Distributor distributor (rowMap_->getComm (), importSublist);
777 params->set ("Import", *importSublist, "How the Import performs communication.");
778
779 // Make a sublist for the Export. For now, it's a clone of the
780 // Import sublist. It's not a shallow copy, though, since we
781 // might like the Import to do communication differently than the
782 // Export.
783 params->set ("Export", *importSublist, "How the Export performs communication.");
784
785 return params;
786 }
787
// Validate params against a list of valid parameters (filling in
// defaults), then store params via setMyParamList.
//
// params: dereferenced without a null check.
//
// NOTE(review): listing numbers 790 (presumably the class-scope
// qualifier) and 794 (the initializer of validParams) are absent here, so
// the declaration below is incomplete as shown -- confirm upstream.
788 template <class LocalOrdinal, class GlobalOrdinal, class Node>
789 void
791 setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
792 {
793 Teuchos::RCP<const Teuchos::ParameterList> validParams =
795 params->validateParametersAndSetDefaults (*validParams);
796 this->setMyParamList (params);
797 }
798
// Return the row Map's global number of elements.  rowMap_ is
// dereferenced without a null check.
// NOTE(review): listing numbers 800-801 (presumably the return type and
// the class-scope qualifier) are absent here -- confirm upstream.
799 template <class LocalOrdinal, class GlobalOrdinal, class Node>
802 getGlobalNumRows () const
803 {
804 return rowMap_->getGlobalNumElements ();
805 }
806
// Return the domain Map's global number of elements.
// Reports std::runtime_error if the graph is not fill complete or if the
// domain Map is null.
// NOTE(review): listing numbers 808-809 (presumably the return type and
// the class-scope qualifier) are absent here -- confirm upstream.
807 template <class LocalOrdinal, class GlobalOrdinal, class Node>
810 getGlobalNumCols () const
811 {
812 const char tfecfFuncName[] = "getGlobalNumCols: ";
813 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
814 ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
815 "The graph does not have a domain Map. You may not call this method in "
816 "that case.");
817 return getDomainMap ()->getGlobalNumElements ();
818 }
819
820
// Return the row Map's local number of elements, or zero if the row Map
// is null.
// NOTE(review): listing number 823 (presumably the class-scope
// qualifier) is absent here -- confirm upstream.
821 template <class LocalOrdinal, class GlobalOrdinal, class Node>
822 size_t
824 getLocalNumRows () const
825 {
826 return this->rowMap_.is_null () ?
827 static_cast<size_t> (0) :
828 this->rowMap_->getLocalNumElements ();
829 }
830
831
// Return the column Map's local number of elements.
// Reports std::runtime_error if hasColMap() is false.
// NOTE(review): the null branch of the return expression tests the same
// colMap_ that hasColMap() (as defined in this file) already tests, so it
// looks unreachable after the check above -- confirm.
// NOTE(review): listing number 834 (presumably the class-scope
// qualifier) is absent here -- confirm upstream.
832 template <class LocalOrdinal, class GlobalOrdinal, class Node>
833 size_t
835 getLocalNumCols () const
836 {
837 const char tfecfFuncName[] = "getLocalNumCols: ";
838 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
839 ! hasColMap (), std::runtime_error,
840 "The graph does not have a column Map. You may not call this method "
841 "unless the graph has a column Map. This requires either that a custom "
842 "column Map was given to the constructor, or that fillComplete() has "
843 "been called.");
844 return colMap_.is_null () ? static_cast<size_t> (0) :
845 colMap_->getLocalNumElements ();
846 }
847
848
849
850 template <class LocalOrdinal, class GlobalOrdinal, class Node>
851 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
857
858 template <class LocalOrdinal, class GlobalOrdinal, class Node>
859 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
865
866 template <class LocalOrdinal, class GlobalOrdinal, class Node>
867 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
873
874 template <class LocalOrdinal, class GlobalOrdinal, class Node>
875 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
881
882 template <class LocalOrdinal, class GlobalOrdinal, class Node>
883 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
889
890 template <class LocalOrdinal, class GlobalOrdinal, class Node>
891 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
897
// Return true if and only if colMap_ is nonnull.
// NOTE(review): listing number 900 (presumably the class-scope
// qualifier) is absent here -- confirm upstream.
898 template <class LocalOrdinal, class GlobalOrdinal, class Node>
899 bool
901 hasColMap () const
902 {
903 return ! colMap_.is_null ();
904 }
905
// Return true if indices are allocated, k_numRowEntries_ has extent zero,
// and the graph has at least one local row.  A graph with zero local
// rows therefore always reports false (see the FIXME below).
// NOTE(review): listing number 908 (presumably the class-scope
// qualifier) is absent here -- confirm upstream.
906 template <class LocalOrdinal, class GlobalOrdinal, class Node>
907 bool
909 isStorageOptimized () const
910 {
911 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
912 // getLocalNumRows() is zero?
913
914 const bool isOpt = indicesAreAllocated_ &&
915 k_numRowEntries_.extent (0) == 0 &&
916 getLocalNumRows () > 0;
917
918 return isOpt;
919 }
920
921
// Return globalNumEntries_.
// Reports std::logic_error if haveGlobalConstants_ is false.
// NOTE(review): listing numbers 923-925 (presumably the return type,
// class-scope qualifier and function name) are absent here; the name is
// inferred from the tfecfFuncName string below -- confirm upstream.
922 template <class LocalOrdinal, class GlobalOrdinal, class Node>
926 {
927 const char tfecfFuncName[] = "getGlobalNumEntries: ";
928 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
929 (! this->haveGlobalConstants_, std::logic_error,
930 "The graph does not have global constants computed, "
931 "but the user has requested them.");
932
933 return globalNumEntries_;
934 }
935
936
// Return the number of entries stored on this process.
//
// Returns zero if indices are not allocated or there are no local rows.
// Otherwise, if k_numRowEntries_ is empty, the count is read from the
// packed row pointers at index lclNumRows (or is zero if there are fewer
// than lclNumRows + 1 row pointers); if k_numRowEntries_ is populated,
// the count is the sum of its entries over the rows.
//
// NOTE(review): lclNumRows and numNumEntPerRow hold size_t-like values
// in LocalOrdinal -- confirm the narrowing is acceptable.
// NOTE(review): listing number 939 (presumably the class-scope
// qualifier) is absent here -- confirm upstream.
937 template <class LocalOrdinal, class GlobalOrdinal, class Node>
938 size_t
940 getLocalNumEntries () const
941 {
942 typedef LocalOrdinal LO;
943
944 if (this->indicesAreAllocated_) {
945 const LO lclNumRows = this->getLocalNumRows ();
946 if (lclNumRows == 0) {
947 return static_cast<size_t> (0);
948 }
949 else {
950 // Avoid the "*this capture" issue by creating a local Kokkos::View.
951 auto numEntPerRow = this->k_numRowEntries_;
952 const LO numNumEntPerRow = numEntPerRow.extent (0);
953 if (numNumEntPerRow == 0) {
// No per-row counts: fall back to the packed row pointers, guarding
// against a row-pointer array too short to index at lclNumRows.
954 if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
955 static_cast<LO> (lclNumRows + 1)) {
956 return static_cast<size_t> (0);
957 }
958 else {
959 return this->getRowPtrsPackedHost()(lclNumRows);
960 }
961 }
962 else { // k_numRowEntries_ is populated
963 // k_numRowEntries_ is actually a host View, so we run
964 // the sum in its native execution space. This also means
965 // that we can use explicit capture (which could perhaps
966 // improve build time) instead of KOKKOS_LAMBDA, and avoid
967 // any CUDA build issues with trying to run a __device__ -
968 // only function on host.
969 typedef typename num_row_entries_type::execution_space
970 host_exec_space;
971 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
972
// Sum over min(lclNumRows, numNumEntPerRow) rows so the View is never
// indexed past its extent.
973 const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
974 lclNumRows :
975 numNumEntPerRow;
976 size_t nodeNumEnt = 0;
977 Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
978 range_type (0, upperLoopBound),
979 [=] (const LO& k, size_t& lclSum) {
980 lclSum += numEntPerRow(k);
981 }, nodeNumEnt);
982 return nodeNumEnt;
983 }
984 }
985 }
986 else { // nothing allocated on this process, so no entries
987 return static_cast<size_t> (0);
988 }
989 }
990
// NOTE(review): the signature line and the return statement of this
// function are not visible in this listing; the name is taken from
// tfecfFuncName below.
//
// The visible part raises std::logic_error if haveGlobalConstants_ is
// false, i.e. if the global constants have not been computed.
991 template <class LocalOrdinal, class GlobalOrdinal, class Node>
995 {
996 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
997 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
998 (! this->haveGlobalConstants_, std::logic_error,
999 "The graph does not have global constants computed, "
1000 "but the user has requested them.");
1001
1003 }
1004
1005 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1006 size_t
1012
// Returns the fillComplete_ flag.
1013 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1014 bool
1016 isFillComplete () const
1017 {
1018 return fillComplete_;
1019 }
1020
// Returns the negation of the fillComplete_ flag, so this is always the
// opposite of isFillComplete().
1021 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1022 bool
1024 isFillActive () const
1025 {
1026 return ! fillComplete_;
1027 }
1028
1029
// Returns the indicesAreLocal_ flag.
1030 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031 bool
1033 isLocallyIndexed () const
1034 {
1035 return indicesAreLocal_;
1036 }
1037
// Returns the indicesAreGlobal_ flag.
1038 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1039 bool
1041 isGloballyIndexed () const
1042 {
1043 return indicesAreGlobal_;
1044 }
1045
// NOTE(review): the line carrying this function's name is not visible in
// this listing; presumably this is getLocalAllocationSize() -- confirm.
//
// Returns a size read from the row-offset arrays, depending on state:
//   - indices not allocated:
//     Tpetra::Details::OrdinalTraits<size_t>::invalid().
//   - zero local rows: 0.
//   - storageStatus_ == STORAGE_1D_PACKED: getRowPtrsPackedHost()(lclNumRows),
//     or 0 if the packed row-offset View has fewer than lclNumRows + 1
//     entries.
//   - storageStatus_ == STORAGE_1D_UNPACKED:
//     getRowPtrsUnpackedHost()(lclNumRows), or 0 if that View is empty.
//   - any other storage status: 0.
1046 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1047 size_t
1050 {
1051 typedef LocalOrdinal LO;
1052
1053 if (this->indicesAreAllocated_) {
1054 const LO lclNumRows = this->getLocalNumRows ();
1055 if (lclNumRows == 0) {
1056 return static_cast<size_t> (0);
1057 }
1058 else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1059 if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
1060 static_cast<LO> (lclNumRows + 1)) {
1061 return static_cast<size_t> (0);
1062 }
1063 else {
1064 return this->getRowPtrsPackedHost()(lclNumRows);
1065 }
1066 }
1067 else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1068 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1069 if (rowPtrsUnpacked_host.extent (0) == 0) {
1070 return static_cast<size_t> (0);
1071 }
1072 else {
1073 return rowPtrsUnpacked_host(lclNumRows);
1074 }
1075 }
1076 else {
1077 return static_cast<size_t> (0);
1078 }
1079 }
1080 else {
1081 return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1082 }
1083 }
1084
// Returns the communicator of the row Map, or Teuchos::null when rowMap_
// itself is null.
1085 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1086 Teuchos::RCP<const Teuchos::Comm<int> >
1088 getComm () const
1089 {
1090 return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1091 }
1092
// Returns the index base reported by the row Map.
//
// NOTE(review): unlike getComm(), this dereferences rowMap_ without first
// checking whether it is null.
1093 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1094 GlobalOrdinal
1096 getIndexBase () const
1097 {
1098 return rowMap_->getIndexBase ();
1099 }
1100
// Returns the indicesAreAllocated_ flag.
1101 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1102 bool
1104 indicesAreAllocated () const
1105 {
1106 return indicesAreAllocated_;
1107 }
1108
1109 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1110 bool
1116
1117 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1118 bool
1124
// NOTE(review): the line carrying this function's name is not visible in
// this listing; presumably this is setLocallyModified() -- confirm.
//
// Conservatively marks the graph as changed: clears indicesAreSorted_,
// noRedundancies_, and haveLocalConstants_ without checking whether the
// change actually disturbed any of them.
1125 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1126 void
1129 {
1130 // FIXME (mfh 07 May 2013) How do we know that the change
1131 // introduced a redundancy, or even that it invalidated the sorted
1132 // order of indices? CrsGraph has always made this conservative
1133 // guess. It could be a bit costly to check at insertion time,
1134 // though.
1135 indicesAreSorted_ = false;
1136 noRedundancies_ = false;
1137
1138 // We've modified the graph, so we'll have to recompute local
1139 // constants like the number of diagonal entries on this process.
1140 haveLocalConstants_ = false;
1141 }
1142
// Allocates the graph's row offsets and column-index storage.
//
// Parameters:
//   lg      - GlobalIndices or LocalIndices; selects whether gblInds_wdv
//             or lclIndsUnpacked_wdv is allocated, and which of
//             indicesAreLocal_ / indicesAreGlobal_ ends up true.
//   verbose - when true, progress messages are written to std::cerr.
//
// Steps performed:
//   1. Validate the call: the requested index kind must not contradict
//      the graph's current indexing, and indices must not already be
//      allocated (std::logic_error otherwise).
//   2. Build a row-offset View of length numRows + 1, either from
//      k_numAllocPerRow_ (when it has nonzero extent) or from the
//      constant numAllocForAllRows_, and commit it via
//      setRowPtrsUnpacked().
//   3. Allocate the column-index storage with as many entries as the last
//      row offset.
//   4. Set storageStatus_ to STORAGE_1D_UNPACKED and update the
//      local/global indexing flags.
//   5. When numRows > 0, allocate k_numRowEntries_ and fill it with zeros.
//   6. Reset numAllocForAllRows_ and k_numAllocPerRow_, set
//      indicesAreAllocated_, and run checkInternalState().
//
// Raises std::invalid_argument if k_numAllocPerRow_ has nonzero length
// different from numRows, or if numAllocForAllRows_ holds the invalid
// size_t value. Exceptions from checkInternalState() are rethrown with
// added context (std::logic_error stays std::logic_error; everything else
// becomes std::runtime_error).
//
// NOTE(review): one line between the opening brace and the first
// using-declaration, and one line before each compute-offsets call, are
// not visible in this listing.
1143 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1144 void
1146 allocateIndices (const ELocalGlobal lg, const bool verbose)
1147 {
1149 using Teuchos::arcp;
1150 using Teuchos::Array;
1151 using Teuchos::ArrayRCP;
1152 using std::endl;
1153 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1154 typedef typename local_graph_device_type::row_map_type::non_const_type
1155 non_const_row_map_type;
1156 const char tfecfFuncName[] = "allocateIndices: ";
1157 const char suffix[] =
1158 " Please report this bug to the Tpetra developers.";
1159 ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1160
1161 std::unique_ptr<std::string> prefix;
1162 if (verbose) {
1163 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1164 std::ostringstream os;
1165 os << *prefix << "Start: lg="
1166 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1167 << ", numRows: " << this->getLocalNumRows() << endl;
1168 std::cerr << os.str();
1169 }
1170
1171 // This is a protected function, only callable by us. If it was
1172 // called incorrectly, it is our fault. That's why the tests
1173 // below throw std::logic_error instead of std::invalid_argument.
1174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1175 (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1176 ": The graph is locally indexed, but Tpetra code is calling "
1177 "this method with lg=GlobalIndices." << suffix);
1178 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1179 (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1180 ": The graph is globally indexed, but Tpetra code is calling "
1181 "this method with lg=LocalIndices." << suffix);
1182 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1183 (indicesAreAllocated (), std::logic_error, ": The graph's "
1184 "indices are already allocated, but Tpetra is calling "
1185 "allocateIndices again." << suffix);
1186 const size_t numRows = this->getLocalNumRows ();
1187
1188 //
1189 // STATIC ALLOCATION PROFILE
1190 //
1191 {
1192 if (verbose) {
1193 std::ostringstream os;
1194 os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1195 std::cerr << os.str();
1196 }
1197 non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1198
1199 if (this->k_numAllocPerRow_.extent (0) != 0) {
1200 // It's OK to throw std::invalid_argument here, because we
1201 // haven't incurred any side effects yet. Throwing that
1202 // exception (and not, say, std::logic_error) implies that the
1203 // instance can recover.
1204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1205 (this->k_numAllocPerRow_.extent (0) != numRows,
1206 std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1207 "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1208 << ", but its length != numRows = " << numRows << ".");
1209
1210 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1211 // we want to compute here) lives on device. That's OK;
1212 // computeOffsetsFromCounts can handle this case.
1214
1215 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1216 // doesn't attempt to check its input for "invalid" flag
1217 // values. For now, we omit that feature of the sequential
1218 // code disabled below.
1219 computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1220 }
1221 else {
1222 // It's OK to throw std::invalid_argument here, because we
1223 // haven't incurred any side effects yet. Throwing that
1224 // exception (and not, say, std::logic_error) implies that the
1225 // instance can recover.
1226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1227 (this->numAllocForAllRows_ ==
1228 Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1229 std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1230 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1231 Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1232
1234 computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1235 }
1236
1237 // "Commit" the resulting row offsets.
1238 setRowPtrsUnpacked(k_rowPtrs);
1239 }
1240
// The last row offset is the total number of column-index slots needed.
1241 const size_type numInds = this->getRowPtrsUnpackedHost()(numRows);
1242 if (lg == LocalIndices) {
1243 if (verbose) {
1244 std::ostringstream os;
1245 os << *prefix << "Allocate local column indices "
1246 "lclIndsUnpacked_wdv: " << numInds << endl;
1247 std::cerr << os.str();
1248 }
1249 lclIndsUnpacked_wdv = local_inds_wdv_type (
1250 local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1251 }
1252 else {
1253 if (verbose) {
1254 std::ostringstream os;
1255 os << *prefix << "Allocate global column indices "
1256 "gblInds_wdv: " << numInds << endl;
1257 std::cerr << os.str();
1258 }
1259 gblInds_wdv = global_inds_wdv_type (
1260 global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1261 }
1262 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1263
1264 this->indicesAreLocal_ = (lg == LocalIndices);
1265 this->indicesAreGlobal_ = (lg == GlobalIndices);
1266
1267 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1268 using Kokkos::ViewAllocateWithoutInitializing;
1269 typedef decltype (k_numRowEntries_) row_ent_type;
1270 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1271 if (verbose) {
1272 std::ostringstream os;
1273 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1274 << endl;
1275 std::cerr << os.str();
1276 }
1277 row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1278 // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1279 Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1280 Kokkos::fence(); // TODO: Need to understand downstream failure points and move this fence.
1281 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1282 }
1283
1284 // Once indices are allocated, CrsGraph needs to free this information.
1285 this->numAllocForAllRows_ = 0;
1286 this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1287 this->indicesAreAllocated_ = true;
1288
// Re-validate the object and add context to whatever checkInternalState
// throws, so the failure is attributed to the end of allocateIndices.
1289 try {
1290 this->checkInternalState ();
1291 }
1292 catch (std::logic_error& e) {
1293 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1294 (true, std::logic_error, "At end of allocateIndices, "
1295 "checkInternalState threw std::logic_error: "
1296 << e.what ());
1297 }
1298 catch (std::exception& e) {
1299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1300 (true, std::runtime_error, "At end of allocateIndices, "
1301 "checkInternalState threw std::exception: "
1302 << e.what ());
1303 }
1304 catch (...) {
1305 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1306 (true, std::runtime_error, "At end of allocateIndices, "
1307 "checkInternalState threw an exception "
1308 "not a subclass of std::exception.");
1309 }
1310
1311 if (verbose) {
1312 std::ostringstream os;
1313 os << *prefix << "Done" << endl;
1314 std::cerr << os.str();
1315 }
1316 }
1317
// Returns a read-only host view of the local column indices for the row
// described by rowinfo.
//
// A default-constructed (empty) view is returned when rowinfo.allocSize
// is zero or lclIndsUnpacked_wdv has zero extent. Otherwise the result is
// the host subview requested with rowinfo.offset1D and rowinfo.allocSize
// under Access::ReadOnly.
1318 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320 local_inds_dualv_type::t_host::const_type
1322 getLocalIndsViewHost (const RowInfo& rowinfo) const
1323 {
1324 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1325 return typename local_inds_dualv_type::t_host::const_type ();
1326 else
1327 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1328 rowinfo.allocSize,
1329 Access::ReadOnly);
1330 }
1331
// Returns a writable host view of the local column indices for the row
// described by rowinfo.
//
// A default-constructed (empty) view is returned when rowinfo.allocSize
// is zero or lclIndsUnpacked_wdv has zero extent. Otherwise the result is
// the host subview requested with rowinfo.offset1D and rowinfo.allocSize
// under Access::ReadWrite.
1332 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1334 local_inds_dualv_type::t_host
1336 getLocalIndsViewHostNonConst (const RowInfo& rowinfo)
1337 {
1338 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1339 return typename local_inds_dualv_type::t_host ();
1340 else
1341 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1342 rowinfo.allocSize,
1343 Access::ReadWrite);
1344 }
1345
// Returns a read-only host view of the global column indices for the row
// described by rowinfo.
//
// A default-constructed (empty) view is returned when rowinfo.allocSize
// is zero or gblInds_wdv has zero extent. Otherwise the result is the
// host subview requested with rowinfo.offset1D and rowinfo.allocSize
// under Access::ReadOnly.
1346 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1348 global_inds_dualv_type::t_host::const_type
1350 getGlobalIndsViewHost (const RowInfo& rowinfo) const
1351 {
1352 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1353 return typename global_inds_dualv_type::t_host::const_type ();
1354 else
1355 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1356 rowinfo.allocSize,
1357 Access::ReadOnly);
1358 }
1359
// Returns a read-only device view of the local column indices for the row
// described by rowinfo.
//
// A default-constructed (empty) view is returned when rowinfo.allocSize
// is zero or lclIndsUnpacked_wdv has zero extent. Otherwise the result is
// the device subview requested with rowinfo.offset1D and
// rowinfo.allocSize under Access::ReadOnly.
1360 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1362 local_inds_dualv_type::t_dev::const_type
1364 getLocalIndsViewDevice (const RowInfo& rowinfo) const
1365 {
1366 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1367 return typename local_inds_dualv_type::t_dev::const_type ();
1368 else
1369 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1370 rowinfo.allocSize,
1371 Access::ReadOnly);
1372 }
1373
// Returns a read-only device view of the global column indices for the
// row described by rowinfo.
//
// A default-constructed (empty) view is returned when rowinfo.allocSize
// is zero or gblInds_wdv has zero extent. Otherwise the result is the
// device subview requested with rowinfo.offset1D and rowinfo.allocSize
// under Access::ReadOnly.
1374 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1376 global_inds_dualv_type::t_dev::const_type
1378 getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1379 {
1380 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1381 return typename global_inds_dualv_type::t_dev::const_type ();
1382 else
1383 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1384 rowinfo.allocSize,
1385 Access::ReadOnly);
1386 }
1387
1388
// Builds the RowInfo record (localRow, offset1D, allocSize, numEntries)
// for the local row index myRow.
//
// Results by case:
//   - rowMap_ is null, or myRow is not a local element of the row Map:
//     localRow and offset1D are the invalid size_t value; allocSize and
//     numEntries are 0.
//   - indices are allocated: offset1D and allocSize come from the
//     unpacked row offsets (both 0 if that View is empty); numEntries is
//     k_numRowEntries_(myRow), or allocSize when k_numRowEntries_ has
//     zero extent.
//   - indices are not yet allocated: allocSize is the planned allocation
//     (k_numAllocPerRow_(myRow) if that View is nonempty, otherwise
//     numAllocForAllRows_), numEntries is 0, and offset1D is invalid.
1389 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1390 RowInfo
1392 getRowInfo (const LocalOrdinal myRow) const
1393 {
1394 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1395 RowInfo ret;
1396 if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1397 ret.localRow = STINV;
1398 ret.allocSize = 0;
1399 ret.numEntries = 0;
1400 ret.offset1D = STINV;
1401 return ret;
1402 }
1403
1404 ret.localRow = static_cast<size_t> (myRow);
1405 if (this->indicesAreAllocated ()) {
1406 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1407 // Offsets tell us the allocation size in this case.
1408 if (rowPtrsUnpacked_host.extent (0) == 0) {
1409 ret.offset1D = 0;
1410 ret.allocSize = 0;
1411 }
1412 else {
1413 ret.offset1D = rowPtrsUnpacked_host(myRow);
1414 ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1415 }
1416
// With no per-row counts stored, the row is treated as completely full.
1417 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1418 ret.allocSize :
1419 this->k_numRowEntries_(myRow);
1420 }
1421 else { // haven't performed allocation yet; probably won't hit this code
1422 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1423 // allocate, rather than doing lazy allocation at first insert.
1424 // This will make k_numAllocPerRow_ obsolete.
1425 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1426 this->k_numAllocPerRow_(myRow) : // this is a host View
1427 this->numAllocForAllRows_;
1428 ret.numEntries = 0;
1429 ret.offset1D = STINV;
1430 }
1431
1432 return ret;
1433 }
1434
1435
// Builds the RowInfo record (localRow, offset1D, allocSize, numEntries)
// for the row whose global index is gblRow.
//
// The global index is first translated with rowMap_->getLocalElement().
// If rowMap_ is null, or the translation yields the invalid LocalOrdinal
// value, the result has localRow and offset1D set to the invalid size_t
// value and allocSize and numEntries set to 0.
//
// Otherwise the record is filled in the same way as in getRowInfo():
//   - indices allocated: offset1D and allocSize come from the unpacked
//     row offsets (both 0 if that View is empty); numEntries is
//     k_numRowEntries_(myRow), or allocSize when k_numRowEntries_ has
//     zero extent.
//   - indices not yet allocated: allocSize is the planned allocation,
//     numEntries is 0, and offset1D is invalid.
1436 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1437 RowInfo
1439 getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1440 {
1441 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1442 RowInfo ret;
1443 if (this->rowMap_.is_null ()) {
1444 ret.localRow = STINV;
1445 ret.allocSize = 0;
1446 ret.numEntries = 0;
1447 ret.offset1D = STINV;
1448 return ret;
1449 }
1450 const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1451 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1452 ret.localRow = STINV;
1453 ret.allocSize = 0;
1454 ret.numEntries = 0;
1455 ret.offset1D = STINV;
1456 return ret;
1457 }
1458
1459 ret.localRow = static_cast<size_t> (myRow);
1460 if (this->indicesAreAllocated ()) {
1461 // graph data structures have the info that we need
1462 //
1463 // if static graph, offsets tell us the allocation size
1464 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1465 if (rowPtrsUnpacked_host.extent (0) == 0) {
1466 ret.offset1D = 0;
1467 ret.allocSize = 0;
1468 }
1469 else {
1470 ret.offset1D = rowPtrsUnpacked_host(myRow);
1471 ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1472 }
1473
// With no per-row counts stored, the row is treated as completely full.
1474 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1475 ret.allocSize :
1476 this->k_numRowEntries_(myRow);
1477 }
1478 else { // haven't performed allocation yet; probably won't hit this code
1479 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1480 // allocate, rather than doing lazy allocation at first insert.
1481 // This will make k_numAllocPerRow_ obsolete.
1482 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1483 this->k_numAllocPerRow_(myRow) : // this is a host View
1484 this->numAllocForAllRows_;
1485 ret.numEntries = 0;
1486 ret.offset1D = STINV;
1487 }
1488
1489 return ret;
1490 }
1491
1492
// Verifies the size relationships CrsGraph assumes between LocalOrdinal,
// GlobalOrdinal, size_t, and global_size_t.
//
// Compile-time checks (static_assert) compare the types' sizes:
//   sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal),
//   sizeof(size_t)        >= sizeof(LocalOrdinal),
//   sizeof(global_size_t) >= sizeof(size_t).
// Run-time checks compare the types' maximum values and raise
// std::runtime_error with a fixed message when any ordering is violated.
1493 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1494 void
1496 staticAssertions () const
1497 {
1498 using Teuchos::OrdinalTraits;
1499 typedef LocalOrdinal LO;
1500 typedef GlobalOrdinal GO;
1501 typedef global_size_t GST;
1502
1503 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1504 // This is so that we can store local indices in the memory
1505 // formerly occupied by global indices.
1506 static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1507 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1508 // Assumption: max(size_t) >= max(LocalOrdinal)
1509 // This is so that we can represent any LocalOrdinal as a size_t.
1510 static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1511 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1512 static_assert (sizeof(GST) >= sizeof(size_t),
1513 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1514
1515 // FIXME (mfh 30 Sep 2015) We're not using
1516 // Teuchos::CompileTimeAssert any more. Can we do these checks
1517 // with static_assert?
1518
1519 // can't call max() with CompileTimeAssert, because it isn't a
1520 // constant expression; will need to make this a runtime check
1521 const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1522 "given template arguments: size assumptions are not valid.";
1523 TEUCHOS_TEST_FOR_EXCEPTION(
1524 static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1525 std::runtime_error, msg);
1526 TEUCHOS_TEST_FOR_EXCEPTION(
1527 static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1528 std::runtime_error, msg);
1529 TEUCHOS_TEST_FOR_EXCEPTION(
1530 static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1531 std::runtime_error, msg);
1532 TEUCHOS_TEST_FOR_EXCEPTION(
1533 Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1534 std::runtime_error, msg);
1535 }
1536
1537
// Appends column indices to the row described by rowinfo, writing them
// directly after the row's current entries.
//
// Parameters:
//   rowinfo - row description; its numEntries field is increased by the
//             number of indices written.
//   newInds - the indices to append; newInds.ginds is read when
//             lg == GlobalIndices, newInds.linds when lg == LocalIndices.
//   lg      - kind of the input indices.
//   I       - kind of indices to store.
//
// Supported combinations:
//   lg=GlobalIndices, I=GlobalIndices: copy into gblInds_wdv.
//   lg=GlobalIndices, I=LocalIndices : convert each index with
//       colMap_->getLocalElement() and write into lclIndsUnpacked_wdv.
//   lg=LocalIndices,  I=LocalIndices : copy into lclIndsUnpacked_wdv.
//   lg=LocalIndices,  I=GlobalIndices: raises std::logic_error.
//
// Returns the number of indices appended. No duplicate detection or
// capacity check against the row's allocation is done here; in debug mode
// only the total size of the index view is checked.
1538 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1539 size_t
1541 insertIndices (RowInfo& rowinfo,
1542 const SLocalGlobalViews &newInds,
1543 const ELocalGlobal lg,
1544 const ELocalGlobal I)
1545 {
1546 using Teuchos::ArrayView;
1547 typedef LocalOrdinal LO;
1548 typedef GlobalOrdinal GO;
1549 const char tfecfFuncName[] = "insertIndices: ";
1550
1551 size_t oldNumEnt = 0;
1552 if (debug_) {
1553 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1554 (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1555 "lg must be either GlobalIndices or LocalIndices.");
1556 oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1557 }
1558
1559 size_t numNewInds = 0;
1560 if (lg == GlobalIndices) { // input indices are global
1561 ArrayView<const GO> new_ginds = newInds.ginds;
1562 numNewInds = new_ginds.size();
1563 if (I == GlobalIndices) { // store global indices
1564 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1565 if (debug_) {
1566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1567 (static_cast<size_t> (gind_view.size ()) <
1568 rowinfo.numEntries + numNewInds, std::logic_error,
1569 "gind_view.size() = " << gind_view.size ()
1570 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1571 << ") + numNewInds (= " << numNewInds << ").");
1572 }
// Destination: first unused slot of this row within the index array.
1573 GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1574 + rowinfo.numEntries;
1575 for (size_t k = 0; k < numNewInds; ++k) {
1576 gblColInds_out[k] = new_ginds[k];
1577 }
1578 }
1579 else if (I == LocalIndices) { // store local indices
1580 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1581 if (debug_) {
1582 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1583 (static_cast<size_t> (lind_view.size ()) <
1584 rowinfo.numEntries + numNewInds, std::logic_error,
1585 "lind_view.size() = " << lind_view.size ()
1586 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1587 << ") + numNewInds (= " << numNewInds << ").");
1588 }
1589 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1590 + rowinfo.numEntries;
1591 for (size_t k = 0; k < numNewInds; ++k) {
1592 lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1593 }
1594 }
1595 }
1596 else if (lg == LocalIndices) { // input indices are local
1597 ArrayView<const LO> new_linds = newInds.linds;
1598 numNewInds = new_linds.size();
1599 if (I == LocalIndices) { // store local indices
1600 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1601 if (debug_) {
1602 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1603 (static_cast<size_t> (lind_view.size ()) <
1604 rowinfo.numEntries + numNewInds, std::logic_error,
1605 "lind_view.size() = " << lind_view.size ()
1606 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1607 << ") + numNewInds (= " << numNewInds << ").");
1608 }
1609 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1610 + rowinfo.numEntries;
1611 for (size_t k = 0; k < numNewInds; ++k) {
1612 lclColInds_out[k] = new_linds[k];
1613 }
1614 }
1615 else if (I == GlobalIndices) {
1616 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1617 (true, std::logic_error, "The case where the input indices are local "
1618 "and the indices to write are global (lg=LocalIndices, I="
1619 "GlobalIndices) is not implemented, because it does not make sense."
1620 << std::endl << "If you have correct local column indices, that "
1621 "means the graph has a column Map. In that case, you should be "
1622 "storing local indices.");
1623 }
1624 }
1625
// Keep the caller's RowInfo and the graph's per-row count in step.
1626 rowinfo.numEntries += numNewInds;
1627 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1628 this->setLocallyModified ();
1629
1630 if (debug_) {
1631 const size_t chkNewNumEnt =
1632 this->getNumEntriesInLocalRow (rowinfo.localRow);
1633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1634 (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1635 "chkNewNumEnt = " << chkNewNumEnt
1636 << " != oldNumEnt (= " << oldNumEnt
1637 << ") + numNewInds (= " << numNewInds << ").");
1638 }
1639
1640 return numNewInds;
1641 }
1642
// Convenience overload: looks up the RowInfo for local row lclRow via
// getRowInfo() and forwards to the RowInfo-based insertGlobalIndicesImpl.
//
// Returns whatever that overload returns. No callback argument is passed
// here -- presumably the RowInfo overload's std::function parameter has a
// default in the class declaration; confirm.
1643 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1644 size_t
1646 insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1647 const GlobalOrdinal inputGblColInds[],
1648 const size_t numInputInds)
1649 {
1650 return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1651 inputGblColInds, numInputInds);
1652 }
1653
// Inserts global column indices into the row described by rowInfo.
//
// Parameters:
//   rowInfo         - description of the target row.
//   inputGblColInds - array of numInputInds global column indices.
//   numInputInds    - number of entries in inputGblColInds.
//   fun             - callback forwarded unchanged to
//                     Details::insertCrsIndices.
//
// The insertion itself is delegated to Details::insertCrsIndices, which
// operates on the host view of gblInds_wdv. If that call returns the
// invalid size_t value, a diagnostic message (process rank, row, current
// entry count, allocation size, input and current indices) is assembled
// and std::runtime_error is raised. Otherwise k_numRowEntries_ for the
// row is increased by the returned count, the graph is marked as locally
// modified, and the count is returned.
//
// NOTE(review): the line holding the initializer of maxNumToPrint is not
// visible in this listing.
1654 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1655 size_t
1657 insertGlobalIndicesImpl (const RowInfo& rowInfo,
1658 const GlobalOrdinal inputGblColInds[],
1659 const size_t numInputInds,
1660 std::function<void(const size_t, const size_t, const size_t)> fun)
1661 {
1663 using Kokkos::View;
1664 using Kokkos::subview;
1665 using Kokkos::MemoryUnmanaged;
1666 using Teuchos::ArrayView;
1667 using LO = LocalOrdinal;
1668 using GO = GlobalOrdinal;
1669 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1670 const LO lclRow = static_cast<LO> (rowInfo.localRow);
1671
1672 auto numEntries = rowInfo.numEntries;
// Wrap the caller's raw array in an unmanaged host View (no copy).
1673 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1674 inp_view_type inputInds(inputGblColInds, numInputInds);
1675 size_t numInserted;
1676 {
1677 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1678 numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1679 gblIndsHostView,
1680 numEntries, inputInds, fun);
1681 }
1682
1683 const bool insertFailed =
1684 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1685 if(insertFailed) {
1686 constexpr size_t ONE (1);
1687 const int myRank = this->getComm()->getRank();
1688 std::ostringstream os;
1689
1690 os << "Proc " << myRank << ": Not enough capacity to insert "
1691 << numInputInds
1692 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1693 << " into local row " << lclRow << ", which currently has "
1694 << rowInfo.numEntries
1695 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1696 << " and total allocation size " << rowInfo.allocSize
1697 << ". ";
1698 const size_t maxNumToPrint =
1700 ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1701 numInputInds);
1702 verbosePrintArray(os, inputGblColIndsView, "Input global "
1703 "column indices", maxNumToPrint);
1704 os << ", ";
1705 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1706 ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1707 rowInfo.numEntries);
1708 verbosePrintArray(os, curGblColIndsView, "Current global "
1709 "column indices", maxNumToPrint);
1710 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1711 (true, std::runtime_error, os.str());
1712 }
1713
1714 this->k_numRowEntries_(lclRow) += numInserted;
1715
1716 this->setLocallyModified();
1717 return numInserted;
1718 }
1719
1720
// Inserts local column indices into local row myRow.
//
// Parameters:
//   myRow   - local index of the target row.
//   indices - local column indices to insert.
//   fun     - callback forwarded unchanged to Details::insertCrsIndices.
//
// The insertion itself is delegated to Details::insertCrsIndices, which
// operates on the host view of lclIndsUnpacked_wdv. If that call returns
// the invalid size_t value, std::runtime_error is raised with a message
// giving the process rank, the row, its current entry count, and its
// allocation size. Otherwise k_numRowEntries_ for the row is increased by
// the returned count and the graph is marked as locally modified. In
// debug mode the resulting row length is cross-checked against
// getNumEntriesInLocalRow(), raising std::logic_error on mismatch.
1721 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1722 void
1724 insertLocalIndicesImpl (const LocalOrdinal myRow,
1725 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1726 std::function<void(const size_t, const size_t, const size_t)> fun)
1727 {
1728 using Kokkos::MemoryUnmanaged;
1729 using Kokkos::subview;
1730 using Kokkos::View;
1731 using LO = LocalOrdinal;
// This prefix appears in every exception message raised below, so it must
// spell the function's real name.
1732 const char tfecfFuncName[] = "insertLocalIndicesImpl: ";
1733
1734 const RowInfo rowInfo = this->getRowInfo(myRow);
1735
1736 size_t numNewInds = 0;
1737 size_t newNumEntries = 0;
1738
1739 auto numEntries = rowInfo.numEntries;
1740 // Note: Teuchos::ArrayViews are in HostSpace
1741 using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1742 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1743 size_t numInserted = 0;
1744 {
1745 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1746 numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1747 numEntries, inputInds, fun);
1748 }
1749
1750 const bool insertFailed =
1751 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1752 if(insertFailed) {
1753 constexpr size_t ONE (1);
1754 const size_t numInputInds(indices.size());
1755 const int myRank = this->getComm()->getRank();
1756 std::ostringstream os;
1757 os << "On MPI Process " << myRank << ": Not enough capacity to "
1758 "insert " << numInputInds
1759 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1760 << " into local row " << myRow << ", which currently has "
1761 << rowInfo.numEntries
1762 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1763 << " and total allocation size " << rowInfo.allocSize << ".";
1764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1765 (true, std::runtime_error, os.str());
1766 }
1767 numNewInds = numInserted;
1768 newNumEntries = rowInfo.numEntries + numNewInds;
1769
1770 this->k_numRowEntries_(myRow) += numNewInds;
1771 this->setLocallyModified ();
1772
1773 if (debug_) {
1774 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1775 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1776 (chkNewNumEntries != newNumEntries, std::logic_error,
1777 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1778 << " != newNumEntries = " << newNumEntries
1779 << ". Please report this bug to the Tpetra developers.");
1780 }
1781 }
1782
// Searches the row described by rowInfo for the given global column
// indices and returns the count reported by Details::findCrsIndices.
//
// Parameters:
//   rowInfo - description of the row to search.
//   indices - global column indices to look for.
//   fun     - callback forwarded unchanged to Details::findCrsIndices.
//
// Behavior by indexing state:
//   - locally indexed: returns the invalid size_t value if colMap_ is
//     null; otherwise searches lclIndsUnpacked_wdv, passing a lambda that
//     converts each global index with the column Map's getLocalElement().
//   - globally indexed: searches gblInds_wdv directly.
//   - neither: returns 0.
1783 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1784 size_t
1786 findGlobalIndices(const RowInfo& rowInfo,
1787 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1788 std::function<void(const size_t, const size_t, const size_t)> fun) const
1789 {
1790 using GO = GlobalOrdinal;
1791 using Kokkos::View;
1792 using Kokkos::MemoryUnmanaged;
1793 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1794
// Wrap the caller's indices in an unmanaged host View (no copy).
1795 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1796 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1797
1798 size_t numFound = 0;
1799 LocalOrdinal lclRow = rowInfo.localRow;
1800 if (this->isLocallyIndexed())
1801 {
1802 if (this->colMap_.is_null())
1803 return invalidCount;
1804 const auto& colMap = *(this->colMap_);
1805 auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1806 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1807 rowInfo.numEntries,
1808 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1809 }
1810 else if (this->isGloballyIndexed())
1811 {
1812 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1813 rowInfo.numEntries,
1814 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1815 }
1816 return numFound;
1817 }
1818
1819
// Sorts and/or removes duplicate local column indices in one row.
//
// Parameters:
//   rowInfo - description of the row to process.
//   sorted  - when false, the row's entries are sorted with std::sort.
//   merged  - when false, adjacent duplicates are removed with
//             std::unique and k_numRowEntries_ for the row is updated.
//
// Returns the number of duplicate entries removed. The result is 0 when
// the row has no entries, when rowInfo.numEntries is the invalid size_t
// value, or when merged is true (duplicates are then assumed absent).
//
// std::unique only collapses adjacent equal values, so duplicate removal
// relies on the entries being in sorted order at that point.
1820 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1821 size_t
1823 sortAndMergeRowIndices (const RowInfo& rowInfo,
1824 const bool sorted,
1825 const bool merged)
1826 {
1827 const size_t origNumEnt = rowInfo.numEntries;
1828 if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1829 origNumEnt != 0) {
1830 auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1831
1832 LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1833 if (! sorted) {
1834 std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1835 }
1836
1837 if (! merged) {
1838 LocalOrdinal* const beg = lclColIndsRaw;
1839 LocalOrdinal* const end = beg + rowInfo.numEntries;
1840 LocalOrdinal* const newend = std::unique (beg, end);
1841 const size_t newNumEnt = newend - beg;
1842
1843 // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1844 this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1845 return origNumEnt - newNumEnt; // the number of duplicates in the row
1846 }
1847 else {
1848 return static_cast<size_t> (0); // assume no duplicates
1849 }
1850 }
1851 else {
1852 return static_cast<size_t> (0); // no entries in the row
1853 }
1854 }
1855
1856
// Installs new domain and range Maps.
//
// Each Map is compared with the stored one by pointer. When the domain
// Map differs, it is replaced and importer_ is reset to null; when the
// range Map differs, it is replaced and exporter_ is reset to null. A Map
// that is pointer-identical to the stored one leaves its state untouched.
1857 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1858 void
1860 setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1861 const Teuchos::RCP<const map_type>& rangeMap)
1862 {
1863 // simple pointer comparison for equality
1864 if (domainMap_ != domainMap) {
1865 domainMap_ = domainMap;
1866 importer_ = Teuchos::null;
1867 }
1868 if (rangeMap_ != rangeMap) {
1869 rangeMap_ = rangeMap;
1870 exporter_ = Teuchos::null;
1871 }
1872 }
1873
1874
// NOTE(review): the line carrying this function's name is not visible in
// this listing; presumably this is clearGlobalConstants() -- confirm.
//
// Invalidates the cached global constants: globalNumEntries_ and
// globalMaxNumRowEntries_ are set to the invalid global_size_t value and
// haveGlobalConstants_ is cleared.
1875 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1876 void
1879 {
1880 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1881
1882 globalNumEntries_ = INV;
1883 globalMaxNumRowEntries_ = INV;
1884 haveGlobalConstants_ = false;
1885 }
1886
1887
1888 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1889 void
1891 checkInternalState () const
1892 {
1893 if (debug_) {
1894 using std::endl;
1895 const char tfecfFuncName[] = "checkInternalState: ";
1896 const char suffix[] = " Please report this bug to the Tpetra developers.";
1897
1898 std::unique_ptr<std::string> prefix;
1899 if (verbose_) {
1900 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1901 std::ostringstream os;
1902 os << *prefix << "Start" << endl;
1903 std::cerr << os.str();
1904 }
1905
1906 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1907 //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1908 // check the internal state of this data structure
1909 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1910 // always remains in a valid state
1911
1912 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1913 (this->rowMap_.is_null (), std::logic_error,
1914 "Row Map is null." << suffix);
1915 // This may access the row Map, so we need to check first (above)
1916 // whether the row Map is null.
1917 const LocalOrdinal lclNumRows =
1918 static_cast<LocalOrdinal> (this->getLocalNumRows ());
1919
1920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1921 (this->isFillActive () == this->isFillComplete (), std::logic_error,
1922 "Graph cannot be both fill active and fill complete." << suffix);
1923 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1924 (this->isFillComplete () &&
1925 (this->colMap_.is_null () ||
1926 this->rangeMap_.is_null () ||
1927 this->domainMap_.is_null ()),
1928 std::logic_error,
1929 "Graph is full complete, but at least one of {column, range, domain} "
1930 "Map is null." << suffix);
1931 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1932 (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1933 std::logic_error, "Storage is optimized, but indices are not "
1934 "allocated, not even trivially." << suffix);
1935
1936 size_t nodeAllocSize = 0;
1937 try {
1938 nodeAllocSize = this->getLocalAllocationSize ();
1939 }
1940 catch (std::logic_error& e) {
1941 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1942 (true, std::runtime_error, "getLocalAllocationSize threw "
1943 "std::logic_error: " << e.what ());
1944 }
1945 catch (std::exception& e) {
1946 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1947 (true, std::runtime_error, "getLocalAllocationSize threw an "
1948 "std::exception: " << e.what ());
1949 }
1950 catch (...) {
1951 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1952 (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1953 "not a subclass of std::exception.");
1954 }
1955
1956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1957 (this->isStorageOptimized () &&
1958 nodeAllocSize != this->getLocalNumEntries (),
1959 std::logic_error, "Storage is optimized, but "
1960 "this->getLocalAllocationSize() = " << nodeAllocSize
1961 << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
1962 << "." << suffix);
1963 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1964 (! this->haveGlobalConstants_ &&
1965 (this->globalNumEntries_ != GSTI ||
1966 this->globalMaxNumRowEntries_ != GSTI),
1967 std::logic_error, "Graph claims not to have global constants, but "
1968 "some of the global constants are not marked as invalid." << suffix);
1969 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1970 (this->haveGlobalConstants_ &&
1971 (this->globalNumEntries_ == GSTI ||
1972 this->globalMaxNumRowEntries_ == GSTI),
1973 std::logic_error, "Graph claims to have global constants, but "
1974 "some of them are marked as invalid." << suffix);
1975 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1976 (this->haveGlobalConstants_ &&
1977 (this->globalNumEntries_ < this->getLocalNumEntries () ||
1978 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1979 std::logic_error, "Graph claims to have global constants, and "
1980 "all of the values of the global constants are valid, but "
1981 "some of the local constants are greater than "
1982 "their corresponding global constants." << suffix);
1983 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1984 (this->indicesAreAllocated () &&
1985 (this->numAllocForAllRows_ != 0 ||
1986 this->k_numAllocPerRow_.extent (0) != 0),
1987 std::logic_error, "The graph claims that its indices are allocated, but "
1988 "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
1989 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1990 "the graph is supposed to release its \"allocation specifications\" "
1991 "when it allocates its indices." << suffix);
1992 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1993 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
1994 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1995 (rowPtrsUnpacked_host.extent(0) != rowPtrsUnpacked_dev.extent(0),
1996 std::logic_error, "The host and device views of k_rowPtrs_ have "
1997 "different sizes; rowPtrsUnpacked_host_ has size "
1998 << rowPtrsUnpacked_host.extent(0)
1999 << ", but rowPtrsUnpacked_dev_ has size "
2000 << rowPtrsUnpacked_dev.extent(0)
2001 << "." << suffix);
2002 if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
2003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2004 (size_t(rowPtrsUnpacked_host.extent(0)) != size_t(lclNumRows + 1),
2005 std::logic_error, "The graph is globally indexed and "
2006 "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host.extent(0)
2007 << ", but that size does not equal lclNumRows+1 = "
2008 << (lclNumRows+1) << "." << suffix);
2009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2010 (rowPtrsUnpacked_host(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2011 std::logic_error, "The graph is globally indexed and "
2012 "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host.extent(0)
2013 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2014 << rowPtrsUnpacked_host(lclNumRows)
2015 << " != gblInds_wdv.extent(0)="
2016 << gblInds_wdv.extent(0) << "." << suffix);
2017 }
2018 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2019 (this->isLocallyIndexed () &&
2020 rowPtrsUnpacked_host.extent (0) != 0 &&
2021 (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2022 static_cast<size_t> (lclNumRows + 1) ||
2023 rowPtrsUnpacked_host(lclNumRows) !=
2024 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2025 std::logic_error, "If k_rowPtrs_ has nonzero size and "
2026 "the graph is locally indexed, then "
2027 "k_rowPtrs_ must have N+1 rows, and "
2028 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2029
2030 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2031 (this->indicesAreAllocated () &&
2032 nodeAllocSize > 0 &&
2033 this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2034 this->gblInds_wdv.extent (0) == 0,
2035 std::logic_error, "Graph is allocated nontrivially, but "
2036 "but 1-D allocations are not present." << suffix);
2037
2038 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2039 (! this->indicesAreAllocated () &&
2040 ((rowPtrsUnpacked_host.extent (0) != 0 ||
2041 this->k_numRowEntries_.extent (0) != 0) ||
2042 this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2043 this->gblInds_wdv.extent (0) != 0),
2044 std::logic_error, "If indices are not allocated, "
2045 "then none of the buffers should be." << suffix);
2046 // indices may be local or global only if they are allocated
2047 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2048 // indicesAreGlobal_)
2049 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2050 ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2051 ! this->indicesAreAllocated_,
2052 std::logic_error, "Indices may be local or global only if they are "
2053 "allocated." << suffix);
2054 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055 (this->indicesAreLocal_ && this->indicesAreGlobal_,
2056 std::logic_error, "Indices may not be both local and global." << suffix);
2057 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2058 (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2059 std::logic_error, "Indices are local, but "
2060 "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2061 << ") != 0. In other words, if indices are local, then "
2062 "allocations of global indices should not be present."
2063 << suffix);
2064 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2065 (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2066 std::logic_error, "Indices are global, but "
2067 "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2068 << ") != 0. In other words, if indices are global, "
2069 "then allocations for local indices should not be present."
2070 << suffix);
2071 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2072 (indicesAreLocal_ && nodeAllocSize > 0 &&
2073 lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2074 std::logic_error, "Indices are local and "
2075 "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2076 "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2077 << getLocalNumRows () << " > 0." << suffix);
2078 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2079 (indicesAreGlobal_ && nodeAllocSize > 0 &&
2080 gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2081 std::logic_error, "Indices are global and "
2082 "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2083 "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2084 << getLocalNumRows () << " > 0." << suffix);
2085 // check the actual allocations
2086 if (this->indicesAreAllocated () &&
2087 rowPtrsUnpacked_host.extent (0) != 0) {
2088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2089 (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2090 this->getLocalNumRows () + 1,
2091 std::logic_error, "Indices are allocated and "
2092 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2093 << rowPtrsUnpacked_host.extent (0) << " != getLocalNumRows()+1 = "
2094 << (this->getLocalNumRows () + 1) << "." << suffix);
2095 const size_t actualNumAllocated =
2096 rowPtrsUnpacked_host(this->getLocalNumRows());
2097 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2098 (this->isLocallyIndexed () &&
2099 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2100 std::logic_error, "Graph is locally indexed, indices are "
2101 "are allocated, and k_rowPtrs_ has nonzero length, but "
2102 "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2103 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2104 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2105 (this->isGloballyIndexed () &&
2106 static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2107 std::logic_error, "Graph is globally indexed, indices "
2108 "are allocated, and k_rowPtrs_ has nonzero length, but "
2109 "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2110 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2111 }
2112
2113 if (verbose_) {
2114 std::ostringstream os;
2115 os << *prefix << "Done" << endl;
2116 std::cerr << os.str();
2117 }
2118 }
2119 }
2120
2121
2122 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2123 size_t
2125 getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2126 {
2127 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2128 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2129 return Teuchos::OrdinalTraits<size_t>::invalid ();
2130 }
2131 else {
2132 return rowInfo.numEntries;
2133 }
2134 }
2135
2136
2137 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2138 size_t
2140 getNumEntriesInLocalRow (LocalOrdinal localRow) const
2141 {
2142 const RowInfo rowInfo = this->getRowInfo (localRow);
2143 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2144 return Teuchos::OrdinalTraits<size_t>::invalid ();
2145 }
2146 else {
2147 return rowInfo.numEntries;
2148 }
2149 }
2150
2151
2152 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2153 size_t
2155 getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2156 {
2157 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2158 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2159 return Teuchos::OrdinalTraits<size_t>::invalid ();
2160 }
2161 else {
2162 return rowInfo.allocSize;
2163 }
2164 }
2165
2166
2167 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2168 size_t
2170 getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2171 {
2172 const RowInfo rowInfo = this->getRowInfo (localRow);
2173 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2174 return Teuchos::OrdinalTraits<size_t>::invalid ();
2175 }
2176 else {
2177 return rowInfo.allocSize;
2178 }
2179 }
2180
2181
2182 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2183 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2189
2190 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2191 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2197
2198
2199 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2200 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2202 getLocalIndicesHost () const
2203 {
2204 return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2205 }
2206
// Returns the device view of lclIndsPacked_wdv, requested with
// Access::ReadOnly.
// NOTE(review): the lines holding this definition's return type, class
// qualifier, and name are missing from this listing; by analogy with
// getLocalIndicesHost() this is presumably getLocalIndicesDevice() --
// confirm against the upstream file.
2207 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2211 {
2212 return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2213 }
2214
2215 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2216 void
2218 getLocalRowCopy (LocalOrdinal localRow,
2219 nonconst_local_inds_host_view_type & indices,
2220 size_t& numEntries) const
2221 {
2222 using Teuchos::ArrayView;
2223 const char tfecfFuncName[] = "getLocalRowCopy: ";
2224
2225 TEUCHOS_TEST_FOR_EXCEPTION(
2226 isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2227 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2228 "does not have a column Map yet. That means we don't have local indices "
2229 "for columns yet, so it doesn't make sense to call this method. If the "
2230 "graph doesn't have a column Map yet, you should call fillComplete on "
2231 "it first.");
2232
2233 // This does the right thing (reports an empty row) if the input
2234 // row is invalid.
2235 const RowInfo rowinfo = this->getRowInfo (localRow);
2236 // No side effects on error.
2237 const size_t theNumEntries = rowinfo.numEntries;
2238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2239 (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2240 "Specified storage (size==" << indices.size () << ") does not suffice "
2241 "to hold all " << theNumEntries << " entry/ies for this row.");
2242 numEntries = theNumEntries;
2243
2244 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2245 if (isLocallyIndexed ()) {
2246 auto lclInds = getLocalIndsViewHost(rowinfo);
2247 for (size_t j = 0; j < theNumEntries; ++j) {
2248 indices[j] = lclInds(j);
2249 }
2250 }
2251 else if (isGloballyIndexed ()) {
2252 auto gblInds = getGlobalIndsViewHost(rowinfo);
2253 for (size_t j = 0; j < theNumEntries; ++j) {
2254 indices[j] = colMap_->getLocalElement (gblInds(j));
2255 }
2256 }
2257 }
2258 }
2259
2260
2261 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2262 void
2264 getGlobalRowCopy (GlobalOrdinal globalRow,
2265 nonconst_global_inds_host_view_type &indices,
2266 size_t& numEntries) const
2267 {
2268 using Teuchos::ArrayView;
2269 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2270
2271 // This does the right thing (reports an empty row) if the input
2272 // row is invalid.
2273 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2274 const size_t theNumEntries = rowinfo.numEntries;
2275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2276 static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2277 "Specified storage (size==" << indices.size () << ") does not suffice "
2278 "to hold all " << theNumEntries << " entry/ies for this row.");
2279 numEntries = theNumEntries; // first side effect
2280
2281 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2282 if (isLocallyIndexed ()) {
2283 auto lclInds = getLocalIndsViewHost(rowinfo);
2284 for (size_t j = 0; j < theNumEntries; ++j) {
2285 indices[j] = colMap_->getGlobalElement (lclInds(j));
2286 }
2287 }
2288 else if (isGloballyIndexed ()) {
2289 auto gblInds = getGlobalIndsViewHost(rowinfo);
2290 for (size_t j = 0; j < theNumEntries; ++j) {
2291 indices[j] = gblInds(j);
2292 }
2293 }
2294 }
2295 }
2296
2297
2298 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2299 void
2302 const LocalOrdinal localRow,
2303 local_inds_host_view_type &indices) const
2304 {
2305 const char tfecfFuncName[] = "getLocalRowView: ";
2306
2307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2308 (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2309 "currently stored as global indices, so we cannot return a view with "
2310 "local column indices, whether or not the graph has a column Map. If "
2311 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2312
2313 const RowInfo rowInfo = getRowInfo (localRow);
2314 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2315 rowInfo.numEntries > 0) {
2316 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2317 rowInfo.numEntries,
2318 Access::ReadOnly);
2319 }
2320 else {
2321 // This does the right thing (reports an empty row) if the input
2322 // row is invalid.
2323 indices = local_inds_host_view_type();
2324 }
2325
2326 if (debug_) {
2327 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2328 (static_cast<size_t> (indices.size ()) !=
2329 getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2330 "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2331 localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2332 ". Please report this bug to the Tpetra developers.");
2333 }
2334 }
2335
2336
2337 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2338 void
2341 const GlobalOrdinal globalRow,
2342 global_inds_host_view_type &indices) const
2343 {
2344 const char tfecfFuncName[] = "getGlobalRowView: ";
2345
2346 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2347 (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2348 "currently stored as local indices, so we cannot return a view with "
2349 "global column indices. Use getGlobalRowCopy() instead.");
2350
2351 // This does the right thing (reports an empty row) if the input
2352 // row is invalid.
2353 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2354 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2355 rowInfo.numEntries > 0) {
2356 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2357 rowInfo.numEntries,
2358 Access::ReadOnly);
2359 }
2360 else {
2361 indices = typename global_inds_dualv_type::t_host::const_type();
2362 }
2363 if (debug_) {
2364 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2365 (static_cast<size_t> (indices.size ()) !=
2366 getNumEntriesInGlobalRow (globalRow),
2367 std::logic_error, "indices.size() = " << indices.extent(0)
2368 << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2369 << getNumEntriesInGlobalRow (globalRow)
2370 << ". Please report this bug to the Tpetra developers.");
2371 }
2372 }
2373
2374
2375 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2376 void
2378 insertLocalIndices (const LocalOrdinal localRow,
2379 const Teuchos::ArrayView<const LocalOrdinal>& indices)
2380 {
2381 const char tfecfFuncName[] = "insertLocalIndices: ";
2382
2383 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2384 (! isFillActive (), std::runtime_error, "Fill must be active.");
2385 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2386 (isGloballyIndexed (), std::runtime_error,
2387 "Graph indices are global; use insertGlobalIndices().");
2388 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2389 (! hasColMap (), std::runtime_error,
2390 "Cannot insert local indices without a column Map.");
2391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2392 (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2393 "Local row index " << localRow << " is not in the row Map "
2394 "on the calling process.");
2395 if (! indicesAreAllocated ()) {
2396 allocateIndices (LocalIndices, verbose_);
2397 }
2398
2399 if (debug_) {
2400 // In debug mode, if the graph has a column Map, test whether any
2401 // of the given column indices are not in the column Map. Keep
2402 // track of the invalid column indices so we can tell the user
2403 // about them.
2404 if (hasColMap ()) {
2405 using Teuchos::Array;
2406 using Teuchos::toString;
2407 using std::endl;
2408 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2409
2410 const map_type& colMap = *colMap_;
2411 Array<LocalOrdinal> badColInds;
2412 bool allInColMap = true;
2413 for (size_type k = 0; k < indices.size (); ++k) {
2414 if (! colMap.isNodeLocalElement (indices[k])) {
2415 allInColMap = false;
2416 badColInds.push_back (indices[k]);
2417 }
2418 }
2419 if (! allInColMap) {
2420 std::ostringstream os;
2421 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2422 "entries in owned row " << localRow << ", at the following column "
2423 "indices: " << toString (indices) << "." << endl;
2424 os << "Of those, the following indices are not in the column Map on "
2425 "this process: " << toString (badColInds) << "." << endl << "Since "
2426 "the graph has a column Map already, it is invalid to insert entries "
2427 "at those locations.";
2428 TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2429 }
2430 }
2431 }
2432
2433 insertLocalIndicesImpl (localRow, indices);
2434
2435 if (debug_) {
2436 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2437 (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2438 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2439 "! isLocallyIndexed() is true. Please report this bug to the "
2440 "Tpetra developers.");
2441 }
2442 }
2443
2444 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2445 void
2447 insertLocalIndices (const LocalOrdinal localRow,
2448 const LocalOrdinal numEnt,
2449 const LocalOrdinal inds[])
2450 {
2451 Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2452 this->insertLocalIndices (localRow, indsT);
2453 }
2454
2455
2456 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2457 void
2459 insertGlobalIndices (const GlobalOrdinal gblRow,
2460 const LocalOrdinal numInputInds,
2461 const GlobalOrdinal inputGblColInds[])
2462 {
2463 typedef LocalOrdinal LO;
2464 const char tfecfFuncName[] = "insertGlobalIndices: ";
2465
2466 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2467 (this->isLocallyIndexed (), std::runtime_error,
2468 "graph indices are local; use insertLocalIndices().");
2469 // This can't really be satisfied for now, because if we are
2470 // fillComplete(), then we are local. In the future, this may
2471 // change. However, the rule that modification require active
2472 // fill will not change.
2473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2474 (! this->isFillActive (), std::runtime_error,
2475 "You are not allowed to call this method if fill is not active. "
2476 "If fillComplete has been called, you must first call resumeFill "
2477 "before you may insert indices.");
2478 if (! indicesAreAllocated ()) {
2479 allocateIndices (GlobalIndices, verbose_);
2480 }
2481 const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2482 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2483 if (debug_) {
2484 if (this->hasColMap ()) {
2485 using std::endl;
2486 const map_type& colMap = * (this->colMap_);
2487 // In a debug build, keep track of the nonowned ("bad") column
2488 // indices, so that we can display them in the exception
2489 // message. In a release build, just ditch the loop early if
2490 // we encounter a nonowned column index.
2491 std::vector<GlobalOrdinal> badColInds;
2492 bool allInColMap = true;
2493 for (LO k = 0; k < numInputInds; ++k) {
2494 if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2495 allInColMap = false;
2496 badColInds.push_back (inputGblColInds[k]);
2497 }
2498 }
2499 if (! allInColMap) {
2500 std::ostringstream os;
2501 os << "You attempted to insert entries in owned row " << gblRow
2502 << ", at the following column indices: [";
2503 for (LO k = 0; k < numInputInds; ++k) {
2504 os << inputGblColInds[k];
2505 if (k + static_cast<LO> (1) < numInputInds) {
2506 os << ",";
2507 }
2508 }
2509 os << "]." << endl << "Of those, the following indices are not in "
2510 "the column Map on this process: [";
2511 for (size_t k = 0; k < badColInds.size (); ++k) {
2512 os << badColInds[k];
2513 if (k + size_t (1) < badColInds.size ()) {
2514 os << ",";
2515 }
2516 }
2517 os << "]." << endl << "Since the matrix has a column Map already, "
2518 "it is invalid to insert entries at those locations.";
2519 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2520 (true, std::invalid_argument, os.str ());
2521 }
2522 }
2523 } // debug_
2524 this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2525 }
2526 else { // a nonlocal row
2527 this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2528 numInputInds);
2529 }
2530 }
2531
2532
2533 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2534 void
2536 insertGlobalIndices (const GlobalOrdinal gblRow,
2537 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2538 {
2539 this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2540 inputGblColInds.getRawPtr ());
2541 }
2542
2543
2544 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2545 void
2547 insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2548 const GlobalOrdinal gblColInds[],
2549 const LocalOrdinal numGblColInds)
2550 {
2551 typedef LocalOrdinal LO;
2552 typedef GlobalOrdinal GO;
2553 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2554
2555 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2556 (this->isLocallyIndexed (), std::runtime_error,
2557 "Graph indices are local; use insertLocalIndices().");
2558 // This can't really be satisfied for now, because if we are
2559 // fillComplete(), then we are local. In the future, this may
2560 // change. However, the rule that modification require active
2561 // fill will not change.
2562 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2563 (! this->isFillActive (), std::runtime_error,
2564 "You are not allowed to call this method if fill is not active. "
2565 "If fillComplete has been called, you must first call resumeFill "
2566 "before you may insert indices.");
2567 if (! indicesAreAllocated ()) {
2568 allocateIndices (GlobalIndices, verbose_);
2569 }
2570
2571 Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2572 // If we have a column Map, use it to filter the entries.
2573 if (! colMap_.is_null ()) {
2574 const map_type& colMap = * (this->colMap_);
2575
2576 LO curOffset = 0;
2577 while (curOffset < numGblColInds) {
2578 // Find a sequence of input indices that are in the column Map
2579 // on the calling process. Doing a sequence at a time,
2580 // instead of one at a time, amortizes some overhead.
2581 LO endOffset = curOffset;
2582 for ( ; endOffset < numGblColInds; ++endOffset) {
2583 const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2584 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2585 break; // first entry, in current sequence, not in the column Map
2586 }
2587 }
2588 // curOffset, endOffset: half-exclusive range of indices in
2589 // the column Map on the calling process. If endOffset ==
2590 // curOffset, the range is empty.
2591 const LO numIndInSeq = (endOffset - curOffset);
2592 if (numIndInSeq != 0) {
2593 this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2594 numIndInSeq);
2595 }
2596 // Invariant before this line: Either endOffset ==
2597 // numGblColInds, or gblColInds[endOffset] is not in the
2598 // column Map on the calling process.
2599 curOffset = endOffset + 1;
2600 }
2601 }
2602 else {
2603 this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2604 gblColInds_av.size ());
2605 }
2606 }
2607
2608 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2609 void
2611 insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2612 const GlobalOrdinal gblColInds[],
2613 const LocalOrdinal numGblColInds)
2614 {
2615 // This creates the std::vector if it doesn't exist yet.
2616 // std::map's operator[] does a lookup each time, so it's better
2617 // to pull nonlocals_[grow] out of the loop.
2618 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2619 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2620 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2621 // order to avoid duplicates. globalAssemble() sorts these
2622 // anyway.
2623 nonlocalRow.push_back (gblColInds[k]);
2624 }
2625 }
2626
2627 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2628 void
2630 removeLocalIndices (LocalOrdinal lrow)
2631 {
2632 const char tfecfFuncName[] = "removeLocalIndices: ";
2633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2634 ! isFillActive (), std::runtime_error, "requires that fill is active.");
2635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2636 isStorageOptimized (), std::runtime_error,
2637 "cannot remove indices after optimizeStorage() has been called.");
2638 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2639 isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2640 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2641 ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2642 "Local row " << lrow << " is not in the row Map on the calling process.");
2643 if (! indicesAreAllocated ()) {
2644 allocateIndices (LocalIndices, verbose_);
2645 }
2646
2647 if (k_numRowEntries_.extent (0) != 0) {
2648 this->k_numRowEntries_(lrow) = 0;
2649 }
2650
2651 if (debug_) {
2652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2653 (getNumEntriesInLocalRow (lrow) != 0 ||
2654 ! indicesAreAllocated () ||
2655 ! isLocallyIndexed (), std::logic_error,
2656 "Violated stated post-conditions. Please contact Tpetra team.");
2657 }
2658 }
2659
2660
2661 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2662 void
2664 setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2665 const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2666 {
2667 using ProfilingRegion=Details::ProfilingRegion;
2668 ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2669 const char tfecfFuncName[] = "setAllIndices: ";
2670 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2671 ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2672 "The graph must have a column Map before you may call this method.");
2673 LocalOrdinal numLocalRows = this->getLocalNumRows ();
2674 {
2675 LocalOrdinal rowPtrLen = rowPointers.size();
2676 if(numLocalRows == 0) {
2677 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2678 rowPtrLen != 0 && rowPtrLen != 1,
2679 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2680 }
2681 else {
2682 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2683 rowPtrLen != numLocalRows + 1,
2684 std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2685 " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2686 }
2687 }
2688
2689 if(debug_) {
2690 using exec_space = typename local_graph_device_type::execution_space;
2691 int columnsOutOfBounds = 0;
2692 local_ordinal_type numLocalCols = this->getLocalNumCols();
2693 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2694 KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2695 {
2696 if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2697 lOutOfBounds++;
2698 }, columnsOutOfBounds);
2699 int globalColsOutOfBounds= 0;
2700 auto comm = this->getComm();
2701 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2702 Teuchos::outArg (globalColsOutOfBounds));
2703 if (globalColsOutOfBounds)
2704 {
2705 std::string message;
2706 if (columnsOutOfBounds)
2707 {
2708 //Only print message from ranks with the problem
2709 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2710 }
2711 Details::gathervPrint(std::cout, message, *comm);
2712 throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2713 }
2714 }
2715
2716 if (debug_ && this->isSorted()) {
2717 // Verify that the local indices are actually sorted
2718 int notSorted = 0;
2719 using exec_space = typename local_graph_device_type::execution_space;
2720 using size_type = typename local_graph_device_type::size_type;
2721 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2722 KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2723 {
2724 size_type rowBegin = rowPointers(i);
2725 size_type rowEnd = rowPointers(i + 1);
2726 for(size_type j = rowBegin + 1; j < rowEnd; j++)
2727 {
2728 if(columnIndices(j - 1) > columnIndices(j))
2729 {
2730 lNotSorted = 1;
2731 }
2732 }
2733 }, notSorted);
2734 //All-reduce notSorted to avoid rank divergence
2735 int globalNotSorted = 0;
2736 auto comm = this->getComm();
2737 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2738 Teuchos::outArg (globalNotSorted));
2739 if (globalNotSorted)
2740 {
2741 std::string message;
2742 if (notSorted)
2743 {
2744 //Only print message from ranks with the problem
2745 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2746 }
2747 Details::gathervPrint(std::cout, message, *comm);
2748 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2749 }
2750 }
2751
2752 indicesAreAllocated_ = true;
2753 indicesAreLocal_ = true;
2754 indicesAreSorted_ = true;
2755 noRedundancies_ = true;
2756 lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2758 setRowPtrs(rowPointers);
2759
2760 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2761
2762 // Storage MUST be packed, since the interface doesn't give any
2763 // way to indicate any extra space at the end of each row.
2764 storageStatus_ = Details::STORAGE_1D_PACKED;
2765
2766 // These normally get cleared out at the end of allocateIndices.
2767 // It makes sense to clear them out here, because at the end of
2768 // this method, the graph is allocated on the calling process.
2770 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2771
2773 }
2774
2775
// setAllIndices (Teuchos::ArrayRCP overload).
//
// Converts host-side arrays into Kokkos Views and forwards them to the
// setAllIndices overload that is called with Views at the end of this
// function.
//
// \param rowPointers   [in] Row offsets as size_t.  Wrapped (not copied)
//   in an unmanaged HostSpace View, then copied into a newly allocated
//   row-offsets View of the graph's row offset type.
// \param columnIndices [in] Column indices; copied into a device_type
//   View through getKokkosViewDeepCopy.
//
// NOTE(review): the embedded listing numbers skip 2778 (between `void`
// and the function name), so one line -- presumably the class qualifier
// -- is missing from this listing.  Confirm against the real file.
2776 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2777 void
2779 setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2780 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2781 {
2782 using Kokkos::View;
2783 typedef typename local_graph_device_type::row_map_type row_map_type;
2784 typedef typename row_map_type::array_layout layout_type;
2785 typedef typename row_map_type::non_const_value_type row_offset_type;
2786 typedef View<size_t*, layout_type , Kokkos::HostSpace,
2787 Kokkos::MemoryUnmanaged> input_view_type;
2788 typedef typename row_map_type::non_const_type nc_row_map_type;
2789
2790 const size_t size = static_cast<size_t> (rowPointers.size ());
// `same` selects, at compile time, between a plain deep_copy and a
// converting copy (copyOffsets) of the row offsets.
2791 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
// Unmanaged view over the caller's raw pointer: no allocation, no copy.
2792 input_view_type ptr_in (rowPointers.getRawPtr (), size);
2793
// Destination for the row offsets, with the same number of entries as the input.
2794 nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2795
2796 // FIXME get rid of the else-clause when the minimum CXX standard required is bumped to C++17
// Exactly one of the two `if` headers below survives preprocessing; the
// `else` after the #endif pairs with whichever one was kept.
2797#ifdef KOKKOS_ENABLE_CXX17
2798 if constexpr (same) { // size_t == row_offset_type
2799 using lexecution_space = typename device_type::execution_space;
2800 Kokkos::deep_copy (lexecution_space(),
2801 ptr_rot,
2802 ptr_in);
2803 }
2804#else
2805 if (same) { // size_t == row_offset_type
2806 // This compile-time logic ensures that the compiler never sees
2807 // an assignment of View<row_offset_type*, ...> to View<size_t*,
2808 // ...> unless size_t == row_offset_type.
2809 input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
2810 // DEEP_COPY REVIEW - HOST-TO-DEVICE
2811 Kokkos::deep_copy (execution_space(),
2812 Kokkos::Impl::if_c<same,
2813 nc_row_map_type,
2814 input_view_type>::select (ptr_rot, ptr_decoy),
2815 ptr_in);
2816 }
2817#endif
2818 else { // size_t != row_offset_type
2819 // CudaUvmSpace != HostSpace, so this will be false in that case.
2820 constexpr bool inHostMemory =
2821 std::is_same<typename row_map_type::memory_space,
2822 Kokkos::HostSpace>::value;
2823 if (inHostMemory) {
2824 // Copy (with cast from size_t to row_offset_type, with bounds
2825 // checking if necessary) to ptr_rot.
2826 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2827 }
2828 else { // Copy input row offsets to device first.
2829 //
2830 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2831 // execution space would avoid the double copy.
2832 //
// Intermediate size_t View on device_type: first a same-type copy from
// the host input, then the converting copy into ptr_rot.
2833 View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2834
2835 // DEEP_COPY REVIEW - NOT TESTED
2836 Kokkos::deep_copy (ptr_st, ptr_in);
2837 // Copy on device (casting from size_t to row_offset_type,
2838 // with bounds checking if necessary) to ptr_rot. This
2839 // executes in the output View's execution space, which is the
2840 // same as execution_space.
2841 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2842 }
2843 }
2844
// Copy the column indices into a device_type View, then hand both Views
// to the other setAllIndices overload.
2845 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2846 Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2847 setAllIndices (ptr_rot, k_ind);
2848 }
2849
2850
// globalAssemble: move the entries stored in nonlocals_ into *this.
//
// Outline (matches the numbered steps in the body):
//   0. All-reduce to find out whether any process has nonlocal rows;
//      return immediately if none does.
//   1. Sort and deduplicate the column indices of each nonlocal row (in
//      place, inside nonlocals_) and record the per-row counts.
//   2. Build a Map (nonlocalRowMap) from the nonlocal row indices.
//   3. Build a temporary CrsGraph over that Map and insert the indices.
//   4. Export from the temporary graph into *this if the row Map is
//      one-to-one; otherwise Export into a temporary one-to-one graph and
//      Import from that into *this.
//   Finally, empty nonlocals_.
//
// Throws std::runtime_error if fill is not active.  The reduceAll calls
// below are executed on the communicator returned by getComm().
//
// NOTE(review): the embedded listing numbers skip 2853-2854 (the lines
// that should carry the qualified function name and parameter list) and
// 3060 (just before the final verbose output), so those lines are missing
// from this listing.  Confirm against the real file.
2851 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2852 void
2855 {
2856 using Teuchos::Comm;
2857 using Teuchos::outArg;
2858 using Teuchos::RCP;
2859 using Teuchos::rcp;
2860 using Teuchos::REDUCE_MAX;
2861 using Teuchos::REDUCE_MIN;
2862 using Teuchos::reduceAll;
2863 using std::endl;
2864 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2865 using LO = local_ordinal_type;
2866 using GO = global_ordinal_type;
2867 using size_type = typename Teuchos::Array<GO>::size_type;
2868 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2869
// prefix is only created (and only dereferenced) when verbose_ is true.
2870 std::unique_ptr<std::string> prefix;
2871 if (verbose_) {
2872 prefix = this->createPrefix("CrsGraph", "globalAssemble");
2873 std::ostringstream os;
2874 os << *prefix << "Start" << endl;
2875 std::cerr << os.str();
2876 }
2877 RCP<const Comm<int> > comm = getComm ();
2878
2879 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2880 (! isFillActive (), std::runtime_error, "Fill must be active before "
2881 "you may call this method.");
2882
2883 const size_t myNumNonlocalRows = this->nonlocals_.size ();
2884
2885 // If no processes have nonlocal rows, then we don't have to do
2886 // anything. Checking this is probably cheaper than constructing
2887 // the Map of nonlocal rows (see below) and noticing that it has
2888 // zero global entries.
2889 {
2890 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2891 int someoneHasNonlocalRows = 0;
2892 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2893 outArg (someoneHasNonlocalRows));
2894 if (someoneHasNonlocalRows == 0) {
2895 if (verbose_) {
2896 std::ostringstream os;
2897 os << *prefix << "Done: No nonlocal rows" << endl;
2898 std::cerr << os.str();
2899 }
2900 return;
2901 }
2902 else if (verbose_) {
2903 std::ostringstream os;
2904 os << *prefix << "At least 1 process has nonlocal rows"
2905 << endl;
2906 std::cerr << os.str();
2907 }
2908 }
2909
2910 // 1. Create a list of the "nonlocal" rows on each process. This
2911 // requires iterating over nonlocals_, so while we do this,
2912 // deduplicate the entries and get a count for each nonlocal
2913 // row on this process.
2914 // 2. Construct a new row Map corresponding to those rows. This
2915 // Map is likely overlapping. We know that the Map is not
2916 // empty on all processes, because the above all-reduce and
2917 // return exclude that case.
2918
2919 RCP<const map_type> nonlocalRowMap;
2920 // Keep this for CrsGraph's constructor.
2921 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2922 {
2923 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2924 size_type curPos = 0;
// curPos advances in lockstep with the iterator, so entry k of
// myNonlocalGblRows / numEntPerNonlocalRow describes the k-th element of
// nonlocals_ in iteration order.
2925 for (auto mapIter = this->nonlocals_.begin ();
2926 mapIter != this->nonlocals_.end ();
2927 ++mapIter, ++curPos) {
2928 myNonlocalGblRows[curPos] = mapIter->first;
2929 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2930 std::sort (gblCols.begin (), gblCols.end ());
2931 auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2932 gblCols.erase (vecLast, gblCols.end ());
2933 numEntPerNonlocalRow[curPos] = gblCols.size ();
2934 }
2935
2936 // Currently, Map requires that its indexBase be the global min
2937 // of all its global indices. Map won't compute this for us, so
2938 // we must do it. If our process has no nonlocal rows, set the
2939 // "min" to the max possible GO value. This ensures that if
2940 // some process has at least one nonlocal row, then it will pick
2941 // that up as the min. We know that at least one process has a
2942 // nonlocal row, since the all-reduce and return at the top of
2943 // this method excluded that case.
2944 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2945 {
2946 auto iter = std::min_element (myNonlocalGblRows.begin (),
2947 myNonlocalGblRows.end ());
2948 if (iter != myNonlocalGblRows.end ()) {
2949 myMinNonlocalGblRow = *iter;
2950 }
2951 }
2952 GO gblMinNonlocalGblRow = 0;
2953 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2954 outArg (gblMinNonlocalGblRow));
2955 const GO indexBase = gblMinNonlocalGblRow;
// INV is passed as the Map's global element count argument.
2956 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2957 nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
2958 }
2959
2960 if (verbose_) {
2961 std::ostringstream os;
2962 os << *prefix << "nonlocalRowMap->getIndexBase()="
2963 << nonlocalRowMap->getIndexBase() << endl;
2964 std::cerr << os.str();
2965 }
2966
2967 // 3. Use the column indices for each nonlocal row, as stored in
2968 // nonlocals_, to construct a CrsGraph corresponding to
2969 // nonlocal rows. We need, and we have, exact counts of the
2970 // number of entries in each nonlocal row.
2971
2972 RCP<crs_graph_type> nonlocalGraph =
2973 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2974 {
2975 size_type curPos = 0;
2976 for (auto mapIter = this->nonlocals_.begin ();
2977 mapIter != this->nonlocals_.end ();
2978 ++mapIter, ++curPos) {
2979 const GO gblRow = mapIter->first;
2980 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2981 const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
2982 nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
2983 }
2984 }
2985 if (verbose_) {
2986 std::ostringstream os;
2987 os << *prefix << "Built nonlocal graph" << endl;
2988 std::cerr << os.str();
2989 }
2990 // There's no need to fill-complete the nonlocals graph.
2991 // We just use it as a temporary container for the Export.
2992
2993 // 4. If the original row Map is one to one, then we can Export
2994 // directly from nonlocalGraph into this. Otherwise, we have
2995 // to create a temporary graph with a one-to-one row Map,
2996 // Export into that, then Import from the temporary graph into
2997 // *this.
2998
2999 auto origRowMap = this->getRowMap ();
3000 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3001
3002 if (origRowMapIsOneToOne) {
3003 if (verbose_) {
3004 std::ostringstream os;
3005 os << *prefix << "Original row Map is 1-to-1" << endl;
3006 std::cerr << os.str();
3007 }
3008 export_type exportToOrig (nonlocalRowMap, origRowMap);
3009 this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3010 // We're done at this point!
3011 }
3012 else {
3013 if (verbose_) {
3014 std::ostringstream os;
3015 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3016 std::cerr << os.str();
3017 }
3018 // If you ask a Map whether it is one to one, it does some
3019 // communication and stashes intermediate results for later use
3020 // by createOneToOne. Thus, calling createOneToOne doesn't cost
3021 // much more than the original cost of calling isOneToOne.
3022 auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3023 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3024
3025 // Create a temporary graph with the one-to-one row Map.
3026 //
3027 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3028 // row, to avoid reallocation during the Export operation.
3029 crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3030
3031 // Export from graph of nonlocals into the temp one-to-one graph.
3032 if (verbose_) {
3033 std::ostringstream os;
3034 os << *prefix << "Export nonlocal graph" << endl;
3035 std::cerr << os.str();
3036 }
3037 oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3038
3039 // We don't need the graph of nonlocals anymore, so get rid of
3040 // it, to keep the memory high-water mark down.
3041 nonlocalGraph = Teuchos::null;
3042
3043 // Import from the one-to-one graph to the original graph.
3044 import_type importToOrig (oneToOneRowMap, origRowMap);
3045 if (verbose_) {
3046 std::ostringstream os;
3047 os << *prefix << "Import nonlocal graph" << endl;
3048 std::cerr << os.str();
3049 }
3050 this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3051 }
3052
3053 // It's safe now to clear out nonlocals_, since we've already
3054 // committed side effects to *this. The standard idiom for
3055 // clearing a Container like std::map, is to swap it with an empty
3056 // Container and let the swapped Container fall out of scope.
3057 decltype (this->nonlocals_) newNonlocals;
3058 std::swap (this->nonlocals_, newNonlocals);
3059
3061 if (verbose_) {
3062 std::ostringstream os;
3063 os << *prefix << "Done" << endl;
3064 std::cerr << os.str();
3065 }
3066 }
3067
3068
// resumeFill: put the graph back into the "fill active" state.
//
// Calls clearGlobalConstants(), applies `params` through
// setParameterList() when it is non-null, marks the indices as sorted and
// free of redundancies, and clears the fillComplete_ flag.
//
// \param params [in] Optional parameter list; ignored when null.
//
// NOTE(review): the embedded listing numbers skip 3071 (between `void`
// and the function name), so one line -- presumably the class qualifier
// -- is missing from this listing.
3069 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3070 void
3072 resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3073 {
3074 clearGlobalConstants();
3075 if (params != Teuchos::null) this->setParameterList (params);
3076 // either still sorted/merged or initially sorted/merged
3077 indicesAreSorted_ = true;
3078 noRedundancies_ = true;
3079 fillComplete_ = false;
3080 }
3081
3082
// fillComplete (parameter-list-only overload).
//
// Picks the domain and range Maps and delegates to the three-argument
// fillComplete: each Map is the graph's current one if it is non-null,
// otherwise the row Map.
//
// \param params [in] Forwarded unchanged to the three-argument overload.
//
// NOTE(review): the embedded listing numbers skip 3085 (between `void`
// and the function name), so one line -- presumably the class qualifier
// -- is missing from this listing.
3083 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3084 void
3086 fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3087 {
3088 // If the graph already has domain and range Maps, don't clobber
3089 // them. If it doesn't, use the current row Map for both the
3090 // domain and range Maps.
3091 //
3092 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3093 // column Map, and column indices are inserted which are not in
3094 // the row Map on any process, this will cause troubles. However,
3095 // that is not a common case for most applications that we
3096 // encounter, and checking for it might require more
3097 // communication.
3098 Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3099 if (domMap.is_null ()) {
3100 domMap = this->getRowMap ();
3101 }
3102 Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3103 if (ranMap.is_null ()) {
3104 ranMap = this->getRowMap ();
3105 }
3106 this->fillComplete (domMap, ranMap, params);
3107 }
3108
3109
// fillComplete (domain Map, range Map, parameter list).
//
// Sequence performed by the body:
//   1. Require that fill is active and the graph is not fill complete
//      (std::runtime_error otherwise).
//   2. Read parameters: the ghost-GID sorting option (two spellings are
//      accepted) and "No Nonlocal Changes".
//   3. Allocate indices if not yet allocated (local indices if a column
//      Map exists, global indices otherwise).
//   4. Call globalAssemble() when nonlocal changes have not been ruled out
//      and there is more than one process; otherwise verify that
//      nonlocals_ is empty (globally in debug mode, locally otherwise).
//   5. setDomainRangeMaps, makeColMap (only if there is no column Map),
//      makeIndicesLocal (with error reporting), sortAndMergeAllIndices,
//      makeImportExport, fillLocalGraph.
//   6. computeGlobalConstants() unless the "compute global constants"
//      parameter is false, in which case computeLocalConstants().
//   7. Set fillComplete_ and call checkInternalState().
//
// \param domainMap [in] Passed to setDomainRangeMaps.
// \param rangeMap  [in] Passed to setDomainRangeMaps.
// \param params    [in] Optional parameter list; may be null.
//
// NOTE(review): the embedded listing numbers skip 3112, 3144, 3146, 3149,
// 3151 and 3263, so those lines are missing from this listing.  As shown,
// the two params->get<bool> statements in step 2 are incomplete, and the
// unqualified gathervPrint call in the debug branch has no visible
// using-declaration.  Confirm against the real file.
3110 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3111 void
3113 fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3114 const Teuchos::RCP<const map_type>& rangeMap,
3115 const Teuchos::RCP<Teuchos::ParameterList>& params)
3116 {
3117 using std::endl;
3118 const char tfecfFuncName[] = "fillComplete: ";
3119 const bool verbose = verbose_;
3120
// prefix is only created (and only dereferenced) when verbose is true.
3121 std::unique_ptr<std::string> prefix;
3122 if (verbose) {
3123 prefix = this->createPrefix("CrsGraph", "fillComplete");
3124 std::ostringstream os;
3125 os << *prefix << "Start" << endl;
3126 std::cerr << os.str();
3127 }
3128
3129 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3130 (! isFillActive () || isFillComplete (), std::runtime_error,
3131 "Graph fill state must be active (isFillActive() "
3132 "must be true) before calling fillComplete().");
3133
3134 const int numProcs = getComm ()->getSize ();
3135
3136 //
3137 // Read and set parameters
3138 //
3139
3140 // Does the caller want to sort remote GIDs (within those owned by
3141 // the same process) in makeColMap()?
3142 if (! params.is_null ()) {
3143 if (params->isParameter ("sort column map ghost gids")) {
3145 params->get<bool> ("sort column map ghost gids",
3147 }
3148 else if (params->isParameter ("Sort column Map ghost GIDs")) {
3150 params->get<bool> ("Sort column Map ghost GIDs",
3152 }
3153 }
3154
3155 // If true, the caller promises that no process did nonlocal
3156 // changes since the last call to fillComplete.
3157 bool assertNoNonlocalInserts = false;
3158 if (! params.is_null ()) {
3159 assertNoNonlocalInserts =
3160 params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3161 }
3162
3163 //
3164 // Allocate indices, if they haven't already been allocated
3165 //
3166 if (! indicesAreAllocated ()) {
3167 if (hasColMap ()) {
3168 // We have a column Map, so use local indices.
3169 allocateIndices (LocalIndices, verbose);
3170 } else {
3171 // We don't have a column Map, so use global indices.
3172 allocateIndices (GlobalIndices, verbose);
3173 }
3174 }
3175
3176 //
3177 // Do global assembly, if requested and if the communicator
3178 // contains more than one process.
3179 //
3180 const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3181 if (mayNeedGlobalAssemble) {
3182 // This first checks if we need to do global assembly.
3183 // The check costs a single all-reduce.
3184 globalAssemble ();
3185 }
3186 else {
// Global assembly is skipped; the rest of this branch only checks that
// skipping it was legitimate, i.e. that nonlocals_ is empty.
3187 const size_t numNonlocals = nonlocals_.size();
3188 if (verbose) {
3189 std::ostringstream os;
// NOTE(review): there is no separator between the "true"/"false" text
// and "numProcs=", so the two run together in the output.
3190 os << *prefix << "Do not need to call globalAssemble; "
3191 "assertNoNonlocalInserts="
3192 << (assertNoNonlocalInserts ? "true" : "false")
3193 << "numProcs=" << numProcs
3194 << ", nonlocals_.size()=" << numNonlocals << endl;
3195 std::cerr << os.str();
3196 }
3197 const int lclNeededGlobalAssemble =
3198 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3199 if (lclNeededGlobalAssemble != 0 && verbose) {
3200 std::ostringstream os;
3201 os << *prefix;
3202 Details::Impl::verbosePrintMap(
3203 os, nonlocals_.begin(), nonlocals_.end(),
3204 nonlocals_.size(), "nonlocals_");
3205 std::cerr << os.str() << endl;
3206 }
3207
// Debug mode: all-reduce the local flag so the exception below is raised
// based on the global result.  Otherwise only the local flag is tested.
3208 if (debug_) {
3209 auto map = this->getMap();
3210 auto comm = map.is_null() ? Teuchos::null : map->getComm();
3211 int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3212 if (! comm.is_null()) {
3213 using Teuchos::REDUCE_MAX;
3214 using Teuchos::reduceAll;
3215 reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3216 Teuchos::outArg(gblNeededGlobalAssemble));
3217 }
3218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3219 (gblNeededGlobalAssemble != 0, std::runtime_error,
3220 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3221 "least one process in the CrsGraph's communicator. This "
3222 "means either that you incorrectly set the "
3223 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3224 "or that you inserted invalid entries. "
3225 "Rerun with the environment variable TPETRA_VERBOSE="
3226 "CrsGraph set to see the entries of nonlocals_ on every "
3227 "MPI process (WARNING: lots of output).");
3228 }
3229 else {
3230 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3231 (lclNeededGlobalAssemble != 0, std::runtime_error,
3232 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3233 "calling process. This means either that you incorrectly "
3234 "set the \"No Nonlocal Changes\" fillComplete parameter "
3235 "to true, or that you inserted invalid entries. "
3236 "Rerun with the environment "
3237 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3238 "of nonlocals_ on every MPI process (WARNING: lots of "
3239 "output).");
3240 }
3241 }
3242
3243 // Set domain and range Map. This may clear the Import / Export
3244 // objects if the new Maps differ from any old ones.
3245 setDomainRangeMaps (domainMap, rangeMap);
3246
3247 // If the graph does not already have a column Map (either from
3248 // the user constructor calling the version of the constructor
3249 // that takes a column Map, or from a previous fillComplete call),
3250 // then create it.
3251 Teuchos::Array<int> remotePIDs (0);
3252 const bool mustBuildColMap = ! this->hasColMap ();
3253 if (mustBuildColMap) {
3254 this->makeColMap (remotePIDs); // resized on output
3255 }
3256
3257 // Make indices local, if they aren't already.
3258 // The method doesn't do any work if the indices are already local.
// The result's .first is tested against zero as the error indicator below
// and .second is used as the error message.
3259 const std::pair<size_t, std::string> makeIndicesLocalResult =
3260 this->makeIndicesLocal(verbose);
3261
3262 if (debug_) {
3264 using Teuchos::RCP;
3265 using Teuchos::REDUCE_MIN;
3266 using Teuchos::reduceAll;
3267 using Teuchos::outArg;
3268
3269 RCP<const map_type> map = this->getMap ();
3270 RCP<const Teuchos::Comm<int> > comm;
3271 if (! map.is_null ()) {
3272 comm = map->getComm ();
3273 }
3274 if (comm.is_null ()) {
3275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3276 (makeIndicesLocalResult.first != 0, std::runtime_error,
3277 makeIndicesLocalResult.second);
3278 }
3279 else {
// Agree on success across the communicator (REDUCE_MIN of a 0/1 flag);
// on failure, gather the per-process messages into the exception text.
3280 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3281 int gblSuccess = 0; // output argument
3282 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3283 if (gblSuccess != 1) {
3284 std::ostringstream os;
3285 gathervPrint (os, makeIndicesLocalResult.second, *comm);
3286 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3287 (true, std::runtime_error, os.str ());
3288 }
3289 }
3290 }
3291 else {
3292 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3293 // the error state to makeImportExport or
3294 // computeGlobalConstants, which may do all-reduces and thus may
3295 // have the opportunity to communicate that error state.
3296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3297 (makeIndicesLocalResult.first != 0, std::runtime_error,
3298 makeIndicesLocalResult.second);
3299 }
3300
3301 // If this process has no indices, then CrsGraph considers it
3302 // already trivially sorted and merged. Thus, this method need
3303 // not be called on all processes in the row Map's communicator.
3304 this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3305
3306 // Make Import and Export objects, if they haven't been made
3307 // already. If we made a column Map above, reuse information from
3308 // that process to avoid communication in the Import setup.
3309 this->makeImportExport (remotePIDs, mustBuildColMap);
3310
3311 // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3312 this->fillLocalGraph (params);
3313
// Global constants are computed when params is null or when the
// "compute global constants" parameter is true (its default here).
3314 const bool callComputeGlobalConstants = params.get () == nullptr ||
3315 params->get ("compute global constants", true);
3316 if (callComputeGlobalConstants) {
3317 this->computeGlobalConstants ();
3318 }
3319 else {
3320 this->computeLocalConstants ();
3321 }
3322 this->fillComplete_ = true;
3323 this->checkInternalState ();
3324
3325 if (verbose) {
3326 std::ostringstream os;
3327 os << *prefix << "Done" << endl;
3328 std::cerr << os.str();
3329 }
3330 }
3331
3332
// expertStaticFillComplete: finish fill on a graph whose structure has
// already been set and which already has a column Map.
//
// Unlike fillComplete, this does not call allocateIndices,
// globalAssemble, makeColMap, makeIndicesLocal or sortAndMergeAllIndices;
// it sets the corresponding state flags directly and trusts the caller.
//
// \param domainMap [in] Must be non-null.
// \param rangeMap  [in] Must be non-null.
// \param importer  [in] Optional.  If non-null, its source Map must be
//   the same as the domain Map and its target Map the same as the column
//   Map.
// \param exporter  [in] Optional.  If non-null, its source Map must be
//   the same as the row Map and its target Map the same as the range Map.
// \param params    [in] Optional.  Read for "Timer Label" (only when
//   HAVE_TPETRA_MMM_TIMINGS is defined) and "compute global constants";
//   also passed to fillLocalGraph.
//
// Throws std::runtime_error if a Map argument is null, if the graph is
// already fill complete or has no column Map, or if the row offsets array
// does not have getLocalNumRows()+1 entries.  Throws
// std::invalid_argument if a supplied importer or exporter does not match
// the graph's Maps.
//
// NOTE(review): the embedded listing numbers skip 3335, 3384 and 3472, so
// those lines are missing from this listing (3335 is presumably the class
// qualifier).  Confirm against the real file.
3333 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3334 void
3336 expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3337 const Teuchos::RCP<const map_type>& rangeMap,
3338 const Teuchos::RCP<const import_type>& importer,
3339 const Teuchos::RCP<const export_type>& exporter,
3340 const Teuchos::RCP<Teuchos::ParameterList>& params)
3341 {
3342 const char tfecfFuncName[] = "expertStaticFillComplete: ";
// In every timing block below, MM is reset to null before a new
// TimeMonitor is assigned -- presumably so that the previous timer is
// released before the next one starts (TODO confirm).
3343#ifdef HAVE_TPETRA_MMM_TIMINGS
3344 std::string label;
3345 if(!params.is_null())
3346 label = params->get("Timer Label",label);
3347 std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3348 using Teuchos::TimeMonitor;
3349 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3350#endif
3351
3352
3353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3354 domainMap.is_null () || rangeMap.is_null (),
3355 std::runtime_error, "The input domain Map and range Map must be nonnull.");
// NOTE(review): the condition also rejects a graph that is already fill
// complete, but the message only mentions the missing column Map.
3356 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3357 isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3358 "call this method unless the graph has a column Map.");
3359 auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedHost().extent (0);
3360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3361 getLocalNumRows () > 0 && rowPtrsUnpackedLength == 0,
3362 std::runtime_error, "The calling process has getLocalNumRows() = "
3363 << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3364 "been set.");
// This length check also applies when there are zero local rows, in which
// case the row offsets array must have exactly one entry.
3365 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3366 static_cast<size_t> (rowPtrsUnpackedLength) != getLocalNumRows () + 1,
3367 std::runtime_error, "The row offsets array has length " <<
3368 rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " <<
3369 (getLocalNumRows () + 1) << ".");
3370
3371 // Note: We don't need to do the following things which are normally done in fillComplete:
3372 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3373
3374 // Constants from allocateIndices
3375 //
3376 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3377 // away once the graph is allocated. expertStaticFillComplete
3378 // either presumes that the graph is allocated, or "allocates" it.
3379 //
3380 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3381 // version of CrsGraph is to allocate in the constructor, not
3382 // lazily on first insert. That will make both
3383 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3385 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3386 indicesAreAllocated_ = true;
3387
3388 // Constants from makeIndicesLocal
3389 //
3390 // The graph has a column Map, so its indices had better be local.
3391 indicesAreLocal_ = true;
3392 indicesAreGlobal_ = false;
3393
3394 // set domain/range map: may clear the import/export objects
3395#ifdef HAVE_TPETRA_MMM_TIMINGS
3396 MM = Teuchos::null;
3397 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3398#endif
3399 setDomainRangeMaps (domainMap, rangeMap);
3400
3401 // Presume the user sorted and merged the arrays first
3402 indicesAreSorted_ = true;
3403 noRedundancies_ = true;
3404
3405 // makeImportExport won't create a new importer/exporter if I set one here first.
3406#ifdef HAVE_TPETRA_MMM_TIMINGS
3407 MM = Teuchos::null;
3408 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3409#endif
3410
// Drop any existing Import/Export, then install the caller's objects
// after checking that their Maps match the graph's Maps.
3411 importer_ = Teuchos::null;
3412 exporter_ = Teuchos::null;
3413 if (importer != Teuchos::null) {
3414 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3415 ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3416 ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3417 std::invalid_argument,": importer does not match matrix maps.");
3418 importer_ = importer;
3419
3420 }
3421
3422#ifdef HAVE_TPETRA_MMM_TIMINGS
3423 MM = Teuchos::null;
3424 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3425#endif
3426
3427 if (exporter != Teuchos::null) {
3428 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3429 ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3430 ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3431 std::invalid_argument,": exporter does not match matrix maps.");
3432 exporter_ = exporter;
3433 }
3434
3435#ifdef HAVE_TPETRA_MMM_TIMINGS
3436 MM = Teuchos::null;
3437 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3438#endif
3439 Teuchos::Array<int> remotePIDs (0); // unused output argument
3440 this->makeImportExport (remotePIDs, false);
3441
3442#ifdef HAVE_TPETRA_MMM_TIMINGS
3443 MM = Teuchos::null;
3444 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3445#endif
3446 this->fillLocalGraph (params);
3447
// Global constants are computed when params is null or when the
// "compute global constants" parameter is true (its default here).
3448 const bool callComputeGlobalConstants = params.get () == nullptr ||
3449 params->get ("compute global constants", true);
3450
3451 if (callComputeGlobalConstants) {
3452#ifdef HAVE_TPETRA_MMM_TIMINGS
3453 MM = Teuchos::null;
3454 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3455#endif // HAVE_TPETRA_MMM_TIMINGS
3456 this->computeGlobalConstants ();
3457 }
3458 else {
3459#ifdef HAVE_TPETRA_MMM_TIMINGS
3460 MM = Teuchos::null;
3461 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3462#endif // HAVE_TPETRA_MMM_TIMINGS
3463 this->computeLocalConstants ();
3464 }
3465
3466 fillComplete_ = true;
3467
3468#ifdef HAVE_TPETRA_MMM_TIMINGS
3469 MM = Teuchos::null;
3470 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3471#endif
3473 }
3474
3475
3476 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3477 void
3479 fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3480 {
3482 typedef decltype (k_numRowEntries_) row_entries_type;
3483 typedef typename local_graph_device_type::row_map_type row_map_type;
3484 typedef typename row_map_type::non_const_type non_const_row_map_type;
3485 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3486 const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3487 "expertStaticFillComplete): ";
3488 const size_t lclNumRows = this->getLocalNumRows ();
3489
3490 // This method's goal is to fill in the two arrays (compressed
3491 // sparse row format) that define the sparse graph's structure.
3492
3493 bool requestOptimizedStorage = true;
3494 if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3495 requestOptimizedStorage = false;
3496 }
3497
3498 // The graph's column indices are currently stored in a 1-D
3499 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3500 // in k_lclInds1D_.
3501
3502 if (debug_) {
3503 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3504 // The graph's array of row offsets must already be allocated.
3505 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3506 (rowPtrsUnpacked.extent (0) == 0, std::logic_error,
3507 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3508 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3509 (rowPtrsUnpacked.extent (0) != lclNumRows + 1, std::logic_error,
3510 "rowPtrsUnpacked_host_.extent(0) = "
3511 << rowPtrsUnpacked.extent (0) << " != (lclNumRows + 1) = "
3512 << (lclNumRows + 1) << ".");
3513 const size_t numOffsets = rowPtrsUnpacked.extent (0);
3514 const auto valToCheck = rowPtrsUnpacked(numOffsets-1);
3515 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3516 (numOffsets != 0 &&
3517 lclIndsUnpacked_wdv.extent (0) != valToCheck,
3518 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3519 " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3520 << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck
3521 << ".");
3522 }
3523
3524 size_t allocSize = 0;
3525 try {
3526 allocSize = this->getLocalAllocationSize ();
3527 }
3528 catch (std::logic_error& e) {
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530 (true, std::logic_error, "getLocalAllocationSize threw "
3531 "std::logic_error: " << e.what ());
3532 }
3533 catch (std::runtime_error& e) {
3534 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3535 (true, std::runtime_error, "getLocalAllocationSize threw "
3536 "std::runtime_error: " << e.what ());
3537 }
3538 catch (std::exception& e) {
3539 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3540 (true, std::runtime_error, "getLocalAllocationSize threw "
3541 "std::exception: " << e.what ());
3542 }
3543 catch (...) {
3544 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3545 (true, std::runtime_error, "getLocalAllocationSize threw "
3546 "an exception not a subclass of std::exception.");
3547 }
3548
3549 if (this->getLocalNumEntries () != allocSize) {
3550 // Use the nonconst version of row_map_type for ptr_d, because
3551 // the latter is const and we need to modify ptr_d here.
3552 non_const_row_map_type ptr_d;
3553 row_map_type ptr_d_const;
3554
3555 // The graph's current 1-D storage is "unpacked." This means
3556 // the row offsets may differ from what the final row offsets
3557 // should be. This could happen, for example, if the user set
3558 // an upper bound on the number of entries in each row, but
3559 // didn't fill all those entries.
3560
3561 if (debug_) {
3562 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3563 if (rowPtrsUnpacked.extent (0) != 0) {
3564 const size_t numOffsets =
3565 static_cast<size_t> (rowPtrsUnpacked.extent (0));
3566 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3568 (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3569 std::logic_error, "(Unpacked branch) Before allocating "
3570 "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3571 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3572 << lclIndsUnpacked_wdv.extent (0) << ".");
3573 }
3574 }
3575
3576 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3577 // array of valid entry counts per row (k_numRowEntries_).
3578
3579 // Total number of entries in the matrix on the calling
3580 // process. We will compute this in the loop below. It's
3581 // cheap to compute and useful as a sanity check.
3582 size_t lclTotalNumEntries = 0;
3583 {
3584 // Allocate the packed row offsets array.
3585 ptr_d =
3586 non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3587 ptr_d_const = ptr_d;
3588
3589 // It's ok that k_numRowEntries_ is a host View; the
3590 // function can handle this.
3591 typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3592 if (debug_) {
3593 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3594 (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3595 std::logic_error, "(Unpacked branch) "
3596 "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3597 << " != getLocalNumRows()=" << lclNumRows << "");
3598 }
3599
3600 lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3601
3602 if (debug_) {
3603 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3604 (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3605 std::logic_error, "(Unpacked branch) After allocating "
3606 "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3607 << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3608 const auto valToCheck =
3609 ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3610 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3611 (valToCheck != lclTotalNumEntries, std::logic_error,
3612 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3613 "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3614 << ") = " << valToCheck << " != total number of entries "
3615 "on the calling process = " << lclTotalNumEntries
3616 << ".");
3617 }
3618 }
3619
3620 // Allocate the array of packed column indices.
3621 lclinds_1d_type ind_d =
3622 lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3623
3624 // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3625 // them, using the packed row offsets array ptr_d that we
3626 // created above.
3627 //
3628 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3629 // CrsMatrix?), we need to keep around the unpacked row
3630 // offsets and column indices.
3631
3632 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3633 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3634 typedef pack_functor<
3635 typename local_graph_device_type::entries_type::non_const_type,
3636 typename local_inds_dualv_type::t_dev::const_type,
3637 row_map_type,
3638 typename local_graph_device_type::row_map_type> inds_packer_type;
3639 inds_packer_type f (ind_d,
3640 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3641 ptr_d, this->getRowPtrsUnpackedDevice());
3642 {
3643 typedef typename decltype (ind_d)::execution_space exec_space;
3644 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3645 Kokkos::parallel_for (range_type (0, lclNumRows), f);
3646 }
3647
3648 if (debug_) {
3649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3650 (ptr_d.extent (0) == 0, std::logic_error,
3651 "(\"Optimize Storage\"=true branch) After packing, "
3652 "ptr_d.extent(0)=0.");
3653 if (ptr_d.extent (0) != 0) {
3654 const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3655 const auto valToCheck =
3656 ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3657 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3658 (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3659 std::logic_error, "(\"Optimize Storage\"=true branch) "
3660 "After packing, ptr_d(" << (numOffsets-1) << ")="
3661 << valToCheck << " != ind_d.extent(0)="
3662 << ind_d.extent(0) << ".");
3663 }
3664 }
3665 // Build the local graph.
3666 if (requestOptimizedStorage)
3667 setRowPtrs(ptr_d_const);
3668 else
3669 setRowPtrsPacked(ptr_d_const);
3670 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3671 }
3672 else { // We don't have to pack, so just set the pointers.
3673 //Set both packed and unpacked rowptrs to this
3674 this->setRowPtrs(rowPtrsUnpacked_dev_);
3675 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3676
3677 if (debug_) {
3678 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3679 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3681 (rowPtrsPacked_dev.extent (0) == 0, std::logic_error,
3682 "(\"Optimize Storage\"=false branch) "
3683 "rowPtrsPacked_dev_.extent(0) = 0.");
3684 if (rowPtrsPacked_dev.extent (0) != 0) {
3685 const size_t numOffsets =
3686 static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3687 const size_t valToCheck =
3688 rowPtrsPacked_host(numOffsets - 1);
3689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3690 (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3691 std::logic_error, "(\"Optimize Storage\"=false branch) "
3692 "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3693 << valToCheck
3694 << " != lclIndsPacked_wdv.extent(0)="
3695 << lclIndsPacked_wdv.extent (0) << ".");
3696 }
3697 }
3698 }
3699
3700 if (debug_) {
3701 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3702 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3704 (static_cast<size_t> (rowPtrsPacked_dev.extent (0)) != lclNumRows + 1,
3705 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3706 rowPtrsPacked_dev.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3707 << ".");
3708 if (rowPtrsPacked_dev.extent (0) != 0) {
3709 const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3710 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3711 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3712 (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3713 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3714 << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3715 << lclIndsPacked_wdv.extent (0) << ".");
3716 }
3717 }
3718
3719 if (requestOptimizedStorage) {
3720 // With optimized storage, we don't need to store
3721 // the array of row entry counts.
3722
3723 // Free graph data structures that are only needed for
3724 // unpacked 1-D storage.
3725 k_numRowEntries_ = row_entries_type ();
3726
3727 // Keep the new 1-D packed allocations.
3728 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3729
3730 storageStatus_ = Details::STORAGE_1D_PACKED;
3731 }
3732
3733 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3734 }
3735
3736 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3737 void
3739 replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3740 {
3741 // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3742 //
3743 // FIXME (mfh 18 Aug 2014) This will break if the calling process
3744 // has no entries, because in that case, currently it is neither
3745 // locally nor globally indexed. This will change once we get rid
3746 // of lazy allocation (so that the constructor allocates indices
3747 // and therefore commits to local vs. global).
3748 const char tfecfFuncName[] = "replaceColMap: ";
3749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3750 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3751 "Requires matching maps and non-static graph.");
3752 colMap_ = newColMap;
3753 }
3754
3755 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3756 void
3758 reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3759 const Teuchos::RCP<const import_type>& newImport,
3760 const bool sortIndicesInEachRow)
3761 {
3762 using Teuchos::REDUCE_MIN;
3763 using Teuchos::reduceAll;
3764 using Teuchos::RCP;
3765 typedef GlobalOrdinal GO;
3766 typedef LocalOrdinal LO;
3767 typedef typename local_inds_dualv_type::t_host col_inds_type;
3768 const char tfecfFuncName[] = "reindexColumns: ";
3769
3770 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3771 isFillComplete (), std::runtime_error, "The graph is fill complete "
3772 "(isFillComplete() returns true). You must call resumeFill() before "
3773 "you may call this method.");
3774
3775 // mfh 19 Aug 2014: This method does NOT redistribute data; it
3776 // doesn't claim to do the work of an Import or Export. This
3777 // means that for all processes, the calling process MUST own all
3778 // column indices, in both the old column Map (if it exists) and
3779 // the new column Map. We check this via an all-reduce.
3780 //
3781 // Some processes may be globally indexed, others may be locally
3782 // indexed, and others (that have no graph entries) may be
3783 // neither. This method will NOT change the graph's current
3784 // state. If it's locally indexed, it will stay that way, and
3785 // vice versa. It would easy to add an option to convert indices
3786 // from global to local, so as to save a global-to-local
3787 // conversion pass. However, we don't do this here. The intended
3788 // typical use case is that the graph already has a column Map and
3789 // is locally indexed, and this is the case for which we optimize.
3790
3791 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3792
3793 // Attempt to convert indices to the new column Map's version of
3794 // local. This will fail if on the calling process, the graph has
3795 // indices that are not on that process in the new column Map.
3796 // After the local conversion attempt, we will do an all-reduce to
3797 // see if any processes failed.
3798
3799 // If this is false, then either the graph contains a column index
3800 // which is invalid in the CURRENT column Map, or the graph is
3801 // locally indexed but currently has no column Map. In either
3802 // case, there is no way to convert the current local indices into
3803 // global indices, so that we can convert them into the new column
3804 // Map's local indices. It's possible for this to be true on some
3805 // processes but not others, due to replaceColMap.
3806 bool allCurColIndsValid = true;
3807 // On the calling process, are all valid current column indices
3808 // also in the new column Map on the calling process? In other
3809 // words, does local reindexing suffice, or should the user have
3810 // done an Import or Export instead?
3811 bool localSuffices = true;
3812
3813 // Final arrays for the local indices. We will allocate exactly
3814 // one of these ONLY if the graph is locally indexed on the
3815 // calling process, and ONLY if the graph has one or more entries
3816 // (is not empty) on the calling process. In that case, we
3817 // allocate the first (1-D storage) if the graph has a static
3818 // profile, else we allocate the second (2-D storage).
3819 col_inds_type newLclInds1D;
3820 auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3821
3822 // If indices aren't allocated, that means the calling process
3823 // owns no entries in the graph. Thus, there is nothing to
3824 // convert, and it trivially succeeds locally.
3825 if (indicesAreAllocated ()) {
3826 if (isLocallyIndexed ()) {
3827 if (hasColMap ()) { // locally indexed, and currently has a column Map
3828 const map_type& oldColMap = * (getColMap ());
3829 // Allocate storage for the new local indices.
3830 const size_t allocSize = this->getLocalAllocationSize ();
3831 newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3832 allocSize);
3833 // Attempt to convert the new indices locally.
3834 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3835 const RowInfo rowInfo = this->getRowInfo (lclRow);
3836 const size_t beg = rowInfo.offset1D;
3837 const size_t end = beg + rowInfo.numEntries;
3838 for (size_t k = beg; k < end; ++k) {
3839 const LO oldLclCol = oldLclInds1D(k);
3840 if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3841 allCurColIndsValid = false;
3842 break; // Stop at the first invalid index
3843 }
3844 const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3845
3846 // The above conversion MUST succeed. Otherwise, the
3847 // current local index is invalid, which means that
3848 // the graph was constructed incorrectly.
3849 if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3850 allCurColIndsValid = false;
3851 break; // Stop at the first invalid index
3852 }
3853 else {
3854 const LO newLclCol = newColMap->getLocalElement (gblCol);
3855 if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3856 localSuffices = false;
3857 break; // Stop at the first invalid index
3858 }
3859 newLclInds1D(k) = newLclCol;
3860 }
3861 } // for each entry in the current row
3862 } // for each locally owned row
3863 }
3864 else { // locally indexed, but no column Map
3865 // This case is only possible if replaceColMap() was called
3866 // with a null argument on the calling process. It's
3867 // possible, but it means that this method can't possibly
3868 // succeed, since we have no way of knowing how to convert
3869 // the current local indices to global indices.
3870 allCurColIndsValid = false;
3871 }
3872 }
3873 else { // globally indexed
3874 // If the graph is globally indexed, we don't need to save
3875 // local indices, but we _do_ need to know whether the current
3876 // global indices are valid in the new column Map. We may
3877 // need to do a getRemoteIndexList call to find this out.
3878 //
3879 // In this case, it doesn't matter whether the graph currently
3880 // has a column Map. We don't need the old column Map to
3881 // convert from global indices to the _new_ column Map's local
3882 // indices. Furthermore, we can use the same code, whether
3883 // the graph is static or dynamic profile.
3884
3885 // Test whether the current global indices are in the new
3886 // column Map on the calling process.
3887 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3888 const RowInfo rowInfo = this->getRowInfo (lclRow);
3889 auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3890 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3891 const GO gblCol = oldGblRowView(k);
3892 if (! newColMap->isNodeGlobalElement (gblCol)) {
3893 localSuffices = false;
3894 break; // Stop at the first invalid index
3895 }
3896 } // for each entry in the current row
3897 } // for each locally owned row
3898 } // locally or globally indexed
3899 } // whether indices are allocated
3900
3901 // Do an all-reduce to check both possible error conditions.
3902 int lclSuccess[2];
3903 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3904 lclSuccess[1] = localSuffices ? 1 : 0;
3905 int gblSuccess[2];
3906 gblSuccess[0] = 0;
3907 gblSuccess[1] = 0;
3908 RCP<const Teuchos::Comm<int> > comm =
3909 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3910 if (! comm.is_null ()) {
3911 reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3912 }
3913
3914 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3915 gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3916 " The most likely reason is that the graph is locally indexed, but the "
3917 "column Map is missing (null) on some processes, due to a previous call "
3918 "to replaceColMap().");
3919
3920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3921 gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3922 "contains column indices that are in the old column Map, but not in the "
3923 "new column Map (on that process). This method does NOT redistribute "
3924 "data; it does not claim to do the work of an Import or Export operation."
3925 " This means that for all processess, the calling process MUST own all "
3926 "column indices, in both the old column Map and the new column Map. In "
3927 "this case, you will need to do an Import or Export operation to "
3928 "redistribute data.");
3929
3930 // Commit the results.
3931 if (isLocallyIndexed ()) {
3932 { // scope the device view; sortAndMergeAllIndices needs host
3933 typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3934 Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3935 Kokkos::WithoutInitializing),
3936 newLclInds1D.extent(0));
3937 Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3938 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3939 }
3940
3941 // We've reindexed, so we don't know if the indices are sorted.
3942 //
3943 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3944 // since we're already going through all the indices above. We
3945 // could also sort each row in place; that way, we would only
3946 // have to make one pass over the rows.
3947 indicesAreSorted_ = false;
3948 if (sortIndicesInEachRow) {
3949 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3950 // order to call this method.
3951 //
3952 // FIXME (mfh 17 Sep 2014) This violates the strong exception
3953 // guarantee. It would be better to sort the new index arrays
3954 // before committing them.
3955 const bool sorted = false; // need to resort
3956 const bool merged = true; // no need to merge, since no dups
3957 this->sortAndMergeAllIndices (sorted, merged);
3958 }
3959 }
3960 colMap_ = newColMap;
3961
3962 if (newImport.is_null ()) {
3963 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3964 // check whether the input Import is null on any process.
3965 //
3966 // If the domain Map hasn't been set yet, we can't compute a new
3967 // Import object. Leave it what it is; it should be null, but
3968 // it doesn't matter. If the domain Map _has_ been set, then
3969 // compute a new Import object if necessary.
3970 if (! domainMap_.is_null ()) {
3971 if (! domainMap_->isSameAs (* newColMap)) {
3972 importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
3973 } else {
3974 importer_ = Teuchos::null; // don't need an Import
3975 }
3976 }
3977 } else {
3978 // The caller gave us an Import object. Assume that it's valid.
3979 importer_ = newImport;
3980 }
3981 }
3982
3983 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3984 void
3986 replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
3987 {
3988 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3989 TEUCHOS_TEST_FOR_EXCEPTION(
3990 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
3991 "this method unless the graph already has a column Map.");
3992 TEUCHOS_TEST_FOR_EXCEPTION(
3993 newDomainMap.is_null (), std::invalid_argument,
3994 prefix << "The new domain Map must be nonnull.");
3995
3996 // Create a new importer, if needed
3997 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
3998 if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
3999 newImporter = rcp(new import_type(newDomainMap, colMap_));
4000 }
4001 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4002 }
4003
4004 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4005 void
4007 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4008 const Teuchos::RCP<const import_type>& newImporter)
4009 {
4010 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4011 TEUCHOS_TEST_FOR_EXCEPTION(
4012 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4013 "this method unless the graph already has a column Map.");
4014 TEUCHOS_TEST_FOR_EXCEPTION(
4015 newDomainMap.is_null (), std::invalid_argument,
4016 prefix << "The new domain Map must be nonnull.");
4017
4018 if (debug_) {
4019 if (newImporter.is_null ()) {
4020 // It's not a good idea to put expensive operations in a macro
4021 // clause, even if they are side effect - free, because macros
4022 // don't promise that they won't evaluate their arguments more
4023 // than once. It's polite for them to do so, but not required.
4024 const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4025 TEUCHOS_TEST_FOR_EXCEPTION
4026 (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4027 "then the new domain Map must be the same as the current column Map.");
4028 }
4029 else {
4030 const bool colSameAsTgt =
4031 colMap_->isSameAs (* (newImporter->getTargetMap ()));
4032 const bool newDomSameAsSrc =
4033 newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4034 TEUCHOS_TEST_FOR_EXCEPTION
4035 (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4036 "new Import is nonnull, then the current column Map must be the same "
4037 "as the new Import's target Map, and the new domain Map must be the "
4038 "same as the new Import's source Map.");
4039 }
4040 }
4041
4042 domainMap_ = newDomainMap;
4043 importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4044 }
4045
4046 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4047 void
4049 replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4050 {
4051 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4052 TEUCHOS_TEST_FOR_EXCEPTION(
4053 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4054 "this method unless the graph already has a row Map.");
4055 TEUCHOS_TEST_FOR_EXCEPTION(
4056 newRangeMap.is_null (), std::invalid_argument,
4057 prefix << "The new range Map must be nonnull.");
4058
4059 // Create a new exporter, if needed
4060 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4061 if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4062 newExporter = rcp(new export_type(rowMap_, newRangeMap));
4063 }
4064 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4065 }
4066
4067 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4068 void
4070 replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4071 const Teuchos::RCP<const export_type>& newExporter)
4072 {
4073 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4074 TEUCHOS_TEST_FOR_EXCEPTION(
4075 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4076 "this method unless the graph already has a column Map.");
4077 TEUCHOS_TEST_FOR_EXCEPTION(
4078 newRangeMap.is_null (), std::invalid_argument,
4079 prefix << "The new domain Map must be nonnull.");
4080
4081 if (debug_) {
4082 if (newExporter.is_null ()) {
4083 // It's not a good idea to put expensive operations in a macro
4084 // clause, even if they are side effect - free, because macros
4085 // don't promise that they won't evaluate their arguments more
4086 // than once. It's polite for them to do so, but not required.
4087 const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4088 TEUCHOS_TEST_FOR_EXCEPTION
4089 (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4090 "then the new range Map must be the same as the current row Map.");
4091 }
4092 else {
4093 const bool newRangeSameAsTgt =
4094 newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4095 const bool rowSameAsSrc =
4096 rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4097 TEUCHOS_TEST_FOR_EXCEPTION
4098 (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4099 "new Export is nonnull, then the current row Map must be the same "
4100 "as the new Export's source Map, and the new range Map must be the "
4101 "same as the new Export's target Map.");
4102 }
4103 }
4104
4105 rangeMap_ = newRangeMap;
4106 exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4107 }
4108
4109
4110 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4113 getLocalGraphDevice () const
4114 {
4116 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4117 this->getRowPtrsPackedDevice());
4118 }
4119
4120 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4123 getLocalGraphHost () const
4124 {
4125 return local_graph_host_type(
4126 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4127 this->getRowPtrsPackedHost());
4128 }
4129
4130 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4131 void
4134 {
4136 using Teuchos::ArrayView;
4137 using Teuchos::outArg;
4138 using Teuchos::reduceAll;
4139 typedef global_size_t GST;
4140
4141 ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4142
4143 this->computeLocalConstants ();
4144
4145 // Compute global constants from local constants. Processes that
4146 // already have local constants still participate in the
4147 // all-reduces, using their previously computed values.
4148 if (! this->haveGlobalConstants_) {
4149 const Teuchos::Comm<int>& comm = * (this->getComm ());
4150 // Promote all the nodeNum* and nodeMaxNum* quantities from
4151 // size_t to global_size_t, when doing the all-reduces for
4152 // globalNum* / globalMaxNum* results.
4153 //
4154 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4155 // this in two all-reduces (one for the sum and the other for
4156 // the max), or use a custom MPI_Op that combines the sum and
4157 // the max. The latter might even be slower than two
4158 // all-reduces on modern network hardware. It would also be a
4159 // good idea to use nonblocking all-reduces (MPI 3), so that we
4160 // don't have to wait around for the first one to finish before
4161 // starting the second one.
4162 GST lcl, gbl;
4163 lcl = static_cast<GST> (this->getLocalNumEntries ());
4164
4165 reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4166 this->globalNumEntries_ = gbl;
4167
4168 const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4169 reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4170 outArg (this->globalMaxNumRowEntries_));
4171 this->haveGlobalConstants_ = true;
4172 }
4173 }
4174
4175
4176 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4177 void
4180 {
4182
4183 ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4184 if (this->haveLocalConstants_) {
4185 return;
4186 }
4187
4188 // Reset local properties
4189 this->nodeMaxNumRowEntries_ =
4190 Teuchos::OrdinalTraits<size_t>::invalid();
4191
4192 using LO = local_ordinal_type;
4193
4194 auto ptr = this->getRowPtrsPackedDevice();
4195 const LO lclNumRows = ptr.extent(0) == 0 ?
4196 static_cast<LO> (0) :
4197 (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4198
4199 const LO lclMaxNumRowEnt =
4200 ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4201 ptr, lclNumRows);
4202 this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4203 this->haveLocalConstants_ = true;
4204 }
4205
4206
4207 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4208 std::pair<size_t, std::string>
4210 makeIndicesLocal (const bool verbose)
4211 {
4213 using Teuchos::arcp;
4214 using Teuchos::Array;
4215 using std::endl;
4216 typedef LocalOrdinal LO;
4217 typedef GlobalOrdinal GO;
4218 typedef device_type DT;
4219 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4220 typedef decltype (k_numRowEntries_) row_entries_type;
4221 typedef typename row_entries_type::non_const_value_type num_ent_type;
4222 const char tfecfFuncName[] = "makeIndicesLocal: ";
4223 ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4224
4225 std::unique_ptr<std::string> prefix;
4226 if (verbose) {
4227 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4228 std::ostringstream os;
4229 os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4230 std::cerr << os.str();
4231 }
4232
4233 // These are somewhat global properties, so it's safe to have
4234 // exception checks for them, rather than returning an error code.
4235 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4236 (! this->hasColMap (), std::logic_error, "The graph does not have a "
4237 "column Map yet. This method should never be called in that case. "
4238 "Please report this bug to the Tpetra developers.");
4239 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4240 (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4241 "that it has a column Map, because hasColMap() returns true. However, "
4242 "the result of getColMap() is null. This should never happen. Please "
4243 "report this bug to the Tpetra developers.");
4244
4245 // Return value 1: The number of column indices (counting
4246 // duplicates) that could not be converted to local indices,
4247 // because they were not in the column Map on the calling process.
4248 size_t lclNumErrs = 0;
4249 std::ostringstream errStrm; // for return value 2 (error string)
4250
4251 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4252 const map_type& colMap = * (this->getColMap ());
4253
4254 if (this->isGloballyIndexed () && lclNumRows != 0) {
4255 // This is a host-accessible View.
4256 typename row_entries_type::const_type h_numRowEnt =
4257 this->k_numRowEntries_;
4258
4259 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
4260
4261 // Allocate space for local indices.
4262 if (rowPtrsUnpacked_host.extent(0) == 0) {
4263 errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4264 "happen here. Please report this bug to the Tpetra developers."
4265 << endl;
4266 // Need to return early.
4267 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4268 errStrm.str ());
4269 }
4270 const auto numEnt = rowPtrsUnpacked_host(lclNumRows);
4271
4272 // mfh 17 Dec 2016: We don't need initial zero-fill of
4273 // lclIndsUnpacked_wdv, because we will fill it below anyway.
4274 // AllowPadding would only help for aligned access (e.g.,
4275 // for vectorization) if we also were to pad each row to the
4276 // same alignment, so we'll skip AllowPadding for now.
4277
4278 // using Kokkos::AllowPadding;
4279 using Kokkos::view_alloc;
4280 using Kokkos::WithoutInitializing;
4281
4282 // When giving the label as an argument to
4283 // Kokkos::view_alloc, the label must be a string and not a
4284 // char*, else the code won't compile. This is because
4285 // view_alloc also allows a raw pointer as its first
4286 // argument. See
4287 // https://github.com/kokkos/kokkos/issues/434. This is a
4288 // large allocation typically, so the overhead of creating
4289 // an std::string is minor.
4290 const std::string label ("Tpetra::CrsGraph::lclInd");
4291 if (verbose) {
4292 std::ostringstream os;
4293 os << *prefix << "(Re)allocate lclInd_wdv: old="
4294 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4295 std::cerr << os.str();
4296 }
4297
4298 local_inds_dualv_type lclInds_dualv =
4299 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4300 numEnt);
4301 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4302
4303 auto lclColMap = colMap.getLocalMap ();
4304 // This is a "device mirror" of the host View h_numRowEnt.
4305 //
4306 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4307 // Device instance is to use its default constructor. See the
4308 // following Kokkos issue:
4309 //
4310 // https://github.com/kokkos/kokkos/issues/442
4311 if (verbose) {
4312 std::ostringstream os;
4313 os << *prefix << "Allocate device mirror k_numRowEnt: "
4314 << h_numRowEnt.extent(0) << endl;
4315 std::cerr << os.str();
4316 }
4317 auto k_numRowEnt =
4318 Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4319
4321 lclNumErrs =
4322 convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4323 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4324 gblInds_wdv.getDeviceView(Access::ReadOnly),
4325 this->getRowPtrsUnpackedDevice(),
4326 lclColMap,
4327 k_numRowEnt);
4328 if (lclNumErrs != 0) {
4329 const int myRank = [this] () {
4330 auto map = this->getMap ();
4331 if (map.is_null ()) {
4332 return 0;
4333 }
4334 else {
4335 auto comm = map->getComm ();
4336 return comm.is_null () ? 0 : comm->getRank ();
4337 }
4338 } ();
4339 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4340 errStrm << "(Process " << myRank << ") When converting column "
4341 "indices from global to local, we encountered " << lclNumErrs
4342 << " ind" << (pluralNumErrs ? "ices" : "ex")
4343 << " that do" << (pluralNumErrs ? "es" : "")
4344 << " not live in the column Map on this process." << endl;
4345 }
4346
4347 // We've converted column indices from global to local, so we
4348 // can deallocate the global column indices (which we know are
4349 // in 1-D storage, because the graph has static profile).
4350 if (verbose) {
4351 std::ostringstream os;
4352 os << *prefix << "Free gblInds_wdv: "
4353 << gblInds_wdv.extent(0) << endl;
4354 std::cerr << os.str();
4355 }
4356 gblInds_wdv = global_inds_wdv_type ();
4357 } // globallyIndexed() && lclNumRows > 0
4358
4359 this->indicesAreLocal_ = true;
4360 this->indicesAreGlobal_ = false;
4361 this->checkInternalState ();
4362
4363 return std::make_pair (lclNumErrs, errStrm.str ());
4364 }
4365
// Build this graph's column Map and store the result in colMap_.
//
// \param remotePIDs Forwarded by reference to Details::makeColMap;
//   presumably filled with the owning ranks of the remote column
//   indices -- TODO confirm against Details::makeColMap.
//
// In debug mode (debug_), the per-process error code returned by
// Details::makeColMap is reduced with REDUCE_MIN over the graph's
// communicator.  If any process failed, every process gathers all
// error messages and throws std::runtime_error.  Outside debug mode
// the error code is discarded.
//
// NOTE(review): the embedded listing numbers skip 4368, 4371, 4391 and
// 4435, so the class qualifier, the initializer of sortEachProcsGids,
// and possibly other statements are not visible in this extract.
4366 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4367 void
4369 makeColMap (Teuchos::Array<int>& remotePIDs)
4370 {
4372 using std::endl;
4373 const char tfecfFuncName[] = "makeColMap";
4374
4375 ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
// prefix is only created (and only dereferenced) when verbose_ is set.
4376 std::unique_ptr<std::string> prefix;
4377 if (verbose_) {
4378 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4379 std::ostringstream os;
4380 os << *prefix << "Start" << endl;
4381 std::cerr << os.str();
4382 }
4383
4384 // this->colMap_ should be null at this point, but we accept the
4385 // future possibility that it might not be (esp. if we decide
4386 // later to support graph structure changes after first
4387 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4388 // 2017) support).
4389 Teuchos::RCP<const map_type> colMap = this->colMap_;
4390 const bool sortEachProcsGids =
4392
4393 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4394 // per-process error code. If an error does occur on a process,
4395 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4396 // notice that error. This is the caller's responsibility. For
4397 // now, we only propagate (to all processes) and report the error
4398 // in debug mode. In the future, we need to add the local/global
4399 // error handling scheme used in BlockCrsMatrix to this class.
4400 if (debug_) {
4401 using Teuchos::outArg;
4402 using Teuchos::REDUCE_MIN;
4403 using Teuchos::reduceAll;
4404
4405 std::ostringstream errStrm;
4406 const int lclErrCode =
4407 Details::makeColMap (colMap, remotePIDs,
4408 getDomainMap (), *this, sortEachProcsGids, &errStrm);
4409 auto comm = this->getComm ();
4410 if (! comm.is_null ()) {
// Map the error code to 1 (success) / 0 (failure) so that the MIN
// reduction yields 1 only if every process succeeded.
4411 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4412 int gblSuccess = 0; // output argument
4413 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4414 outArg (gblSuccess));
4415 if (gblSuccess != 1) {
4416 std::ostringstream os;
4417 Details::gathervPrint (os, errStrm.str (), *comm);
4418 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4419 (true, std::runtime_error, ": An error happened on at "
4420 "least one process in the CrsGraph's communicator. "
4421 "Here are all processes' error messages:" << std::endl
4422 << os.str ());
4423 }
4424 }
4425 }
4426 else {
// Non-debug path: no error stream is supplied and the returned
// error code is deliberately ignored.
4427 (void) Details::makeColMap (colMap, remotePIDs,
4428 getDomainMap (), *this, sortEachProcsGids, nullptr);
4429 }
4430 // See above. We want to admit the possibility of makeColMap
4431 // actually revising an existing column Map, even though that
4432 // doesn't currently (as of 10 May 2017) happen.
4433 this->colMap_ = colMap;
4434
4436 if (verbose_) {
4437 std::ostringstream os;
4438 os << *prefix << "Done" << endl;
4439 std::cerr << os.str();
4440 }
4441 }
4442
4443
// Sort and merge the column indices of every local row, on the host.
//
// \param sorted Forwarded to sortAndMergeRowIndices; presumably "the
//   rows are already sorted" -- TODO confirm.
// \param merged Forwarded to sortAndMergeRowIndices; presumably "the
//   rows are already merged" -- TODO confirm.
//
// Throws std::logic_error if the graph is globally indexed, or if
// merged is false while the graph is already storage optimized.
// Work is done only when at least one of the two flags is false;
// afterwards indicesAreSorted_ and noRedundancies_ are both set.
//
// NOTE(review): the embedded listing numbers skip 4446, 4485, 4493,
// 4501 and 4508.  The surrounding comments mention "disabling WDV
// tracking", so the missing statements presumably toggle that
// tracking around the parallel loops -- verify against the real file.
4444 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4445 void
4447 sortAndMergeAllIndices (const bool sorted, const bool merged)
4448 {
4449 using std::endl;
4450 using LO = LocalOrdinal;
// The row loops below run in the execution space of the host mirror
// of a device View.
4451 using host_execution_space =
4452 typename Kokkos::View<LO*, device_type>::HostMirror::
4453 execution_space;
4454 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4455 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4456 Details::ProfilingRegion regionSortAndMerge
4457 ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4458
4459 std::unique_ptr<std::string> prefix;
4460 if (verbose_) {
4461 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4462 std::ostringstream os;
4463 os << *prefix << "Start: "
4464 << "sorted=" << (sorted ? "true" : "false")
4465 << ", merged=" << (merged ? "true" : "false") << endl;
4466 std::cerr << os.str();
4467 }
4468 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4469 (this->isGloballyIndexed(), std::logic_error,
4470 "This method may only be called after makeIndicesLocal." );
4471 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4472 (! merged && this->isStorageOptimized(), std::logic_error,
4473 "The graph is already storage optimized, so we shouldn't be "
4474 "merging any indices. "
4475 "Please report this bug to the Tpetra developers.");
4476
4477 if (! sorted || ! merged) {
4478 const LO lclNumRows(this->getLocalNumRows());
4479 auto range = range_type(0, lclNumRows);
4480
4481 if (verbose_) {
// Verbose path: same per-row work as the non-verbose path, but a
// reduction sums the values returned by sortAndMergeRowIndices so
// the total can be printed.
4482 size_t totalNumDups = 0;
4483 //Sync and mark-modified the local indices before disabling WDV tracking
4484 lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4486 Kokkos::parallel_reduce(range,
4487 [this, sorted, merged] (const LO lclRow, size_t& numDups)
4488 {
4489 const RowInfo rowInfo = this->getRowInfo(lclRow);
4490 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4491 },
4492 totalNumDups);
4494 std::ostringstream os;
4495 os << *prefix << "totalNumDups=" << totalNumDups << endl;
4496 std::cerr << os.str();
4497 }
4498 else {
4499 //Sync and mark-modified the local indices before disabling WDV tracking
4500 lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4502 Kokkos::parallel_for(range,
4503 [this, sorted, merged] (const LO lclRow)
4504 {
4505 const RowInfo rowInfo = this->getRowInfo(lclRow);
4506 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4507 });
4509 }
4510 this->indicesAreSorted_ = true; // we just sorted every row
4511 this->noRedundancies_ = true; // we just merged every row
4512 }
4513
4514 if (verbose_) {
4515 std::ostringstream os;
4516 os << *prefix << "Done" << endl;
4517 std::cerr << os.str();
4518 }
4519 }
4520
// Create the Import (domain Map -> column Map) and Export (row Map ->
// range Map) objects if they do not exist yet and are needed.
//
// \param remotePIDs Passed to the Import constructor when
//   useRemotePIDs is true; otherwise unused.
// \param useRemotePIDs Selects the Import constructor overload that
//   takes remotePIDs.
//
// Throws std::logic_error if the graph has no column Map.  An existing
// importer_ / exporter_ is never replaced.  If this object's parameter
// list has an "Import" / "Export" sublist, that sublist is handed to
// the corresponding constructor.
//
// NOTE(review): the embedded listing numbers skip 4523 and 4527, so
// the class qualifier and one further line are not visible here.
4521 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4522 void
4524 makeImportExport (Teuchos::Array<int>& remotePIDs,
4525 const bool useRemotePIDs)
4526 {
4528 using Teuchos::ParameterList;
4529 using Teuchos::RCP;
4530 using Teuchos::rcp;
4531 const char tfecfFuncName[] = "makeImportExport: ";
4532 ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4533
4534 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4535 (! this->hasColMap (), std::logic_error,
4536 "This method may not be called unless the graph has a column Map.");
4537 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4538
4539 // Don't do any checks to see if we need to create the Import, if
4540 // it exists already.
4541 //
4542 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4543 // change CrsGraph in the future to allow changing the column
4544 // Map after fillComplete. For now, the column Map is fixed
4545 // after the first fillComplete call.
4546 if (importer_.is_null ()) {
4547 // Create the Import instance if necessary.
// The RCP inequality is tested first, so isSameAs() is only called
// when the two RCPs compare unequal.
4548 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4549 if (params.is_null () || ! params->isSublist ("Import")) {
4550 if (useRemotePIDs) {
4551 importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4552 }
4553 else {
4554 importer_ = rcp (new import_type (domainMap_, colMap_));
4555 }
4556 }
4557 else {
4558 RCP<ParameterList> importSublist = sublist (params, "Import", true);
4559 if (useRemotePIDs) {
4560 RCP<import_type> newImp =
4561 rcp (new import_type (domainMap_, colMap_, remotePIDs,
4562 importSublist));
4563 importer_ = newImp;
4564 }
4565 else {
4566 importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4567 }
4568 }
4569 }
4570 }
4571
4572 // Don't do any checks to see if we need to create the Export, if
4573 // it exists already.
4574 if (exporter_.is_null ()) {
4575 // Create the Export instance if necessary.
4576 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4577 if (params.is_null () || ! params->isSublist ("Export")) {
4578 exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4579 }
4580 else {
4581 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4582 exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4583 }
4584 }
4585 }
4586 }
4587
4588
// Return a one-line, brace-delimited summary of the graph.
//
// When the graph is fill complete the summary lists the global row,
// column and entry counts; otherwise only the global row count.
//
// NOTE(review): the embedded listing numbers skip 4591 and 4595, so
// the class qualifier and one statement between the stream's
// declaration and the first branch are not visible in this extract.
4589 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4590 std::string
4592 description () const
4593 {
4594 std::ostringstream oss;
4596 if (isFillComplete ()) {
4597 oss << "{status = fill complete"
4598 << ", global rows = " << getGlobalNumRows()
4599 << ", global cols = " << getGlobalNumCols()
4600 << ", global num entries = " << getGlobalNumEntries()
4601 << "}";
4602 }
4603 else {
4604 oss << "{status = fill not complete"
4605 << ", global rows = " << getGlobalNumRows()
4606 << "}";
4607 }
4608 return oss.str();
4609 }
4610
4611
// Print a description of the graph to `out`, with detail controlled
// by `verbLevel`.
//
// \param out Output stream; a Teuchos::OSTab is applied to it for the
//   duration of the call.
// \param verbLevel Verbosity; VERB_DEFAULT is treated as VERB_LOW.
//
// The loops over ranks call comm->barrier() on every process, so this
// must be called collectively when the level is VERB_MEDIUM or above.
//
// NOTE(review): comm is dereferenced without a null check, so this
// assumes getComm() returns a non-null communicator -- confirm.
// NOTE(review): the embedded listing number 4614 (presumably the class
// qualifier) is missing from this extract.
4612 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4613 void
4615 describe (Teuchos::FancyOStream &out,
4616 const Teuchos::EVerbosityLevel verbLevel) const
4617 {
4618 using Teuchos::ArrayView;
4619 using Teuchos::Comm;
4620 using Teuchos::RCP;
4621 using Teuchos::VERB_DEFAULT;
4622 using Teuchos::VERB_NONE;
4623 using Teuchos::VERB_LOW;
4624 using Teuchos::VERB_MEDIUM;
4625 using Teuchos::VERB_HIGH;
4626 using Teuchos::VERB_EXTREME;
4627 using std::endl;
4628 using std::setw;
4629
4630 Teuchos::EVerbosityLevel vl = verbLevel;
4631 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4632 RCP<const Comm<int> > comm = this->getComm();
4633 const int myImageID = comm->getRank(),
4634 numImages = comm->getSize();
// Column width for the per-row table: the number of decimal digits
// needed for the global row count, but at least 11, plus 2.
4635 size_t width = 1;
4636 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4637 ++width;
4638 }
4639 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4640 Teuchos::OSTab tab (out);
4641 // none: print nothing
4642 // low: print O(1) info from node 0
4643 // medium: print O(P) info, num entries per node
4644 // high: print O(N) info, num entries per row
4645 // extreme: print O(NNZ) info: print graph indices
4646 //
4647 // for medium and higher, print constituent objects at specified verbLevel
4648 if (vl != VERB_NONE) {
4649 if (myImageID == 0) out << this->description() << std::endl;
4650 // O(1) globals, minus what was already printed by description()
4651 if (isFillComplete() && myImageID == 0) {
4652 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4653 }
4654 // constituent objects
4655 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4656 if (myImageID == 0) out << "\nRow map: " << std::endl;
4657 rowMap_->describe(out,vl);
4658 if (colMap_ != Teuchos::null) {
4659 if (myImageID == 0) out << "\nColumn map: " << std::endl;
4660 colMap_->describe(out,vl);
4661 }
4662 if (domainMap_ != Teuchos::null) {
4663 if (myImageID == 0) out << "\nDomain map: " << std::endl;
4664 domainMap_->describe(out,vl);
4665 }
4666 if (rangeMap_ != Teuchos::null) {
4667 if (myImageID == 0) out << "\nRange map: " << std::endl;
4668 rangeMap_->describe(out,vl);
4669 }
4670 }
4671 // O(P) data
4672 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
// Each rank prints in turn; every rank takes part in the barriers.
4673 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4674 if (myImageID == imageCtr) {
4675 out << "Node ID = " << imageCtr << std::endl
4676 << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4677 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4678 if (! indicesAreAllocated ()) {
4679 out << "Indices are not allocated." << std::endl;
4680 }
4681 }
// NOTE(review): three consecutive barriers; presumably meant to give
// output time to appear in rank order -- confirm whether one suffices.
4682 comm->barrier();
4683 comm->barrier();
4684 comm->barrier();
4685 }
4686 }
4687 // O(N) and O(NNZ) data
4688 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4689 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4690 if (myImageID == imageCtr) {
4691 out << std::setw(width) << "Node ID"
4692 << std::setw(width) << "Global Row"
4693 << std::setw(width) << "Num Entries";
4694 if (vl == VERB_EXTREME) {
4695 out << " Entries";
4696 }
4697 out << std::endl;
4698 const LocalOrdinal lclNumRows =
4699 static_cast<LocalOrdinal> (this->getLocalNumRows ());
4700 for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4701 const RowInfo rowinfo = this->getRowInfo (r);
4702 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4703 out << std::setw(width) << myImageID
4704 << std::setw(width) << gid
4705 << std::setw(width) << rowinfo.numEntries;
4706 if (vl == VERB_EXTREME) {
4707 out << " ";
// Column indices are always printed as global indices: stored global
// indices are printed directly, and local indices are converted
// through the column Map.
4708 if (isGloballyIndexed()) {
4709 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4710 for (size_t j=0; j < rowinfo.numEntries; ++j){
4711 GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4712 out << colgid << " ";
4713 }
4714 }
4715 else if (isLocallyIndexed()) {
4716 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4717 for (size_t j=0; j < rowinfo.numEntries; ++j) {
4718 LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4719 out << colMap_->getGlobalElement(collid) << " ";
4720 }
4721 }
4722 }
4723 out << std::endl;
4724 }
4725 }
4726 comm->barrier();
4727 comm->barrier();
4728 comm->barrier();
4729 }
4730 }
4731 }
4732 }
4733
4734
// Compatibility check between a source object and this graph.
//
// The source argument is unnamed and unused: this implementation
// performs no check and always returns true.
//
// NOTE(review): the embedded listing number 4737 (presumably the class
// qualifier) is missing from this extract.
4735 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4736 bool
4738 checkSizes (const SrcDistObject& /* source */)
4739 {
4740 // It's not clear what kind of compatibility checks on sizes can
4741 // be performed here. Epetra_CrsGraph doesn't check any sizes for
4742 // compatibility.
4743 return true;
4744 }
4745
// Insert rows of the source graph into this graph: first the rows
// with local indices [0, numSameIDs) ("copy"), then the rows named by
// permuteFromLIDs, inserted at the rows named by permuteToLIDs
// ("permute").
//
// \param source Source object; must be convertible to row_graph_type.
// \param numSameIDs Number of leading local rows handled by the
//   "copy" part.
// \param permuteToLIDs Local row indices in this (target) graph.
// \param permuteFromLIDs Local row indices in the source graph; must
//   have the same extent as permuteToLIDs (else std::runtime_error).
// The CombineMode argument is unnamed and unused.
//
// Before inserting, padding is computed and applied so that the
// target has room for the incoming entries.
//
// NOTE(review): the embedded listing numbers skip 4748-4749, so the
// class qualifier and the function name line are not visible here;
// the name is taken from tfecfFuncName.
4746 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4747 void
4750 (const SrcDistObject& source,
4751 const size_t numSameIDs,
4752 const Kokkos::DualView<const local_ordinal_type*,
4753 buffer_device_type>& permuteToLIDs,
4754 const Kokkos::DualView<const local_ordinal_type*,
4755 buffer_device_type>& permuteFromLIDs,
4756 const CombineMode /*CM*/)
4757 {
4758 using std::endl;
4759 using LO = local_ordinal_type;
4760 using GO = global_ordinal_type;
4761 using this_CRS_type = CrsGraph<LO, GO, node_type>;
4762 const char tfecfFuncName[] = "copyAndPermute: ";
4763 const bool verbose = verbose_;
4764
4765 std::unique_ptr<std::string> prefix;
4766 if (verbose) {
4767 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4768 std::ostringstream os;
4769 os << *prefix << endl;
4770 std::cerr << os.str ();
4771 }
4772
4773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4774 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4775 std::runtime_error, "permuteToLIDs.extent(0) = "
4776 << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4777 << permuteFromLIDs.extent (0) << ".");
4778
4779 // We know from checkSizes that the source object is a
4780 // row_graph_type, so we don't need to check again.
// NOTE(review): the checkSizes shown in this listing returns true
// without inspecting the source, so a source of another type would
// make this reference dynamic_cast throw std::bad_cast -- confirm.
4781 const row_graph_type& srcRowGraph =
4782 dynamic_cast<const row_graph_type&> (source);
4783
4784 if (verbose) {
4785 std::ostringstream os;
4786 os << *prefix << "Compute padding" << endl;
4787 std::cerr << os.str ();
4788 }
4789 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4790 permuteToLIDs, permuteFromLIDs, verbose);
4791 applyCrsPadding(*padding, verbose);
4792
4793 // If the source object is actually a CrsGraph, we can use view
4794 // mode instead of copy mode to access the entries in each row,
4795 // if the graph is not fill complete.
4796 const this_CRS_type* srcCrsGraph =
4797 dynamic_cast<const this_CRS_type*> (&source);
4798
4799 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4800 const map_type& tgtRowMap = *(getRowMap());
4801 const bool src_filled = srcRowGraph.isFillComplete();
// Scratch buffer reused by the copy-mode branches below.
4802 nonconst_global_inds_host_view_type row_copy;
4803 LO myid = 0;
4804
4805 //
4806 // "Copy" part of "copy and permute."
4807 //
4808 if (src_filled || srcCrsGraph == nullptr) {
4809 if (verbose) {
4810 std::ostringstream os;
4811 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4812 std::cerr << os.str ();
4813 }
4814 // If the source graph is fill complete, we can't use view mode,
4815 // because the data might be stored in a different format not
4816 // compatible with the expectations of view mode. Also, if the
4817 // source graph is not a CrsGraph, we can't use view mode,
4818 // because RowGraph only provides copy mode access to the data.
4819 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4820 const GO gid = srcRowMap.getGlobalElement (myid);
4821 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4822 Kokkos::resize(row_copy,row_length);
// check_row_length receives the count reported by getGlobalRowCopy
// but is not read afterwards; row_length is what gets inserted.
4823 size_t check_row_length = 0;
4824 srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4825 this->insertGlobalIndices (gid, row_length, row_copy.data());
4826 }
4827 } else {
4828 if (verbose) {
4829 std::ostringstream os;
4830 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4831 std::cerr << os.str ();
4832 }
4833 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4834 const GO gid = srcRowMap.getGlobalElement (myid);
4835 global_inds_host_view_type row;
4836 srcCrsGraph->getGlobalRowView (gid, row);
4837 this->insertGlobalIndices (gid, row.extent(0), row.data());
4838 }
4839 }
4840
4841 //
4842 // "Permute" part of "copy and permute."
4843 //
// The host views are read directly; no sync_host() is issued here.
4844 auto permuteToLIDs_h = permuteToLIDs.view_host ();
4845 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4846
4847 if (src_filled || srcCrsGraph == nullptr) {
4848 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4849 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4850 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4851 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4852 Kokkos::resize(row_copy,row_length);
4853 size_t check_row_length = 0;
4854 srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4855 this->insertGlobalIndices (mygid, row_length, row_copy.data());
4856 }
4857 } else {
4858 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4859 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4860 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4861 global_inds_host_view_type row;
4862 srcCrsGraph->getGlobalRowView (srcgid, row);
4863 this->insertGlobalIndices (mygid, row.extent(0), row.data());
4864 }
4865 }
4866
4867 if (verbose) {
4868 std::ostringstream os;
4869 os << *prefix << "Done" << endl;
4870 std::cerr << os.str ();
4871 }
4872 }
4873
// Apply a previously computed padding to this graph's unpacked
// storage, so that rows have room for entries about to be inserted.
//
// \param padding Per-row padding request, handed to padCrsArrays.
// \param verbose Whether to print debugging output to std::cerr.
//
// Steps: allocate indices (as global) if not yet allocated; copy the
// unpacked row offsets into a mutable View; build per-row "end"
// offsets; call padCrsArrays on the global or local index storage;
// write updated per-row counts back to k_numRowEntries_ when that
// array is in use; and install the new row offsets via
// setRowPtrsUnpacked.
//
// NOTE(review): the embedded listing numbers skip 4876 and 4880-4881,
// so the class qualifier and two further lines are not visible here.
4874 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4875 void
4877 applyCrsPadding(const padding_type& padding,
4878 const bool verbose)
4879 {
4882 using std::endl;
4883 using LO = local_ordinal_type;
4884 using row_ptrs_type =
4885 typename local_graph_device_type::row_map_type::non_const_type;
4886 using range_policy =
4887 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4888 const char tfecfFuncName[] = "applyCrsPadding";
4889 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4890
4891 std::unique_ptr<std::string> prefix;
4892 if (verbose) {
4893 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4894 std::ostringstream os;
4895 os << *prefix << "padding: ";
4896 padding.print(os);
4897 os << endl;
4898 std::cerr << os.str();
4899 }
// The rank is only looked up when verbose; otherwise -1 is passed on
// to padCrsArrays.
4900 const int myRank = ! verbose ? -1 : [&] () {
4901 auto map = this->getMap();
4902 if (map.is_null()) {
4903 return -1;
4904 }
4905 auto comm = map->getComm();
4906 if (comm.is_null()) {
4907 return -1;
4908 }
4909 return comm->getRank();
4910 } ();
4911
4912 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4913 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4914 // size needs to increase. That should be the job of
4915 // padCrsArrays.
4916
4917 // If no indices have been allocated yet, allocate them as global indices.
4918 if (! indicesAreAllocated()) {
4919 if (verbose) {
4920 std::ostringstream os;
4921 os << *prefix << "Call allocateIndices" << endl;
4922 std::cerr << os.str();
4923 }
4924 allocateIndices(GlobalIndices, verbose);
4925 }
4926 TEUCHOS_ASSERT( indicesAreAllocated() );
4927
4928 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4929 // would use it directly.
4930
4931 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4932 if (verbose) {
4933 std::ostringstream os;
4934 os << *prefix << "Allocate row_ptrs_beg: "
4935 << rowPtrsUnpacked_dev.extent(0) << endl;
4936 std::cerr << os.str();
4937 }
4938 using Kokkos::view_alloc;
4939 using Kokkos::WithoutInitializing;
4940 row_ptrs_type row_ptrs_beg(
4941 view_alloc("row_ptrs_beg", WithoutInitializing),
4942 rowPtrsUnpacked_dev.extent(0));
4943 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4944 Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev);
4945
// N is the number of rows: one less than the number of row offsets,
// or zero when there are no offsets at all.
4946 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4947 size_t(row_ptrs_beg.extent(0) - 1);
4948 if (verbose) {
4949 std::ostringstream os;
4950 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4951 std::cerr << os.str();
4952 }
4953 row_ptrs_type row_ptrs_end(
4954 view_alloc("row_ptrs_end", WithoutInitializing), N);
4955 row_ptrs_type num_row_entries;
4956
// A nonempty k_numRowEntries_ selects the "unpacked" case below, in
// which per-row counts are tracked separately from the offsets.
4957 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4958
4959 execution_space().fence(); // we need above deep_copy to be done
4960
4961 if (refill_num_row_entries) { // Case 1: Unpacked storage
4962 // We can't assume correct *this capture until C++17, and it's
4963 // likely more efficient just to capture what we need anyway.
4964 num_row_entries =
4965 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4966 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4967 Kokkos::parallel_for
4968 ("Fill end row pointers", range_policy(0, N),
4969 KOKKOS_LAMBDA (const size_t i) {
4970 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4971 });
4972 }
4973 else {
4974 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4975 // storage, we don't need row_ptr_end to be separate allocation;
4976 // could just have it alias row_ptr_beg+1.
4977 Kokkos::parallel_for
4978 ("Fill end row pointers", range_policy(0, N),
4979 KOKKOS_LAMBDA (const size_t i) {
4980 row_ptrs_end(i) = row_ptrs_beg(i+1);
4981 });
4982 }
4983
// Pad whichever index array is currently in use.
4984 if (isGloballyIndexed()) {
4985 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4986 padding, myRank, verbose);
4987 }
4988 else {
4989 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4990 padding, myRank, verbose);
4991 }
4992
4993 if (refill_num_row_entries) {
// Recompute per-row counts from the offsets left by padCrsArrays and
// copy them back into k_numRowEntries_.
4994 Kokkos::parallel_for
4995 ("Fill num entries", range_policy(0, N),
4996 KOKKOS_LAMBDA (const size_t i) {
4997 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4998 });
4999 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5000 }
5001 if (verbose) {
5002 std::ostringstream os;
5003 os << *prefix << "Reassign k_rowPtrs_; old size: "
5004 << rowPtrsUnpacked_dev.extent(0) << ", new size: "
5005 << row_ptrs_beg.extent(0) << endl;
5006 std::cerr << os.str();
// NOTE(review): this size check only runs in verbose mode.
5007 TEUCHOS_ASSERT( rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0) );
5008 }
5009
5010 setRowPtrsUnpacked(row_ptrs_beg);
5011 }
5012
// Compute the padding needed for the "same" and "permute" parts of a
// copy-and-permute from `source` into this graph.
//
// \param numSameIDs Number of leading local rows treated as "same".
// \param permuteToLIDs Target local row indices for permuted rows.
// \param permuteFromLIDs Source local row indices for permuted rows.
// \param verbose Whether to print debugging output to std::cerr.
// \return A newly allocated padding_type, filled in by
//   computeCrsPaddingForSameIDs and computeCrsPaddingForPermutedIDs.
//
// NOTE(review): the embedded listing numbers skip 5017-5019, so the
// class qualifier, the function name and the first parameter (used
// below as `source`) are not visible in this extract; the name is
// inferred from the prefix string and from the call in copyAndPermute.
5013 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5014 std::unique_ptr<
5015 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5016 >
5020 const size_t numSameIDs,
5021 const Kokkos::DualView<const local_ordinal_type*,
5022 buffer_device_type>& permuteToLIDs,
5023 const Kokkos::DualView<const local_ordinal_type*,
5024 buffer_device_type>& permuteFromLIDs,
5025 const bool verbose) const
5026 {
5027 using LO = local_ordinal_type;
5028 using std::endl;
5029
5030 std::unique_ptr<std::string> prefix;
5031 if (verbose) {
5032 prefix = this->createPrefix("CrsGraph",
5033 "computeCrsPadding(same & permute)");
5034 std::ostringstream os;
5035 os << *prefix << "{numSameIDs: " << numSameIDs
5036 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5037 << endl;
5038 std::cerr << os.str();
5039 }
5040
// Rank of this process, or -1 when the row Map or its communicator
// is null.
5041 const int myRank = [&] () {
5042 auto comm = rowMap_.is_null() ? Teuchos::null :
5043 rowMap_->getComm();
5044 return comm.is_null() ? -1 : comm->getRank();
5045 } ();
5046 std::unique_ptr<padding_type> padding(
5047 new padding_type(myRank, numSameIDs,
5048 permuteFromLIDs.extent(0)));
5049
5050 computeCrsPaddingForSameIDs(*padding, source,
5051 static_cast<LO>(numSameIDs));
5052 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5053 permuteFromLIDs);
5054 return padding;
5055 }
5056
// Record in `padding` what is needed for the first numSameIDs local
// rows, by handing each row's current target column indices and the
// corresponding source row's column indices to padding.update_same.
//
// \param padding [in/out] Padding object to update.
// \param source Source row graph.
// \param numSameIDs Number of leading local rows to process; returns
//   immediately when it is zero.
//
// Verbosity is taken from the member verbose_, not from a parameter.
//
// NOTE(review): the embedded listing numbers skip 5059-5060 and 5062,
// so the class qualifier, the function name and the start of the
// `source` parameter's type are not visible in this extract.
5057 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5058 void
5061 padding_type& padding,
5063 node_type>& source,
5064 const local_ordinal_type numSameIDs) const
5065 {
5066 using LO = local_ordinal_type;
5067 using GO = global_ordinal_type;
5068 using Details::Impl::getRowGraphGlobalRow;
5069 using std::endl;
5070 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5071
5072 std::unique_ptr<std::string> prefix;
5073 const bool verbose = verbose_;
5074 if (verbose) {
5075 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5076 std::ostringstream os;
5077 os << *prefix << "numSameIDs: " << numSameIDs << endl;
5078 std::cerr << os.str();
5079 }
5080
5081 if (numSameIDs == 0) {
5082 return;
5083 }
5084
5085 const map_type& srcRowMap = *(source.getRowMap());
5086 const map_type& tgtRowMap = *rowMap_;
5087 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
// The source only counts as "unique" when it is a CrsGraph that
// reports isMerged(); any other source type is treated as not unique.
5088 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5089 const bool src_is_unique =
5090 srcCrs == nullptr ? false : srcCrs->isMerged();
5091 const bool tgt_is_unique = this->isMerged();
5092
// Scratch storage handed to getRowGraphGlobalRow on every iteration.
5093 std::vector<GO> srcGblColIndsScratch;
5094 std::vector<GO> tgtGblColIndsScratch;
5095
5096 execute_sync_host_uvm_access(); // protect host UVM access
5097 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5098 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5099 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5100 auto srcGblColInds = getRowGraphGlobalRow(
5101 srcGblColIndsScratch, source, srcGblRowInd);
5102 auto tgtGblColInds = getRowGraphGlobalRow(
5103 tgtGblColIndsScratch, *this, tgtGblRowInd);
5104 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5105 tgtGblColInds.size(), tgt_is_unique,
5106 srcGblColInds.getRawPtr(),
5107 srcGblColInds.size(), src_is_unique);
5108 }
5109 if (verbose) {
5110 std::ostringstream os;
5111 os << *prefix << "Done" << endl;
5112 std::cerr << os.str();
5113 }
5114 }
5115
// Record in `padding` what is needed for the permuted rows: for each
// k, source local row permuteFromLIDs[k] is paired with target local
// row permuteToLIDs[k] and both rows' global column indices are handed
// to padding.update_permute.
//
// \param padding [in/out] Padding object to update.
// \param source Source row graph.
// \param permuteToLIDs Target local row indices; must not need a host
//   sync (asserted).  Returns immediately when it is empty.
// \param permuteFromLIDs Source local row indices; must not need a
//   host sync (asserted).
//
// Verbosity is taken from the member verbose_, not from a parameter.
//
// NOTE(review): the embedded listing numbers skip 5118-5119 and 5121,
// so the class qualifier, the function name and the start of the
// `source` parameter's type are not visible in this extract.
5116 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5117 void
5120 padding_type& padding,
5122 node_type>& source,
5123 const Kokkos::DualView<const local_ordinal_type*,
5124 buffer_device_type>& permuteToLIDs,
5125 const Kokkos::DualView<const local_ordinal_type*,
5126 buffer_device_type>& permuteFromLIDs) const
5127 {
5128 using LO = local_ordinal_type;
5129 using GO = global_ordinal_type;
5130 using Details::Impl::getRowGraphGlobalRow;
5131 using std::endl;
5132 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5133
5134 std::unique_ptr<std::string> prefix;
5135 const bool verbose = verbose_;
5136 if (verbose) {
5137 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5138 std::ostringstream os;
5139 os << *prefix << "permuteToLIDs.extent(0): "
5140 << permuteToLIDs.extent(0)
5141 << ", permuteFromLIDs.extent(0): "
5142 << permuteFromLIDs.extent(0) << endl;
5143 std::cerr << os.str();
5144 }
5145
5146 if (permuteToLIDs.extent(0) == 0) {
5147 return;
5148 }
5149
5150 const map_type& srcRowMap = *(source.getRowMap());
5151 const map_type& tgtRowMap = *rowMap_;
5152 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
// The source only counts as "unique" when it is a CrsGraph that
// reports isMerged(); any other source type is treated as not unique.
5153 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5154 const bool src_is_unique =
5155 srcCrs == nullptr ? false : srcCrs->isMerged();
5156 const bool tgt_is_unique = this->isMerged();
5157
// The DualViews are const here, so they cannot be synced; the host
// copies are asserted to be current instead.
5158 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5159 auto permuteToLIDs_h = permuteToLIDs.view_host();
5160 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5161 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5162
5163 std::vector<GO> srcGblColIndsScratch;
5164 std::vector<GO> tgtGblColIndsScratch;
5165 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5166
5167 execute_sync_host_uvm_access(); // protect host UVM access
5168 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5169 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5170 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5171 auto srcGblColInds = getRowGraphGlobalRow(
5172 srcGblColIndsScratch, source, srcGblRowInd);
5173 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5174 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5175 auto tgtGblColInds = getRowGraphGlobalRow(
5176 tgtGblColIndsScratch, *this, tgtGblRowInd);
5177 padding.update_permute(whichPermute, tgtLclRowInd,
5178 tgtGblColInds.getRawPtr(),
5179 tgtGblColInds.size(), tgt_is_unique,
5180 srcGblColInds.getRawPtr(),
5181 srcGblColInds.size(), src_is_unique);
5182 }
5183
5184 if (verbose) {
5185 std::ostringstream os;
5186 os << *prefix << "Done" << endl;
5187 std::cerr << os.str();
5188 }
5189 }
5190
// Compute the padding needed to unpack received rows into this graph.
//
// \param importLIDs Target local row index for each received row;
//   must not need a host sync (asserted).
// \param imports Received global column indices, stored row after
//   row; synced to host here if needed.
// \param numPacketsPerLID Number of entries received for each row;
//   synced to host here if needed.
// \param verbose Whether to print debugging output to std::cerr.
// \return A newly allocated padding_type with one update_import call
//   per received row.
//
// NOTE(review): numPacketsPerLID holds size_t values that are cast to
// the local ordinal type; this assumes each count fits -- confirm.
// NOTE(review): the embedded listing numbers skip 5195-5196, so the
// class qualifier and the function name line are not visible here;
// the name is taken from tfecfFuncName.
5191 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5192 std::unique_ptr<
5193 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5194 >
5197 const Kokkos::DualView<const local_ordinal_type*,
5198 buffer_device_type>& importLIDs,
5199 Kokkos::DualView<packet_type*, buffer_device_type> imports,
5200 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5201 const bool verbose) const
5202 {
5203 using Details::Impl::getRowGraphGlobalRow;
5204 using std::endl;
5205 using LO = local_ordinal_type;
5206 using GO = global_ordinal_type;
5207 const char tfecfFuncName[] = "computeCrsPaddingForImports";
5208
5209 std::unique_ptr<std::string> prefix;
5210 if (verbose) {
5211 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5212 std::ostringstream os;
5213 os << *prefix << "importLIDs.extent(0): "
5214 << importLIDs.extent(0)
5215 << ", imports.extent(0): "
5216 << imports.extent(0)
5217 << ", numPacketsPerLID.extent(0): "
5218 << numPacketsPerLID.extent(0) << endl;
5219 std::cerr << os.str();
5220 }
5221
5222 const LO numImports = static_cast<LO>(importLIDs.extent(0));
// Rank of this process, or -1 when the row Map or its communicator
// is null.
5223 const int myRank = [&] () {
5224 auto comm = rowMap_.is_null() ? Teuchos::null :
5225 rowMap_->getComm();
5226 return comm.is_null() ? -1 : comm->getRank();
5227 } ();
5228 std::unique_ptr<padding_type> padding(
5229 new padding_type(myRank, numImports));
5230
// imports and numPacketsPerLID are taken by value and are non-const,
// so they can be synced to host here; importLIDs is const and is only
// asserted to be current.
5231 if (imports.need_sync_host()) {
5232 imports.sync_host();
5233 }
5234 auto imports_h = imports.view_host();
5235 if (numPacketsPerLID.need_sync_host ()) {
5236 numPacketsPerLID.sync_host();
5237 }
5238 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5239
5240 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5241 auto importLIDs_h = importLIDs.view_host();
5242
5243 const map_type& tgtRowMap = *rowMap_;
5244 // Always merge source column indices, since isMerged() is
5245 // per-process state, and we don't know its value on other
5246 // processes that sent us data.
5247 constexpr bool src_is_unique = false;
5248 const bool tgt_is_unique = isMerged();
5249
5250 std::vector<GO> tgtGblColIndsScratch;
// Running start position of the current row's entries in imports_h.
5251 size_t offset = 0;
5252 execute_sync_host_uvm_access(); // protect host UVM access
5253 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5254 // CrsGraph packs just global column indices, while CrsMatrix
5255 // packs bytes (first the number of entries in the row, then the
5256 // global column indices, then other stuff like the matrix
5257 // values in that row).
5258 const LO origSrcNumEnt =
5259 static_cast<LO>(numPacketsPerLID_h[whichImport]);
5260 GO* const srcGblColInds = imports_h.data() + offset;
5261
5262 const LO tgtLclRowInd = importLIDs_h[whichImport];
5263 const GO tgtGblRowInd =
5264 tgtRowMap.getGlobalElement(tgtLclRowInd);
5265 auto tgtGblColInds = getRowGraphGlobalRow(
5266 tgtGblColIndsScratch, *this, tgtGblRowInd);
5267 const size_t origTgtNumEnt(tgtGblColInds.size());
5268
5269 padding->update_import(whichImport, tgtLclRowInd,
5270 tgtGblColInds.getRawPtr(),
5271 origTgtNumEnt, tgt_is_unique,
5272 srcGblColInds,
5273 origSrcNumEnt, src_is_unique);
5274 offset += origSrcNumEnt;
5275 }
5276
5277 if (verbose) {
5278 std::ostringstream os;
5279 os << *prefix << "Done" << endl;
5280 std::cerr << os.str();
5281 }
5282 return padding;
5283 }
5284
5285 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5286 std::unique_ptr<
5287 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5288 >
5291 const Kokkos::DualView<const local_ordinal_type*,
5292 buffer_device_type>& importLIDs,
5293 Kokkos::DualView<char*, buffer_device_type> imports,
5294 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5295 const bool verbose) const
5296 {
5297 using Details::Impl::getRowGraphGlobalRow;
5298 using Details::PackTraits;
5299 using std::endl;
5300 using LO = local_ordinal_type;
5301 using GO = global_ordinal_type;
5302 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5303
5304 std::unique_ptr<std::string> prefix;
5305 if (verbose) {
5306 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5307 std::ostringstream os;
5308 os << *prefix << "importLIDs.extent(0): "
5309 << importLIDs.extent(0)
5310 << ", imports.extent(0): "
5311 << imports.extent(0)
5312 << ", numPacketsPerLID.extent(0): "
5313 << numPacketsPerLID.extent(0) << endl;
5314 std::cerr << os.str();
5315 }
5316 const bool extraVerbose =
5317 verbose && Details::Behavior::verbose("CrsPadding");
5318
5319 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5320 TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5321 const int myRank = [&] () {
5322 auto comm = rowMap_.is_null() ? Teuchos::null :
5323 rowMap_->getComm();
5324 return comm.is_null() ? -1 : comm->getRank();
5325 } ();
5326 std::unique_ptr<padding_type> padding(
5327 new padding_type(myRank, numImports));
5328
5329 if (imports.need_sync_host()) {
5330 imports.sync_host();
5331 }
5332 auto imports_h = imports.view_host();
5333 if (numPacketsPerLID.need_sync_host ()) {
5334 numPacketsPerLID.sync_host();
5335 }
5336 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5337
5338 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5339 auto importLIDs_h = importLIDs.view_host();
5340
5341 const map_type& tgtRowMap = *rowMap_;
5342 // Always merge source column indices, since isMerged() is
5343 // per-process state, and we don't know its value on other
5344 // processes that sent us data.
5345 constexpr bool src_is_unique = false;
5346 const bool tgt_is_unique = isMerged();
5347
5348 std::vector<GO> srcGblColIndsScratch;
5349 std::vector<GO> tgtGblColIndsScratch;
5350 size_t offset = 0;
5351 execute_sync_host_uvm_access(); // protect host UVM access
5352 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5353 // CrsGraph packs just global column indices, while CrsMatrix
5354 // packs bytes (first the number of entries in the row, then the
5355 // global column indices, then other stuff like the matrix
5356 // values in that row).
5357 const size_t numBytes = numPacketsPerLID_h[whichImport];
5358 if (extraVerbose) {
5359 std::ostringstream os;
5360 os << *prefix << "whichImport=" << whichImport
5361 << ", numImports=" << numImports
5362 << ", numBytes=" << numBytes << endl;
5363 std::cerr << os.str();
5364 }
5365 if (numBytes == 0) {
5366 continue; // special case: no entries to unpack for this row
5367 }
5368 LO origSrcNumEnt = 0;
5369 const size_t numEntBeg = offset;
5370 const size_t numEntLen =
5371 PackTraits<LO>::packValueCount(origSrcNumEnt);
5372 TEUCHOS_ASSERT( numBytes >= numEntLen );
5373 TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5374 PackTraits<LO>::unpackValue(origSrcNumEnt,
5375 imports_h.data() + numEntBeg);
5376 if (extraVerbose) {
5377 std::ostringstream os;
5378 os << *prefix << "whichImport=" << whichImport
5379 << ", numImports=" << numImports
5380 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5381 std::cerr << os.str();
5382 }
5383 TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5384 TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5385 const size_t gidsBeg = numEntBeg + numEntLen;
5386 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5387 srcGblColIndsScratch.resize(origSrcNumEnt);
5388 }
5389 GO* const srcGblColInds = srcGblColIndsScratch.data();
5390 PackTraits<GO>::unpackArray(srcGblColInds,
5391 imports_h.data() + gidsBeg,
5392 origSrcNumEnt);
5393 const LO tgtLclRowInd = importLIDs_h[whichImport];
5394 const GO tgtGblRowInd =
5395 tgtRowMap.getGlobalElement(tgtLclRowInd);
5396 auto tgtGblColInds = getRowGraphGlobalRow(
5397 tgtGblColIndsScratch, *this, tgtGblRowInd);
5398 const size_t origNumTgtEnt(tgtGblColInds.size());
5399
5400 if (extraVerbose) {
5401 std::ostringstream os;
5402 os << *prefix << "whichImport=" << whichImport
5403 << ", numImports=" << numImports
5404 << ": Call padding->update_import" << endl;
5405 std::cerr << os.str();
5406 }
5407 padding->update_import(whichImport, tgtLclRowInd,
5408 tgtGblColInds.getRawPtr(),
5409 origNumTgtEnt, tgt_is_unique,
5410 srcGblColInds,
5411 origSrcNumEnt, src_is_unique);
5412 offset += numBytes;
5413 }
5414
5415 if (verbose) {
5416 std::ostringstream os;
5417 os << *prefix << "Done" << endl;
5418 std::cerr << os.str();
5419 }
5420 return padding;
5421 }
5422
// packAndPrepare: pack the rows of `source` named by exportLIDs into the
// `exports` buffer and record the per-row packet counts in numPacketsPerLID.
//
// Three paths are visible below:
//   1. `source` is a RowGraph but not a CrsGraph: call RowGraph::pack into a
//      temporary Teuchos::Array, then deep-copy that array into the host view
//      of `exports` (reallocating `exports` when its length differs).
//   2. `source` is a CrsGraph, this graph's column Map is non-null, and either
//      this graph's packed row pointers are non-empty or its row Map has no
//      local elements: call packCrsGraphNew on the source graph.
//   3. Otherwise: call the source graph's packFillActiveNew.
//
// Parameters:
//   source             - object to pack from; must be castable to
//                        row_graph_type.
//   exportLIDs         - local row indices to pack; must not need a host
//                        sync on path 1 (asserted).
//   exports            - [out] packed data; may be replaced by a new DualView.
//   numPacketsPerLID   - [out] per-row packet counts.
//   constantNumPackets - [out] forwarded to whichever pack routine is used.
//
// Raises std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length or if this (target) graph is fill complete, and
// std::invalid_argument if `source` is not a RowGraph.
//
// NOTE(review): the listing numbers embedded in this extract skip 5425-5426,
// 5440 and 5529, so the member-function name line, the right-hand side of the
// crs_graph_type alias, and one line ahead of the packCrsGraphNew call appear
// to be missing here -- confirm against the upstream file.
5423 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5424 void
5427 (const SrcDistObject& source,
5428 const Kokkos::DualView<const local_ordinal_type*,
5429 buffer_device_type>& exportLIDs,
5430 Kokkos::DualView<packet_type*,
5431 buffer_device_type>& exports,
5432 Kokkos::DualView<size_t*,
5433 buffer_device_type> numPacketsPerLID,
5434 size_t& constantNumPackets)
5435 {
5436 using Tpetra::Details::ProfilingRegion;
5437 using GO = global_ordinal_type;
5438 using std::endl;
5439 using crs_graph_type =
5441 const char tfecfFuncName[] = "packAndPrepare: ";
5442 ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5443
5444 const bool verbose = verbose_;
5445 std::unique_ptr<std::string> prefix;
5446 if (verbose) {
5447 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5448 std::ostringstream os;
5449 os << *prefix << "Start" << endl;
5450 std::cerr << os.str();
5451 }
5452
5453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5454 (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5455 std::runtime_error,
5456 "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5457 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5458 << ".");
5459 const row_graph_type* srcRowGraphPtr =
5460 dynamic_cast<const row_graph_type*> (&source);
5461 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5462 (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5463 "or Import operation to a CrsGraph must be a RowGraph with the same "
5464 "template parameters.");
5465 // We don't check whether src_graph has had fillComplete called,
5466 // because it doesn't matter whether the *source* graph has been
5467 // fillComplete'd. The target graph can not be fillComplete'd yet.
5468 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5469 (this->isFillComplete (), std::runtime_error,
5470 "The target graph of an Import or Export must not be fill complete.");
5471
// A null result here means the source is some other RowGraph implementation,
// which selects the slower ArrayView-based path below.
5472 const crs_graph_type* srcCrsGraphPtr =
5473 dynamic_cast<const crs_graph_type*> (&source);
5474
5475 if (srcCrsGraphPtr == nullptr) {
5476 using Teuchos::ArrayView;
5477 using LO = local_ordinal_type;
5478
5479 if (verbose) {
5480 std::ostringstream os;
5481 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5482 << endl;
5483 std::cerr << os.str();
5484 }
5485 // RowGraph::pack serves the "old" DistObject interface. It
5486 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5487 // entails deep-copying the exports buffer on output. RowGraph
5488 // is a convenience interface when not a CrsGraph, so we accept
5489 // the performance hit.
5490 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5491 auto exportLIDs_h = exportLIDs.view_host ();
5492 ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5493 exportLIDs_h.extent (0));
5494 Teuchos::Array<GO> exports_a;
5495
// The counts are overwritten on host, so discard any previous sync state
// and mark the host copy as modified.
5496 numPacketsPerLID.clear_sync_state ();
5497 numPacketsPerLID.modify_host ();
5498 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5499 ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5500 numPacketsPerLID_h.extent (0));
5501 srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5502 constantNumPackets);
// Reallocate exports only when its length differs from what pack produced.
5503 const size_t newSize = static_cast<size_t> (exports_a.size ());
5504 if (static_cast<size_t> (exports.extent (0)) != newSize) {
5505 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5506 exports = exports_dv_type ("exports", newSize);
5507 }
// Wrap the Teuchos::Array storage in an unmanaged host View so it can be
// the source of the deep_copy below.
5508 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5509 Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5510 exports.clear_sync_state ();
5511 exports.modify_host ();
5512 // DEEP_COPY REVIEW - NOT TESTED
5513 Kokkos::deep_copy (exports.view_host (), exports_a_h);
5514 }
5515 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
// NOTE(review): these conditions query this (target) graph, while the
// packing call reads from *srcCrsGraphPtr -- confirm that is intended.
5516 else if (! getColMap ().is_null () &&
5517 (this->getRowPtrsPackedDevice().extent (0) != 0 ||
5518 getRowMap ()->getLocalNumElements () == 0)) {
5519 if (verbose) {
5520 std::ostringstream os;
5521 os << *prefix << "packCrsGraphNew path" << endl;
5522 std::cerr << os.str();
5523 }
5524 using export_pids_type =
5525 Kokkos::DualView<const int*, buffer_device_type>;
5526 export_pids_type exportPIDs; // not filling it; needed for syntax
5527 using LO = local_ordinal_type;
5528 using NT = node_type;
5530 packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5531 exports, numPacketsPerLID,
5532 constantNumPackets, false);
5533 }
5534 else {
5535 srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5536 constantNumPackets);
5537 }
5538
5539 if (verbose) {
5540 std::ostringstream os;
5541 os << *prefix << "Done" << endl;
5542 std::cerr << os.str();
5543 }
5544 }
5545
// pack: ArrayView-based packing entry point. Packs the rows named by
// exportLIDs into `exports` and writes per-row counts to numPacketsPerLID.
//
// Dispatches to packCrsGraph when this graph has a column Map and either its
// packed row pointers are non-empty or its row Map has no local elements;
// otherwise it falls back to packFillActive. constantNumPackets is forwarded
// unchanged to whichever routine runs.
//
// NOTE(review): the listing numbers embedded in this extract skip 5548 and
// 5557, so the class-qualifier line and one line ahead of the packCrsGraph
// call appear to be missing here -- confirm against the upstream file.
5546 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5547 void
5549 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5550 Teuchos::Array<GlobalOrdinal>& exports,
5551 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5552 size_t& constantNumPackets) const
5553 {
5554 auto col_map = this->getColMap();
5555 // packCrsGraph requires k_rowPtrsPacked to be set
5556 if( !col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5558 packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5559 exportLIDs, constantNumPackets);
5560 }
5561 else {
5562 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5563 constantNumPackets);
5564 }
5565 }
5566
5567 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5568 void
5570 packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5571 Teuchos::Array<GlobalOrdinal>& exports,
5572 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5573 size_t& constantNumPackets) const
5574 {
5575 using std::endl;
5576 using LO = LocalOrdinal;
5577 using GO = GlobalOrdinal;
5578 using host_execution_space =
5579 typename Kokkos::View<size_t*, device_type>::
5580 HostMirror::execution_space;
5581 const char tfecfFuncName[] = "packFillActive: ";
5582 const bool verbose = verbose_;
5583
5584 const auto numExportLIDs = exportLIDs.size ();
5585 std::unique_ptr<std::string> prefix;
5586 if (verbose) {
5587 prefix = this->createPrefix("CrsGraph", "allocateIndices");
5588 std::ostringstream os;
5589 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5590 std::cerr << os.str();
5591 }
5592 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5593 (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5594 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5595 " = " << numPacketsPerLID.size () << ".");
5596
5597 const map_type& rowMap = * (this->getRowMap ());
5598 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5599 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5600 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5601 "This graph claims to be locally indexed, but its column Map is nullptr. "
5602 "This should never happen. Please report this bug to the Tpetra "
5603 "developers.");
5604
5605 // We may pack different amounts of data for different rows.
5606 constantNumPackets = 0;
5607
5608 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5609 // it might be now, but we might as well be safe).
5610 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5611 const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5612
5613 // Count the total number of packets (column indices, in the case
5614 // of a CrsGraph) to pack. While doing so, set
5615 // numPacketsPerLID[i] to the number of entries owned by the
5616 // calling process in (local) row exportLIDs[i] of the graph, that
5617 // the caller wants us to send out.
5618 Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5619 size_t totalNumPackets = 0;
5620 size_t errCount = 0;
5621 // lambdas turn what they capture const, so we can't
5622 // atomic_add(&errCount,1). Instead, we need a View to modify.
5623 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5624 host_device_type;
5625 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5626 constexpr size_t ONE = 1;
5627
5628 execute_sync_host_uvm_access(); // protect host UVM access
5629 Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5630 inputRange,
5631 [=] (const LO& i, size_t& curTotalNumPackets) {
5632 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5633 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5634 Kokkos::atomic_add (&errCountView(), ONE);
5635 numPacketsPerLID_raw[i] = 0;
5636 }
5637 else {
5638 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5639 numPacketsPerLID_raw[i] = numEnt;
5640 curTotalNumPackets += numEnt;
5641 }
5642 },
5643 totalNumPackets);
5644
5645 if (verbose) {
5646 std::ostringstream os;
5647 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5648 std::cerr << os.str();
5649 }
5650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5651 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5652 "one or more errors! errCount = " << errCount
5653 << ", totalNumPackets = " << totalNumPackets << ".");
5654 errCount = 0;
5655
5656 // Allocate space for all the column indices to pack.
5657 exports.resize (totalNumPackets);
5658
5659 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5660 (! this->supportsRowViews (), std::logic_error,
5661 "this->supportsRowViews() returns false; this should never happen. "
5662 "Please report this bug to the Tpetra developers.");
5663
5664 // Loop again over the rows to export, and pack rows of indices
5665 // into the output buffer.
5666
5667 if (verbose) {
5668 std::ostringstream os;
5669 os << *prefix << "Pack into exports" << endl;
5670 std::cerr << os.str();
5671 }
5672
5673 // Teuchos::ArrayView may not be thread safe, or may not be
5674 // efficiently thread safe. Better to use the raw pointer.
5675 GO* const exports_raw = exports.getRawPtr ();
5676 errCount = 0;
5677 Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5678 inputRange, [=, &prefix]
5679 (const LO i, size_t& exportsOffset, const bool final) {
5680 const size_t curOffset = exportsOffset;
5681 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5682 const RowInfo rowInfo =
5683 this->getRowInfoFromGlobalRowIndex (gblRow);
5684
5685 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5686 if (rowInfo.localRow == TDO::invalid ()) {
5687 if (verbose) {
5688 std::ostringstream os;
5689 os << *prefix << ": INVALID rowInfo: i=" << i
5690 << ", lclRow=" << exportLIDs_raw[i] << endl;
5691 std::cerr << os.str();
5692 }
5693 Kokkos::atomic_add (&errCountView(), ONE);
5694 }
5695 else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5696 if (verbose) {
5697 std::ostringstream os;
5698 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5699 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5700 "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5701 << ") > totalNumPackets (= " << totalNumPackets << ")."
5702 << endl;
5703 std::cerr << os.str();
5704 }
5705 Kokkos::atomic_add (&errCountView(), ONE);
5706 }
5707 else {
5708 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5709 if (this->isLocallyIndexed ()) {
5710 auto lclColInds = getLocalIndsViewHost (rowInfo);
5711 if (final) {
5712 for (LO k = 0; k < numEnt; ++k) {
5713 const LO lclColInd = lclColInds(k);
5714 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5715 // Pack it, even if it's wrong. Let the receiving
5716 // process deal with it. Otherwise, we'll miss out
5717 // on any correct data.
5718 exports_raw[curOffset + k] = gblColInd;
5719 } // for each entry in the row
5720 } // final pass?
5721 exportsOffset = curOffset + numEnt;
5722 }
5723 else if (this->isGloballyIndexed ()) {
5724 auto gblColInds = getGlobalIndsViewHost (rowInfo);
5725 if (final) {
5726 for (LO k = 0; k < numEnt; ++k) {
5727 const GO gblColInd = gblColInds(k);
5728 // Pack it, even if it's wrong. Let the receiving
5729 // process deal with it. Otherwise, we'll miss out
5730 // on any correct data.
5731 exports_raw[curOffset + k] = gblColInd;
5732 } // for each entry in the row
5733 } // final pass?
5734 exportsOffset = curOffset + numEnt;
5735 }
5736 // If neither globally nor locally indexed, then the graph
5737 // has no entries in this row (or indeed, in any row on this
5738 // process) to pack.
5739 }
5740 });
5741
5742 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5743 (errCount != 0, std::logic_error, "Packing encountered "
5744 "one or more errors! errCount = " << errCount
5745 << ", totalNumPackets = " << totalNumPackets << ".");
5746
5747 if (verbose) {
5748 std::ostringstream os;
5749 os << *prefix << "Done" << endl;
5750 std::cerr << os.str();
5751 }
5752 }
5753
// packFillActiveNew: DualView-based counterpart of packFillActive. Packs the
// global column indices of the rows named by exportLIDs into the host view
// of `exports` and writes per-row counts into numPacketsPerLID on host.
//
// Two host-parallel passes over exportLIDs:
//   1. a parallel_reduce that records each row's entry count and sums the
//      counts into totalNumPackets;
//   2. a parallel_scan that computes each row's offset and, on the final
//      pass, copies that row's column indices into exports_h (converting
//      local to global indices through the column Map when locally indexed).
//
// Parameters:
//   exportLIDs         - local row indices to pack; must not need a host
//                        sync (asserted).
//   exports            - [out] replaced by a new DualView if it is shorter
//                        than totalNumPackets; host side marked modified.
//   numPacketsPerLID   - [out] per-row entry counts; host side marked
//                        modified. Must match exportLIDs in length.
//   constantNumPackets - [out] set to 0.
//
// Raises std::runtime_error on a length mismatch, and std::logic_error if
// the graph is locally indexed without a column Map, if the counting pass
// recorded errors, or if supportsRowViews() is false. Errors recorded by the
// packing pass are NOT raised: that check is commented out near the end, so
// errCount from the second pass is only visible in verbose output.
//
// NOTE(review): the listing numbers embedded in this extract skip 5756, 5899
// and 5977, so the class-qualifier line and one line immediately before and
// one immediately after the parallel_scan appear to be missing here --
// confirm against the upstream file.
5754 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5755 void
5757 packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5758 buffer_device_type>& exportLIDs,
5759 Kokkos::DualView<packet_type*,
5760 buffer_device_type>& exports,
5761 Kokkos::DualView<size_t*,
5762 buffer_device_type> numPacketsPerLID,
5763 size_t& constantNumPackets) const
5764 {
5765 using std::endl;
5766 using LO = local_ordinal_type;
5767 using GO = global_ordinal_type;
5768 using host_execution_space = typename Kokkos::View<size_t*,
5769 device_type>::HostMirror::execution_space;
5770 using host_device_type =
5771 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5772 using exports_dv_type =
5773 Kokkos::DualView<packet_type*, buffer_device_type>;
5774 const char tfecfFuncName[] = "packFillActiveNew: ";
5775 const bool verbose = verbose_;
5776
5777 const auto numExportLIDs = exportLIDs.extent (0);
5778 std::unique_ptr<std::string> prefix;
5779 if (verbose) {
5780 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5781 std::ostringstream os;
5782 os << *prefix << "numExportLIDs: " << numExportLIDs
5783 << ", numPacketsPerLID.extent(0): "
5784 << numPacketsPerLID.extent(0) << endl;
5785 std::cerr << os.str();
5786 }
5787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5788 (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5789 "exportLIDs.extent(0) = " << numExportLIDs
5790 << " != numPacketsPerLID.extent(0) = "
5791 << numPacketsPerLID.extent (0) << ".");
5792 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5793 auto exportLIDs_h = exportLIDs.view_host ();
5794
5795 const map_type& rowMap = * (this->getRowMap ());
5796 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5797 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5798 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5799 "This graph claims to be locally indexed, but its column Map is nullptr. "
5800 "This should never happen. Please report this bug to the Tpetra "
5801 "developers.");
5802
5803 // We may pack different amounts of data for different rows.
5804 constantNumPackets = 0;
5805
// The counts are written on host below, so drop any previous sync state
// and mark the host copy as modified.
5806 numPacketsPerLID.clear_sync_state ();
5807 numPacketsPerLID.modify_host ();
5808 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5809
5810 // Count the total number of packets (column indices, in the case
5811 // of a CrsGraph) to pack. While doing so, set
5812 // numPacketsPerLID[i] to the number of entries owned by the
5813 // calling process in (local) row exportLIDs[i] of the graph, that
5814 // the caller wants us to send out.
5815 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5816 range_type inputRange (0, numExportLIDs);
5817 size_t totalNumPackets = 0;
5818 size_t errCount = 0;
5819 // lambdas turn what they capture const, so we can't
5820 // atomic_add(&errCount,1). Instead, we need a View to modify.
5821 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5822 constexpr size_t ONE = 1;
5823
5824 if (verbose) {
5825 std::ostringstream os;
5826 os << *prefix << "Compute totalNumPackets" << endl;
5827 std::cerr << os.str ();
5828 }
5829
5830 execute_sync_host_uvm_access(); // protect host UVM access
// Pass 1: per-row entry counts and their total. Rows whose local index is
// not in the row Map get a count of 0 and bump the error counter.
5831 Kokkos::parallel_reduce
5832 ("Tpetra::CrsGraph::pack: totalNumPackets",
5833 inputRange,
5834 [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
5835 const LO lclRow = exportLIDs_h[i];
5836 const GO gblRow = rowMap.getGlobalElement (lclRow);
5837 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5838 if (verbose) {
5839 std::ostringstream os;
5840 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5841 << " not in row Map on this process" << endl;
5842 std::cerr << os.str();
5843 }
5844 Kokkos::atomic_add (&errCountView(), ONE);
5845 numPacketsPerLID_h(i) = 0;
5846 }
5847 else {
5848 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5849 numPacketsPerLID_h(i) = numEnt;
5850 curTotalNumPackets += numEnt;
5851 }
5852 },
5853 totalNumPackets);
5854
5855 if (verbose) {
5856 std::ostringstream os;
5857 os << *prefix << "totalNumPackets: " << totalNumPackets
5858 << ", errCount: " << errCount << endl;
5859 std::cerr << os.str ();
5860 }
5861 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5862 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5863 "one or more errors! totalNumPackets: " << totalNumPackets
5864 << ", errCount: " << errCount << ".");
5865
5866 // Allocate space for all the column indices to pack.
// Unlike packAndPrepare's RowGraph path, the buffer is only replaced when
// it is too short; a longer existing buffer is reused as is.
5867 if (size_t(exports.extent (0)) < totalNumPackets) {
5868 // FIXME (mfh 09 Apr 2019) Create without initializing.
5869 exports = exports_dv_type ("exports", totalNumPackets);
5870 }
5871
5872 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5873 (! this->supportsRowViews (), std::logic_error,
5874 "this->supportsRowViews() returns false; this should never happen. "
5875 "Please report this bug to the Tpetra developers.");
5876
5877 // Loop again over the rows to export, and pack rows of indices
5878 // into the output buffer.
5879
5880 if (verbose) {
5881 std::ostringstream os;
5882 os << *prefix << "Pack into exports buffer" << endl;
5883 std::cerr << os.str();
5884 }
5885
5886 exports.clear_sync_state ();
5887 exports.modify_host ();
5888 auto exports_h = exports.view_host ();
5889
5890 errCount = 0;
5891
5892 // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5893 // (if locally indexed) or gblInds_wdv (if globally indexed).
5894 if(isLocallyIndexed())
5895 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5896 else if(isGloballyIndexed())
5897 gblInds_wdv.getHostView(Access::ReadOnly);
5898
// Pass 2: exportsOffset carries the running offset into exports_h. Every
// pass advances it by the row's entry count; only the final pass writes.
// Any error path returns early without advancing the offset.
5900 Kokkos::parallel_scan
5901 ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5902 inputRange, [=, &prefix]
5903 (const LO i, size_t& exportsOffset, const bool final) {
5904 const size_t curOffset = exportsOffset;
5905 const LO lclRow = exportLIDs_h(i);
5906 const GO gblRow = rowMap.getGlobalElement (lclRow);
5907 if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5908 if (verbose) {
5909 std::ostringstream os;
5910 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5911 << " not in row Map on this process" << endl;
5912 std::cerr << os.str();
5913 }
5914 Kokkos::atomic_add (&errCountView(), ONE);
5915 return;
5916 }
5917
5918 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5919 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5920 if (verbose) {
5921 std::ostringstream os;
5922 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5923 << ", gblRow=" << gblRow << ": invalid rowInfo"
5924 << endl;
5925 std::cerr << os.str();
5926 }
5927 Kokkos::atomic_add (&errCountView(), ONE);
5928 return;
5929 }
5930
// Refuse to write past the space counted in pass 1.
5931 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5932 if (verbose) {
5933 std::ostringstream os;
5934 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5935 << ", gblRow=" << gblRow << ", curOffset (= "
5936 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5937 << ") > totalNumPackets (= " << totalNumPackets
5938 << ")." << endl;
5939 std::cerr << os.str();
5940 }
5941 Kokkos::atomic_add (&errCountView(), ONE);
5942 return;
5943 }
5944
5945 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5946 if (this->isLocallyIndexed ()) {
5947 auto lclColInds = getLocalIndsViewHost(rowInfo);
5948 if (final) {
5949 for (LO k = 0; k < numEnt; ++k) {
5950 const LO lclColInd = lclColInds(k);
5951 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5952 // Pack it, even if it's wrong. Let the receiving
5953 // process deal with it. Otherwise, we'll miss out
5954 // on any correct data.
5955 exports_h(curOffset + k) = gblColInd;
5956 } // for each entry in the row
5957 } // final pass?
5958 exportsOffset = curOffset + numEnt;
5959 }
5960 else if (this->isGloballyIndexed ()) {
5961 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5962 if (final) {
5963 for (LO k = 0; k < numEnt; ++k) {
5964 const GO gblColInd = gblColInds(k);
5965 // Pack it, even if it's wrong. Let the receiving
5966 // process deal with it. Otherwise, we'll miss out
5967 // on any correct data.
5968 exports_h(curOffset + k) = gblColInd;
5969 } // for each entry in the row
5970 } // final pass?
5971 exportsOffset = curOffset + numEnt;
5972 }
5973 // If neither globally nor locally indexed, then the graph
5974 // has no entries in this row (or indeed, in any row on this
5975 // process) to pack.
5976 });
5978
// NOTE(review): the check below is disabled, so packing-pass errors are
// counted but never raised -- confirm that this is intentional.
5979 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5980 // (errCount != 0, std::logic_error, "Packing encountered "
5981 // "one or more errors! errCount = " << errCount
5982 // << ", totalNumPackets = " << totalNumPackets << ".");
5983
5984 if (verbose) {
5985 std::ostringstream os;
5986 os << *prefix << "errCount=" << errCount << "; Done" << endl;
5987 std::cerr << os.str();
5988 }
5989 }
5990
// unpackAndCombine: insert the received global column indices in `imports`
// into the rows of this graph named by importLIDs.
//
// Steps, in the order they run:
//   1. compute padding with computeCrsPaddingForImports and apply it with
//      applyCrsPadding;
//   2. check that importLIDs and numPacketsPerLID have equal length and that
//      this graph is not fill complete;
//   3. sync numPacketsPerLID and imports to host if needed;
//   4. for each imported row, insert its numPacketsPerLID[i] indices: via
//      insertGlobalIndicesFiltered when the graph is not locally indexed,
//      otherwise by converting each index with colMap_->getLocalElement and
//      calling insertLocalIndices.
//
// Parameters:
//   importLIDs       - local row indices receiving data; must not need a
//                      host sync (asserted).
//   imports          - received global column indices, row after row.
//   numPacketsPerLID - number of received indices for each imported row.
//   (constantNumPackets and combineMode are accepted but unused.)
//
// Raises std::runtime_error on a length mismatch or a fill-complete target.
// Any std::exception thrown inside the insert loop (including the
// std::logic_error for a row not in the row Map) is caught and re-raised as
// std::runtime_error carrying the original message.
//
// NOTE(review): the length and fill-complete checks run only after padding
// has already been computed and applied -- confirm that ordering is intended.
// NOTE(review): the listing numbers embedded in this extract skip 5993-5994
// and 6004, so the member-function name line and one line at the top of the
// body appear to be missing here -- confirm against the upstream file.
5991 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5992 void
5995 (const Kokkos::DualView<const local_ordinal_type*,
5996 buffer_device_type>& importLIDs,
5997 Kokkos::DualView<packet_type*,
5998 buffer_device_type> imports,
5999 Kokkos::DualView<size_t*,
6000 buffer_device_type> numPacketsPerLID,
6001 const size_t /* constantNumPackets */,
6002 const CombineMode /* combineMode */ )
6003 {
6005 using std::endl;
6006 using LO = local_ordinal_type;
6007 using GO = global_ordinal_type;
6008 const char tfecfFuncName[] = "unpackAndCombine";
6009
6010 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6011 const bool verbose = verbose_;
6012
6013 std::unique_ptr<std::string> prefix;
6014 if (verbose) {
6015 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6016 std::ostringstream os;
6017 os << *prefix << "Start" << endl;
6018 std::cerr << os.str ();
6019 }
// Scoped so the padding object is destroyed as soon as it has been applied.
6020 {
6021 auto padding = computeCrsPaddingForImports(
6022 importLIDs, imports, numPacketsPerLID, verbose);
6023 applyCrsPadding(*padding, verbose);
6024 if (verbose) {
6025 std::ostringstream os;
6026 os << *prefix << "Done computing & applying padding" << endl;
6027 std::cerr << os.str ();
6028 }
6029 }
6030
6031 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6032 // reasonable meaning, whether or not the matrix is fill complete.
6033 // It's just more work to implement.
6034
6035 // We are not checking the value of the CombineMode input
6036 // argument. For CrsGraph, we only support import/export
6037 // operations if fillComplete has not yet been called. Any
6038 // incoming column-indices are inserted into the target graph. In
6039 // this context, CombineMode values of ADD vs INSERT are
6040 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6041 // duplicate column-index is inserted, it will be compressed out
6042 // when fillComplete is called.
6043 //
6044 // Note: I think REPLACE means that an existing row is replaced by
6045 // the imported row, i.e., the existing indices are cleared. CGB,
6046 // 6/17/2010
6047
6048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6049 (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6050 std::runtime_error, ": importLIDs.extent(0) = "
6051 << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6052 << numPacketsPerLID.extent (0) << ".");
6053 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6054 (isFillComplete (), std::runtime_error,
6055 ": Import or Export operations are not allowed on a target "
6056 "CrsGraph that is fillComplete.");
6057
6058 const size_t numImportLIDs(importLIDs.extent(0));
6059 if (numPacketsPerLID.need_sync_host()) {
6060 numPacketsPerLID.sync_host();
6061 }
6062 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6063 if (imports.need_sync_host()) {
6064 imports.sync_host();
6065 }
6066 auto imports_h = imports.view_host();
6067 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6068 auto importLIDs_h = importLIDs.view_host();
6069
6070 // If we're inserting in local indices, let's pre-allocate
// The scratch array is sized to the largest per-row count so that it can be
// reused for every row in the insert loop.
6071 Teuchos::Array<LO> lclColInds;
6072 if (isLocallyIndexed()) {
6073 if (verbose) {
6074 std::ostringstream os;
6075 os << *prefix << "Preallocate local indices scratch" << endl;
6076 std::cerr << os.str();
6077 }
6078 size_t maxNumInserts = 0;
6079 for (size_t i = 0; i < numImportLIDs; ++i) {
6080 maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6081 }
6082 if (verbose) {
6083 std::ostringstream os;
6084 os << *prefix << "Local indices scratch size: "
6085 << maxNumInserts << endl;
6086 std::cerr << os.str();
6087 }
6088 lclColInds.resize (maxNumInserts);
6089 }
6090 else {
6091 if (verbose) {
6092 std::ostringstream os;
6093 os << *prefix;
6094 if (isGloballyIndexed()) {
6095 os << "Graph is globally indexed";
6096 }
6097 else {
6098 os << "Graph is neither locally nor globally indexed";
6099 }
6100 os << endl;
6101 std::cerr << os.str();
6102 }
6103 }
6104
6105 TEUCHOS_ASSERT( ! rowMap_.is_null() );
6106 const map_type& rowMap = *rowMap_;
6107
6108 try {
// importsOffset is the position in imports_h of the current row's first
// incoming column index.
6109 size_t importsOffset = 0;
6110 for (size_t i = 0; i < numImportLIDs; ++i) {
6111 if (verbose) {
6112 std::ostringstream os;
6113 os << *prefix << "i=" << i << ", numImportLIDs="
6114 << numImportLIDs << endl;
6115 std::cerr << os.str();
6116 }
6117 // We can only unpack into owned rows, since we only have
6118 // local row indices.
6119 const LO lclRow = importLIDs_h[i];
6120 const GO gblRow = rowMap.getGlobalElement(lclRow);
6121 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6122 (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6123 std::logic_error, "importLIDs[i=" << i << "]="
6124 << lclRow << " is not in the row Map on the calling "
6125 "process.");
// The size_t count is converted to LO here; a null pointer is passed for
// rows with nothing to insert.
6126 const LO numEnt = numPacketsPerLID_h[i];
6127 const GO* const gblColInds = (numEnt == 0) ? nullptr :
6128 imports_h.data() + importsOffset;
6129 if (! isLocallyIndexed()) {
6130 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6131 }
6132 else {
6133 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6134 // column Map filtering.
6135 for (LO j = 0; j < numEnt; j++) {
6136 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6137 }
6138 insertLocalIndices(lclRow, numEnt, lclColInds.data());
6139 }
6140 importsOffset += numEnt;
6141 }
6142 }
6143 catch (std::exception& e) {
// Re-raise as std::runtime_error, keeping the original message.
6144 TEUCHOS_TEST_FOR_EXCEPTION
6145 (true, std::runtime_error,
6146 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6147 "exception: " << endl << e.what());
6148 }
6149
6150 if (verbose) {
6151 std::ostringstream os;
6152 os << *prefix << "Done" << endl;
6153 std::cerr << os.str();
6154 }
6155 }
6156
// Replace this graph's row Map with newMap, and rebuild the domain,
// range, and column Maps and the Import / Export objects so that they
// use newMap's communicator.
//
// newMap: the new row Map. It may be null; in that case the new
//   communicator is taken to be null, and no Import or Export is
//   created.
//
// A domain or range Map that is the same object (pointer-equal) as the
// current row Map is replaced by newMap itself; otherwise it is
// obtained from replaceCommWithSubset(newComm), as is the column Map.
// All new values are staged in local variables and assigned to the
// data members only at the very end.
//
// NOTE(review): the class-qualifier line between "void" and the method
// name is missing from this listing (presumably
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::") -- confirm against
// the original source.
6157 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6158 void
6160 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6161 {
6162 using Teuchos::Comm;
6163 using Teuchos::null;
6164 using Teuchos::ParameterList;
6165 using Teuchos::RCP;
6166
6167 // We'll set all the state "transactionally," so that this method
6168 // satisfies the strong exception guarantee. This object's state
6169 // won't be modified until the end of this method.
6170 RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6171 RCP<import_type> importer;
6172 RCP<export_type> exporter;
6173
6174 rowMap = newMap;
// A null newMap yields a null communicator; the Import / Export
// section below is then skipped entirely.
6175 RCP<const Comm<int> > newComm =
6176 (newMap.is_null ()) ? null : newMap->getComm ();
6177
6178 if (! domainMap_.is_null ()) {
6179 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6180 // Common case: original domain and row Maps are identical.
6181 // In that case, we need only replace the original domain Map
6182 // with the new Map. This ensures that the new domain and row
6183 // Maps _stay_ identical.
6184 domainMap = newMap;
6185 } else {
6186 domainMap = domainMap_->replaceCommWithSubset (newComm);
6187 }
6188 }
6189 if (! rangeMap_.is_null ()) {
6190 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6191 // Common case: original range and row Maps are identical. In
6192 // that case, we need only replace the original range Map with
6193 // the new Map. This ensures that the new range and row Maps
6194 // _stay_ identical.
6195 rangeMap = newMap;
6196 } else {
6197 rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6198 }
6199 }
// The column Map is never pointer-compared with the row Map; it is
// always rebuilt on the new communicator.
6200 if (! colMap_.is_null ()) {
6201 colMap = colMap_->replaceCommWithSubset (newComm);
6202 }
6203
6204 // (Re)create the Export and / or Import if necessary.
6205 if (! newComm.is_null ()) {
6206 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6207 //
6208 // The operations below are collective on the new communicator.
6209 //
6210 // (Re)create the Export object if necessary. If I haven't
6211 // called fillComplete yet, I don't have a rangeMap, so I must
6212 // first check if the _original_ rangeMap is not null. Ditto
6213 // for the Import object and the domain Map.
// The cheap pointer comparison comes first so that isSameAs() is
// only called when the two Maps are distinct objects.
6214 if (! rangeMap_.is_null () &&
6215 rangeMap != rowMap &&
6216 ! rangeMap->isSameAs (*rowMap)) {
6217 if (params.is_null () || ! params->isSublist ("Export")) {
6218 exporter = rcp (new export_type (rowMap, rangeMap));
6219 }
6220 else {
6221 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6222 exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6223 }
6224 }
6225 // (Re)create the Import object if necessary.
// NOTE(review): this dereferences colMap, which is null whenever
// colMap_ was null; presumably a graph with a domain Map always has
// a column Map too -- confirm.
6226 if (! domainMap_.is_null () &&
6227 domainMap != colMap &&
6228 ! domainMap->isSameAs (*colMap)) {
6229 if (params.is_null () || ! params->isSublist ("Import")) {
6230 importer = rcp (new import_type (domainMap, colMap));
6231 } else {
6232 RCP<ParameterList> importSublist = sublist (params, "Import", true);
6233 importer = rcp (new import_type (domainMap, colMap, importSublist));
6234 }
6235 }
6236 } // if newComm is not null
6237
6238 // Defer side effects until the end. If no destructors throw
6239 // exceptions (they shouldn't anyway), then this method satisfies
6240 // the strong exception guarantee.
// If no Export / Import was created above, exporter_ / importer_
// are reset to null here.
6241 exporter_ = exporter;
6242 importer_ = importer;
6243 rowMap_ = rowMap;
6244 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6245 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6246 // the same object. We might want to get rid of this redundant
6247 // pointer sometime, but for now, we'll leave it alone and just
6248 // set map_ to the same object.
6249 this->map_ = rowMap;
6250 domainMap_ = domainMap;
6251 rangeMap_ = rangeMap;
6252 colMap_ = colMap;
6253 }
6254
6255 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6256 void
6258 getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6259 {
6260 using std::endl;
6261 using LO = LocalOrdinal;
6262 using GO = GlobalOrdinal;
6263 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6264 const bool verbose = verbose_;
6265
6266 std::unique_ptr<std::string> prefix;
6267 if (verbose) {
6268 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6269 std::ostringstream os;
6270 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6271 << endl;
6272 std::cerr << os.str();
6273 }
6274
6275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6276 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6277 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6278 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6279 (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6280 std::invalid_argument, "offsets.extent(0) = " <<
6281 offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6282
6283 const map_type& rowMap = * (this->getRowMap ());
6284 const map_type& colMap = * (this->getColMap ());
6285
6286 // We only use these in debug mode, but since debug mode is a
6287 // run-time option, they need to exist here. That's why we create
6288 // the vector with explicit size zero, to avoid overhead if debug
6289 // mode is off.
6290 bool allRowMapDiagEntriesInColMap = true;
6291 bool allDiagEntriesFound = true;
6292 bool allOffsetsCorrect = true;
6293 bool noOtherWeirdness = true;
6294 using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6295 wrong_offsets_type wrongOffsets(0);
6296
6297 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6298 // the subset of Map functionality that we need below.
6299 auto lclRowMap = rowMap.getLocalMap ();
6300 auto lclColMap = colMap.getLocalMap ();
6301
6302 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6303 // setup, at least on the host. For CUDA, we have to use LocalMap
6304 // (that comes from each of the two Maps).
6305
6306 const bool sorted = this->isSorted ();
6307 if (isFillComplete ()) {
6308 auto lclGraph = this->getLocalGraphDevice ();
6309 ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6310 lclGraph.row_map,
6311 lclGraph.entries, sorted);
6312 }
6313 else {
6314 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6315 // since the graph is not fill complete. The previous version
6316 // of this code assumed UVM; this version does not.
6317 auto offsets_h = Kokkos::create_mirror_view (offsets);
6318
6319 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6320 // Find the diagonal entry. Since the row Map and column Map
6321 // may differ, we have to compare global row and column
6322 // indices, not local.
6323 const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6324 const GO gblColInd = gblRowInd;
6325 const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6326
6327 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6328 allRowMapDiagEntriesInColMap = false;
6329 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6330 }
6331 else {
6332 const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6333 if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6334 rowInfo.numEntries > 0) {
6335
6336 auto colInds = this->getLocalIndsViewHost (rowInfo);
6337 const size_t hint = 0; // not needed for this algorithm
6338 const size_t offset =
6339 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6340 lclColInd, hint, sorted);
6341 offsets_h(lclRowInd) = offset;
6342
6343 if (debug_) {
6344 // Now that we have what we think is an offset, make sure
6345 // that it really does point to the diagonal entry. Offsets
6346 // are _relative_ to each row, not absolute (for the whole
6347 // (local) graph).
6348 typename local_inds_dualv_type::t_host::const_type lclColInds;
6349 try {
6350 lclColInds = this->getLocalIndsViewHost (rowInfo);
6351 }
6352 catch (...) {
6353 noOtherWeirdness = false;
6354 }
6355 // Don't continue with error checking if the above failed.
6356 if (noOtherWeirdness) {
6357 const size_t numEnt = lclColInds.extent (0);
6358 if (offset >= numEnt) {
6359 // Offsets are relative to each row, so this means that
6360 // the offset is out of bounds.
6361 allOffsetsCorrect = false;
6362 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6363 } else {
6364 const LO actualLclColInd = lclColInds(offset);
6365 const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6366 if (actualGblColInd != gblColInd) {
6367 allOffsetsCorrect = false;
6368 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6369 }
6370 }
6371 }
6372 } // debug_
6373 }
6374 else { // either row is empty, or something went wrong w/ getRowInfo()
6375 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6376 allDiagEntriesFound = false;
6377 }
6378 } // whether lclColInd is a valid local column index
6379 } // for each local row
6380 // DEEP_COPY REVIEW - NOT TESTED
6381 Kokkos::deep_copy (offsets, offsets_h);
6382 } // whether the graph is fill complete
6383
6384 if (verbose && wrongOffsets.size () != 0) {
6385 std::ostringstream os;
6386 os << *prefix << "Wrong offsets: [";
6387 for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6388 os << "(" << wrongOffsets[k].first << ","
6389 << wrongOffsets[k].second << ")";
6390 if (k + 1 < wrongOffsets.size ()) {
6391 os << ", ";
6392 }
6393 }
6394 os << "]" << endl;
6395 std::cerr << os.str();
6396 }
6397
6398 if (debug_) {
6399 using Teuchos::reduceAll;
6400 using std::endl;
6401 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6402 const bool localSuccess =
6403 allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6404 const int numResults = 5;
6405 int lclResults[5];
6406 lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6407 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6408 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6409 lclResults[3] = noOtherWeirdness ? 1 : 0;
6410 // min-all-reduce will compute least rank of all the processes
6411 // that didn't succeed.
6412 lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6413
6414 int gblResults[5];
6415 gblResults[0] = 0;
6416 gblResults[1] = 0;
6417 gblResults[2] = 0;
6418 gblResults[3] = 0;
6419 gblResults[4] = 0;
6420 reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6421 numResults, lclResults, gblResults);
6422
6423 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6424 || gblResults[3] != 1) {
6425 std::ostringstream os; // build error message
6426 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6427 "possibly among others): " << endl;
6428 if (gblResults[0] == 0) {
6429 os << " - The column Map does not contain at least one diagonal entry "
6430 "of the graph." << endl;
6431 }
6432 if (gblResults[1] == 0) {
6433 os << " - On one or more processes, some row does not contain a "
6434 "diagonal entry." << endl;
6435 }
6436 if (gblResults[2] == 0) {
6437 os << " - On one or more processes, some offsets are incorrect."
6438 << endl;
6439 }
6440 if (gblResults[3] == 0) {
6441 os << " - One or more processes had some other error."
6442 << endl;
6443 }
6444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6445 }
6446 } // debug_
6447 }
6448
// Hand back, through 'offsets', the graph's cached view of off-rank
// offsets (k_offRankOffsets_), computing it first if necessary.
//
// offsets [out]: assigned to refer to k_offRankOffsets_, which has
//   getLocalNumRows()+1 entries.
//
// If the cache flag haveLocalOffRankOffsets_ is set and the cached view
// has the expected length, the cached view is returned immediately.
// Otherwise a new view is allocated and, if the graph is fill
// complete, filled by Tpetra::Details::getGraphOffRankOffsets using
// the local column Map, local domain Map, and local graph.
//
// Throws std::runtime_error if the graph has no column Map.
//
// NOTE(review): this const method assigns to haveLocalOffRankOffsets_
// and k_offRankOffsets_; presumably both are declared mutable --
// confirm in the class declaration.
// NOTE(review): the class-qualifier line between "void" and the method
// name is missing from this listing (presumably
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::") -- confirm against
// the original source.
6449 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6450 void
6452 getLocalOffRankOffsets (offset_device_view_type& offsets) const
6453 {
6454 using std::endl;
6455 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6456 const bool verbose = verbose_;
6457
6458 std::unique_ptr<std::string> prefix;
6459 if (verbose) {
6460 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6461 std::ostringstream os;
6462 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6463 << endl;
6464 std::cerr << os.str();
6465 }
6466
6467 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6468 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6469 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6470
6471 const size_t lclNumRows = this->getLocalNumRows ();
6472
// Fast path: reuse the cached offsets if they are marked valid and
// still have the expected length.
6473 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6474 offsets = k_offRankOffsets_;
6475 return;
6476 }
// Invalidate the cache before reallocating.  The new view is
// allocated without initialization, and 'offsets' is pointed at it
// right away.
// NOTE(review): if the graph is not fill complete, nothing below
// writes to this view, so the caller receives uninitialized
// contents and the cache flag stays false -- confirm this is
// intended.
6477 haveLocalOffRankOffsets_ = false;
6478 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6479 offsets = k_offRankOffsets_;
6480
// NOTE(review): getDomainMap() is dereferenced before the
// isFillComplete() check below; presumably the domain Map can be
// null before fillComplete -- verify.
6481 const map_type& colMap = * (this->getColMap ());
6482 const map_type& domMap = * (this->getDomainMap ());
6483
6484 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6485 // the subset of Map functionality that we need below.
6486 auto lclColMap = colMap.getLocalMap ();
6487 auto lclDomMap = domMap.getLocalMap ();
6488
6489 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6490 // setup, at least on the host. For CUDA, we have to use LocalMap
6491 // (that comes from each of the two Maps).
6492
6493 TEUCHOS_ASSERT(this->isSorted ());
6494 if (isFillComplete ()) {
6495 auto lclGraph = this->getLocalGraphDevice ();
6496 ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6497 lclColMap, lclDomMap,
6498 lclGraph);
// Only now is the cached view marked as valid.
6499 haveLocalOffRankOffsets_ = true;
6500 }
6501 }
6502
6503 namespace { // (anonymous)
6504
6505 // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6506 // below). The point is to avoid the deep copy between the input
6507 // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6508 // can't use UVM to avoid the deep copy with CUDA, because the
6509 // ArrayRCP is a host pointer, while the input to the graph's
6510 // getLocalDiagOffsets method is a device pointer. Assigning a
6511 // host pointer to a device pointer is incorrect unless the host
6512 // pointer points to host pinned memory. The goal is to get rid
6513 // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6514 // copy for backwards compatibility.
6515 //
6516 // We have to use template magic because
6517 // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6518 // if device_type::memory_space is not Kokkos::HostSpace (as is
6519 // the case with CUDA).
6520
6521 template<class DeviceType,
6522 const bool memSpaceIsHostSpace =
6523 std::is_same<typename DeviceType::memory_space,
6524 Kokkos::HostSpace>::value>
6525 struct HelpGetLocalDiagOffsets {};
6526
6527 template<class DeviceType>
6528 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6529 typedef DeviceType device_type;
6530 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6531 Kokkos::MemoryUnmanaged> device_offsets_type;
6532 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6533 Kokkos::MemoryUnmanaged> host_offsets_type;
6534
6535 static device_offsets_type
6536 getDeviceOffsets (const host_offsets_type& hostOffsets)
6537 {
6538 // Host and device are the same; no need to allocate a
6539 // temporary device View.
6540 return hostOffsets;
6541 }
6542
6543 static void
6544 copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6545 const device_offsets_type& /* deviceOffsets */)
6546 { /* copy back not needed; host and device are the same */ }
6547 };
6548
6549 template<class DeviceType>
6550 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6551 typedef DeviceType device_type;
6552 // We have to do a deep copy, since host memory space != device
6553 // memory space. Thus, the device View is managed (we need to
6554 // allocate a temporary device View).
6555 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6556 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6557 Kokkos::MemoryUnmanaged> host_offsets_type;
6558
6559 static device_offsets_type
6560 getDeviceOffsets (const host_offsets_type& hostOffsets)
6561 {
6562 // Host memory space != device memory space, so we must
6563 // allocate a temporary device View for the graph.
6564 return device_offsets_type ("offsets", hostOffsets.extent (0));
6565 }
6566
6567 static void
6568 copyBackIfNeeded (const host_offsets_type& hostOffsets,
6569 const device_offsets_type& deviceOffsets)
6570 {
6571 // DEEP_COPY REVIEW - NOT TESTED
6572 Kokkos::deep_copy (hostOffsets, deviceOffsets);
6573 }
6574 };
6575 } // namespace (anonymous)
6576
6577
6578 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6579 void
6581 getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6582 {
6583 typedef LocalOrdinal LO;
6584 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6585 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6586 (! this->hasColMap (), std::runtime_error,
6587 "The graph does not yet have a column Map.");
6588 const LO myNumRows = static_cast<LO> (this->getLocalNumRows ());
6589 if (static_cast<LO> (offsets.size ()) != myNumRows) {
6590 // NOTE (mfh 21 Jan 2016) This means that the method does not
6591 // satisfy the strong exception guarantee (no side effects
6592 // unless successful).
6593 offsets.resize (myNumRows);
6594 }
6595
6596 // mfh 21 Jan 2016: This method unfortunately takes a
6597 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6598 // device pointer. We can't access host memory from the device;
6599 // that's the wrong direction for UVM. (It's the right direction
6600 // for inefficient host pinned memory, but we don't want to use
6601 // that here.) Thus, if device memory space != host memory space,
6602 // we allocate and use a temporary device View to get the offsets.
6603 // If the two spaces are equal, the template magic makes the deep
6604 // copy go away.
6605 typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6606 typedef typename helper_type::host_offsets_type host_offsets_type;
6607 // Unmanaged host View that views the output array.
6608 host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6609 // Allocate temp device View if host != device, else reuse host array.
6610 auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6611 // NOT recursion; this calls the overload that takes a device View.
6612 this->getLocalDiagOffsets (deviceOffsets);
6613 helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6614 }
6615
6616 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6617 bool
6622
6623 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6624 void
6627 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6628 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6629 const Teuchos::RCP<const map_type>& domainMap,
6630 const Teuchos::RCP<const map_type>& rangeMap,
6631 const Teuchos::RCP<Teuchos::ParameterList>& params) const
6632 {
6637 using Teuchos::ArrayRCP;
6638 using Teuchos::ArrayView;
6639 using Teuchos::Comm;
6640 using Teuchos::ParameterList;
6641 using Teuchos::rcp;
6642 using Teuchos::RCP;
6643#ifdef HAVE_TPETRA_MMM_TIMINGS
6644 using std::string;
6645 using Teuchos::TimeMonitor;
6646#endif
6647
6648 using LO = LocalOrdinal;
6649 using GO = GlobalOrdinal;
6650 using NT = node_type;
6651 using this_CRS_type = CrsGraph<LO, GO, NT>;
6652 using ivector_type = Vector<int, LO, GO, NT>;
6653
6654 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6655
6656#ifdef HAVE_TPETRA_MMM_TIMINGS
6657 string label;
6658 if(!params.is_null()) label = params->get("Timer Label", label);
6659 string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6660 RCP<TimeMonitor> MM =
6661 rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6662#endif
6663
6664 // Make sure that the input argument rowTransfer is either an
6665 // Import or an Export. Import and Export are the only two
6666 // subclasses of Transfer that we defined, but users might
6667 // (unwisely, for now at least) decide to implement their own
6668 // subclasses. Exclude this possibility.
6669 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6670 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6671 TEUCHOS_TEST_FOR_EXCEPTION(
6672 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6673 prefix << "The 'rowTransfer' input argument must be either an Import or "
6674 "an Export, and its template parameters must match the corresponding "
6675 "template parameters of the CrsGraph.");
6676
6677 // Make sure that the input argument domainTransfer is either an
6678 // Import or an Export. Import and Export are the only two
6679 // subclasses of Transfer that we defined, but users might
6680 // (unwisely, for now at least) decide to implement their own
6681 // subclasses. Exclude this possibility.
6682 Teuchos::RCP<const import_type> xferDomainAsImport =
6683 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6684 Teuchos::RCP<const export_type> xferDomainAsExport =
6685 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6686
6687 if(! domainTransfer.is_null()) {
6688
6689 TEUCHOS_TEST_FOR_EXCEPTION(
6690 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6691 prefix << "The 'domainTransfer' input argument must be either an "
6692 "Import or an Export, and its template parameters must match the "
6693 "corresponding template parameters of the CrsGraph.");
6694
6695 TEUCHOS_TEST_FOR_EXCEPTION(
6696 ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6697 (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6698 ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6699 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6700 "must be of the same type (either Import or Export).");
6701
6702 TEUCHOS_TEST_FOR_EXCEPTION(
6703 ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6704 (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6705 ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6706 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6707 "must be of the same type (either Import or Export).");
6708
6709 } // domainTransfer != null
6710
6711
6712 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6713 // if the source Map is not distributed but the target Map is?
6714 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6715
6716 //
6717 // Get the caller's parameters
6718 //
6719
6720 bool reverseMode = false; // Are we in reverse mode?
6721 bool restrictComm = false; // Do we need to restrict the communicator?
6722 RCP<ParameterList> graphparams; // parameters for the destination graph
6723 if (! params.is_null()) {
6724 reverseMode = params->get("Reverse Mode", reverseMode);
6725 restrictComm = params->get("Restrict Communicator", restrictComm);
6726 graphparams = sublist(params, "CrsGraph");
6727 }
6728
6729 // Get the new domain and range Maps. We need some of them for error
6730 // checking, now that we have the reverseMode parameter.
6731 RCP<const map_type> MyRowMap = reverseMode ?
6732 rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6733 RCP<const map_type> MyColMap; // create this below
6734 RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6735 RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6736 RCP<const map_type> BaseRowMap = MyRowMap;
6737 RCP<const map_type> BaseDomainMap = MyDomainMap;
6738
6739 // If the user gave us a nonnull destGraph, then check whether it's
6740 // "pristine." That means that it has no entries.
6741 //
6742 // FIXME (mfh 15 May 2014) If this is not true on all processes,
6743 // then this exception test may hang. It would be better to
6744 // forward an error flag to the next communication phase.
6745 if (! destGraph.is_null()) {
6746 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6747 // whether a graph or matrix has no entries on the calling
6748 // process, is that it is neither locally nor globally indexed.
6749 // This may change eventually with the Kokkos refactor version
6750 // of Tpetra, so it would be better just to check the quantity
6751 // of interest directly. Note that with the Kokkos refactor
6752 // version of Tpetra, asking for the total number of entries in
6753 // a graph or matrix that is not fill complete might require
6754 // computation (kernel launch), since it is not thread scalable
6755 // to update a count every time an entry is inserted.
6756 const bool NewFlag =
6757 ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6758 TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6759 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6760 "if its graph is empty (neither locally nor globally indexed).");
6761
6762 // FIXME (mfh 15 May 2014) At some point, we want to change
6763 // graphs and matrices so that their DistObject Map
6764 // (this->getMap()) may differ from their row Map. This will
6765 // make redistribution for 2-D distributions more efficient. I
6766 // hesitate to change this check, because I'm not sure how much
6767 // the code here depends on getMap() and getRowMap() being the
6768 // same.
6769 TEUCHOS_TEST_FOR_EXCEPTION(
6770 ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6771 prefix << "The (row) Map of the input argument 'destGraph' is not the "
6772 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6773
6774 TEUCHOS_TEST_FOR_EXCEPTION(
6775 ! destGraph->checkSizes(*this), std::invalid_argument,
6776 prefix << "You provided a nonnull destination graph, but checkSizes() "
6777 "indicates that it is not a legal legal target for redistribution from "
6778 "the source graph (*this). This may mean that they do not have the "
6779 "same dimensions.");
6780 }
6781
6782 // If forward mode (the default), then *this's (row) Map must be
6783 // the same as the source Map of the Transfer. If reverse mode,
6784 // then *this's (row) Map must be the same as the target Map of
6785 // the Transfer.
6786 //
6787 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6788 // and matrices so that their DistObject Map (this->getMap()) may
6789 // differ from their row Map. This will make redistribution for
6790 // 2-D distributions more efficient. I hesitate to change this
6791 // check, because I'm not sure how much the code here depends on
6792 // getMap() and getRowMap() being the same.
6793 TEUCHOS_TEST_FOR_EXCEPTION(
6794 ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6795 std::invalid_argument, prefix <<
6796 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6797
6798 TEUCHOS_TEST_FOR_EXCEPTION(
6799 ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6800 std::invalid_argument, prefix <<
6801 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6802
6803 // checks for domainTransfer
6804 TEUCHOS_TEST_FOR_EXCEPTION(
6805 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6806 std::invalid_argument,
6807 prefix << "The target map of the 'domainTransfer' input argument must be "
6808 "the same as the rebalanced domain map 'domainMap'");
6809
6810 TEUCHOS_TEST_FOR_EXCEPTION(
6811 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6812 std::invalid_argument,
6813 prefix << "The source map of the 'domainTransfer' input argument must be "
6814 "the same as the rebalanced domain map 'domainMap'");
6815
6816 // The basic algorithm here is:
6817 //
6818 // 1. Call the moral equivalent of "Distor.do" to handle the import.
6819 // 2. Copy all the Imported and Copy/Permuted data into the raw
6820 // CrsGraph pointers, still using GIDs.
6821 // 3. Call an optimized version of MakeColMap that avoids the
6822 // Directory lookups (since the importer knows who owns all the
6823 // GIDs) AND reindexes to LIDs.
6824 // 4. Call expertStaticFillComplete()
6825
6826 // Get information from the Importer
6827 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6828 ArrayView<const LO> ExportLIDs = reverseMode ?
6829 rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6830 ArrayView<const LO> RemoteLIDs = reverseMode ?
6831 rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6832 ArrayView<const LO> PermuteToLIDs = reverseMode ?
6833 rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6834 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6835 rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6836 Distributor& Distor = rowTransfer.getDistributor();
6837
6838 // Owning PIDs
6839 Teuchos::Array<int> SourcePids;
6840 Teuchos::Array<int> TargetPids;
6841 int MyPID = getComm()->getRank();
6842
6843 // Temp variables for sub-communicators
6844 RCP<const map_type> ReducedRowMap, ReducedColMap,
6845 ReducedDomainMap, ReducedRangeMap;
6846 RCP<const Comm<int> > ReducedComm;
6847
6848 // If the user gave us a null destGraph, then construct the new
6849 // destination graph. We will replace its column Map later.
6850 if (destGraph.is_null()) {
6851 destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6852 }
6853
6854 /***************************************************/
6855 /***** 1) First communicator restriction phase ****/
6856 /***************************************************/
6857 if (restrictComm) {
6858 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6859 ReducedComm = ReducedRowMap.is_null() ?
6860 Teuchos::null :
6861 ReducedRowMap->getComm();
6862 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6863
6864 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6865 ReducedRowMap :
6866 MyDomainMap->replaceCommWithSubset(ReducedComm);
6867 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6868 ReducedRowMap :
6869 MyRangeMap->replaceCommWithSubset(ReducedComm);
6870
6871 // Reset the "my" maps
6872 MyRowMap = ReducedRowMap;
6873 MyDomainMap = ReducedDomainMap;
6874 MyRangeMap = ReducedRangeMap;
6875
6876 // Update my PID, if we've restricted the communicator
6877 if (! ReducedComm.is_null()) {
6878 MyPID = ReducedComm->getRank();
6879 }
6880 else {
6881 MyPID = -2; // For debugging
6882 }
6883 }
6884 else {
6885 ReducedComm = MyRowMap->getComm();
6886 }
6887
6888 /***************************************************/
6889 /***** 2) From Tpetra::DistObject::doTransfer() ****/
6890 /***************************************************/
6891#ifdef HAVE_TPETRA_MMM_TIMINGS
6892 MM = Teuchos::null;
6893 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
6894#endif
6895 // Get the owning PIDs
6896 RCP<const import_type> MyImporter = getImporter();
6897
6898 // check whether domain maps of source graph and base domain map is the same
6899 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6900
6901 if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6902 // Same domain map as source graph
6903 //
6904 // NOTE: This won't work for restrictComm (because the Import
6905 // doesn't know the restricted PIDs), though writing an
6906 // optimized version for that case would be easy (Import an
6907 // IntVector of the new PIDs). Might want to add this later.
6908 Import_Util::getPids(*MyImporter, SourcePids, false);
6909 }
6910 else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6911 // Same domain map as source graph (restricted communicator)
6912 // We need one import from the domain to the column map
6913 ivector_type SourceDomain_pids(getDomainMap(),true);
6914 ivector_type SourceCol_pids(getColMap());
6915 // SourceDomain_pids contains the restricted pids
6916 SourceDomain_pids.putScalar(MyPID);
6917
6918 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6919 SourcePids.resize(getColMap()->getLocalNumElements());
6920 SourceCol_pids.get1dCopy(SourcePids());
6921 }
6922 else if (MyImporter.is_null() && bSameDomainMap) {
6923 // Graph has no off-process entries
6924 SourcePids.resize(getColMap()->getLocalNumElements());
6925 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6926 }
6927 else if ( ! MyImporter.is_null() &&
6928 ! domainTransfer.is_null() ) {
6929 // general implementation for rectangular matrices with
6930 // domain map different than SourceGraph domain map.
6931 // User has to provide a DomainTransfer object. We need
6932 // two communications (import/export)
6933
6934 // TargetDomain_pids lives on the rebalanced new domain map
6935 ivector_type TargetDomain_pids(domainMap);
6936 TargetDomain_pids.putScalar(MyPID);
6937
6938 // SourceDomain_pids lives on the non-rebalanced old domain map
6939 ivector_type SourceDomain_pids(getDomainMap());
6940
6941 // SourceCol_pids lives on the non-rebalanced old column map
6942 ivector_type SourceCol_pids(getColMap());
6943
6944 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6945 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6946 }
6947 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6948 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6949 }
6950 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6951 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6952 }
6953 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6954 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6955 }
6956 else {
6957 TEUCHOS_TEST_FOR_EXCEPTION(
6958 true, std::logic_error,
6959 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6960 }
6961 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6962 SourcePids.resize(getColMap()->getLocalNumElements());
6963 SourceCol_pids.get1dCopy(SourcePids());
6964 }
6965 else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6966 getDomainMap()->isSameAs(*getRowMap())) {
6967 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6968 ivector_type TargetRow_pids(domainMap);
6969 ivector_type SourceRow_pids(getRowMap());
6970 ivector_type SourceCol_pids(getColMap());
6971
6972 TargetRow_pids.putScalar(MyPID);
6973 if (! reverseMode && xferAsImport != nullptr) {
6974 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6975 }
6976 else if (reverseMode && xferAsExport != nullptr) {
6977 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6978 }
6979 else if (! reverseMode && xferAsExport != nullptr) {
6980 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6981 }
6982 else if (reverseMode && xferAsImport != nullptr) {
6983 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6984 }
6985 else {
6986 TEUCHOS_TEST_FOR_EXCEPTION(
6987 true, std::logic_error,
6988 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6989 }
6990 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6991 SourcePids.resize(getColMap()->getLocalNumElements());
6992 SourceCol_pids.get1dCopy(SourcePids());
6993 }
6994 else {
6995 TEUCHOS_TEST_FOR_EXCEPTION(
6996 true, std::invalid_argument,
6997 prefix << "This method only allows either domainMap == getDomainMap(), "
6998 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6999 }
7000
7001 // Tpetra-specific stuff
7002 size_t constantNumPackets = destGraph->constantNumberOfPackets();
7003 if (constantNumPackets == 0) {
7004 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7005 RemoteLIDs.size());
7006 }
7007 else {
7008 // There are a constant number of packets per element. We
7009 // already know (from the number of "remote" (incoming)
7010 // elements) how many incoming elements we expect, so we can
7011 // resize the buffer accordingly.
7012 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7013 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7014 }
7015
7016 {
7017 // packAndPrepare* methods modify numExportPacketsPerLID_.
7018 destGraph->numExportPacketsPerLID_.modify_host();
7019 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7020 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7021
7022 // Pack & Prepare w/ owning PIDs
7023 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7024 numExportPacketsPerLID, ExportLIDs,
7025 SourcePids, constantNumPackets);
7026 }
7027
7028 // Do the exchange of remote data.
7029#ifdef HAVE_TPETRA_MMM_TIMINGS
7030 MM = Teuchos::null;
7031 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7032#endif
7033
7034 if (communication_needed) {
7035 if (reverseMode) {
7036 if (constantNumPackets == 0) { // variable number of packets per LID
7037 // Make sure that host has the latest version, since we're
7038 // using the version on host. If host has the latest
7039 // version, syncing to host does nothing.
7040 destGraph->numExportPacketsPerLID_.sync_host();
7041 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7042 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7043 destGraph->numImportPacketsPerLID_.sync_host();
7044 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7045 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7046
7047 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7048 destGraph->numImportPacketsPerLID_.view_host());
7049 size_t totalImportPackets = 0;
7050 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7051 totalImportPackets += numImportPacketsPerLID[i];
7052 }
7053
7054 // Reallocation MUST go before setting the modified flag,
7055 // because it may clear out the flags.
7056 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7057 destGraph->imports_.modify_host();
7058 auto hostImports = destGraph->imports_.view_host();
7059 // This is a legacy host pack/unpack path, so use the host
7060 // version of exports_.
7061 destGraph->exports_.sync_host();
7062 auto hostExports = destGraph->exports_.view_host();
7063 Distor.doReversePostsAndWaits(hostExports,
7064 numExportPacketsPerLID,
7065 hostImports,
7066 numImportPacketsPerLID);
7067 }
7068 else { // constant number of packets per LI
7069 destGraph->imports_.modify_host();
7070 auto hostImports = destGraph->imports_.view_host();
7071 // This is a legacy host pack/unpack path, so use the host
7072 // version of exports_.
7073 destGraph->exports_.sync_host();
7074 auto hostExports = destGraph->exports_.view_host();
7075 Distor.doReversePostsAndWaits(hostExports,
7076 constantNumPackets,
7077 hostImports);
7078 }
7079 }
7080 else { // forward mode (the default)
7081 if (constantNumPackets == 0) { // variable number of packets per LID
7082 // Make sure that host has the latest version, since we're
7083 // using the version on host. If host has the latest
7084 // version, syncing to host does nothing.
7085 destGraph->numExportPacketsPerLID_.sync_host();
7086 destGraph->numImportPacketsPerLID_.sync_host();
7087 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7088 destGraph->numImportPacketsPerLID_.view_host());
7089
7090 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7091 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7092 size_t totalImportPackets = 0;
7093 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7094 totalImportPackets += numImportPacketsPerLID[i];
7095 }
7096
7097 // Reallocation MUST go before setting the modified flag,
7098 // because it may clear out the flags.
7099 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7100 destGraph->imports_.modify_host();
7101 auto hostImports = destGraph->imports_.view_host();
7102 // This is a legacy host pack/unpack path, so use the host
7103 // version of exports_.
7104 destGraph->exports_.sync_host();
7105 auto hostExports = destGraph->exports_.view_host();
7106 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7107 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7108 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
7109 }
7110 else { // constant number of packets per LID
7111 destGraph->imports_.modify_host();
7112 auto hostImports = destGraph->imports_.view_host();
7113 // This is a legacy host pack/unpack path, so use the host
7114 // version of exports_.
7115 destGraph->exports_.sync_host();
7116 auto hostExports = destGraph->exports_.view_host();
7117 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
7118 }
7119 }
7120 }
7121
7122 /*********************************************************************/
7123 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7124 /*********************************************************************/
7125
7126#ifdef HAVE_TPETRA_MMM_TIMINGS
7127 MM = Teuchos::null;
7128 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7129#endif
7130
7131 // Backwards compatibility measure. We'll use this again below.
7132 destGraph->numImportPacketsPerLID_.sync_host();
7133 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7134 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7135 destGraph->imports_.sync_host();
7136 Teuchos::ArrayView<const packet_type> hostImports =
7137 getArrayViewFromDualView(destGraph->imports_);
7138 size_t mynnz =
7139 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7140 numImportPacketsPerLID,
7141 constantNumPackets, INSERT,
7142 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7143 size_t N = BaseRowMap->getLocalNumElements();
7144
7145 // Allocations
7146 ArrayRCP<size_t> CSR_rowptr(N+1);
7147 ArrayRCP<GO> CSR_colind_GID;
7148 ArrayRCP<LO> CSR_colind_LID;
7149 CSR_colind_GID.resize(mynnz);
7150
7151 // If LO and GO are the same, we can reuse memory when
7152 // converting the column indices from global to local indices.
7153 if (typeid(LO) == typeid(GO)) {
7154 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7155 }
7156 else {
7157 CSR_colind_LID.resize(mynnz);
7158 }
7159
7160 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7161 // unpackAndCombine method on a "CrsArrays" object? This passing
7162 // in a huge list of arrays is icky. Can't we have a bit of an
7163 // abstraction? Implementing a concrete DistObject subclass only
7164 // takes five methods.
7165 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7166 numImportPacketsPerLID, constantNumPackets,
7167 INSERT, NumSameIDs, PermuteToLIDs,
7168 PermuteFromLIDs, N, mynnz, MyPID,
7169 CSR_rowptr(), CSR_colind_GID(),
7170 SourcePids(), TargetPids);
7171
7172 /**************************************************************/
7173 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7174 /**************************************************************/
7175#ifdef HAVE_TPETRA_MMM_TIMINGS
7176 MM = Teuchos::null;
7177 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7178#endif
7179 // Call an optimized version of makeColMap that avoids the
7180 // Directory lookups (since the Import object knows who owns all
7181 // the GIDs).
7182 Teuchos::Array<int> RemotePids;
7184 CSR_colind_LID(),
7185 CSR_colind_GID(),
7186 BaseDomainMap,
7187 TargetPids, RemotePids,
7188 MyColMap);
7189
7190 /*******************************************************/
7191 /**** 4) Second communicator restriction phase ****/
7192 /*******************************************************/
7193 if (restrictComm) {
7194 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7195 ReducedRowMap :
7196 MyColMap->replaceCommWithSubset(ReducedComm);
7197 MyColMap = ReducedColMap; // Reset the "my" maps
7198 }
7199
7200 // Replace the col map
7201 destGraph->replaceColMap(MyColMap);
7202
7203 // Short circuit if the processor is no longer in the communicator
7204 //
7205 // NOTE: Epetra modifies all "removed" processes so they
7206 // have a dummy (serial) Map that doesn't touch the original
7207 // communicator. Duplicating that here might be a good idea.
7208 if (ReducedComm.is_null()) {
7209 return;
7210 }
7211
7212 /***************************************************/
7213 /**** 5) Sort ****/
7214 /***************************************************/
7215 if ((! reverseMode && xferAsImport != nullptr) ||
7216 (reverseMode && xferAsExport != nullptr)) {
7217 Import_Util::sortCrsEntries(CSR_rowptr(),
7218 CSR_colind_LID());
7219 }
7220 else if ((! reverseMode && xferAsExport != nullptr) ||
7221 (reverseMode && xferAsImport != nullptr)) {
7223 CSR_colind_LID());
7224 if (CSR_rowptr[N] != mynnz) {
7225 CSR_colind_LID.resize(CSR_rowptr[N]);
7226 }
7227 }
7228 else {
7229 TEUCHOS_TEST_FOR_EXCEPTION(
7230 true, std::logic_error,
7231 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7232 }
7233 /***************************************************/
7234 /**** 6) Reset the colmap and the arrays ****/
7235 /***************************************************/
7236
7237 // Call constructor for the new graph (restricted as needed)
7238 //
7239 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7240
7241 /***************************************************/
7242 /**** 7) Build Importer & Call ESFC ****/
7243 /***************************************************/
7244 // Pre-build the importer using the existing PIDs
7245 Teuchos::ParameterList esfc_params;
7246#ifdef HAVE_TPETRA_MMM_TIMINGS
7247 MM = Teuchos::null;
7248 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7249#endif
7250 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7251#ifdef HAVE_TPETRA_MMM_TIMINGS
7252 MM = Teuchos::null;
7253 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7254
7255 esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7256#endif
7257 if(!params.is_null())
7258 esfc_params.set("compute global constants",params->get("compute global constants",true));
7259
7260 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7261 MyImport, Teuchos::null, rcp(&esfc_params,false));
7262
7263 }
7264
7265 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7266 void
7268 importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7269 const import_type& importer,
7270 const Teuchos::RCP<const map_type>& domainMap,
7271 const Teuchos::RCP<const map_type>& rangeMap,
7272 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7273 {
7274 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7275 }
7276
7277 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7278 void
7280 importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7281 const import_type& rowImporter,
7282 const import_type& domainImporter,
7283 const Teuchos::RCP<const map_type>& domainMap,
7284 const Teuchos::RCP<const map_type>& rangeMap,
7285 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7286 {
7287 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7288 }
7289
7290 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7291 void
7293 exportAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7294 const export_type& exporter,
7295 const Teuchos::RCP<const map_type>& domainMap,
7296 const Teuchos::RCP<const map_type>& rangeMap,
7297 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7298 {
7299 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7300 }
7301
7302 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7303 void
7305 exportAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7306 const export_type& rowExporter,
7307 const export_type& domainExporter,
7308 const Teuchos::RCP<const map_type>& domainMap,
7309 const Teuchos::RCP<const map_type>& rangeMap,
7310 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7311 {
7312 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7313 }
7314
7315
7316 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7317 void
7319 swap(CrsGraph<LocalOrdinal, GlobalOrdinal, Node>& graph)
7320 {
7321 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7322
7323 std::swap(graph.rowMap_, this->rowMap_);
7324 std::swap(graph.colMap_, this->colMap_);
7325 std::swap(graph.rangeMap_, this->rangeMap_);
7326 std::swap(graph.domainMap_, this->domainMap_);
7327
7328 std::swap(graph.importer_, this->importer_);
7329 std::swap(graph.exporter_, this->exporter_);
7330
7331 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7332
7333 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7334 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7335
7336 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7337
7338 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7339 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7340
7341 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7342 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7343 std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
7344
7345 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7346
7347 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7348 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7349 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7350
7351 std::swap(graph.storageStatus_, this->storageStatus_);
7352
7353 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7354 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7355 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7356 std::swap(graph.fillComplete_, this->fillComplete_);
7357 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7358 std::swap(graph.noRedundancies_, this->noRedundancies_);
7359 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7360 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7361 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7362
7363 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7364
7365 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7366 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7367 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7368 }
7369
7370
7371 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7372 bool
7374 isIdenticalTo(const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> & graph) const
7375 {
7376 auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7377 bool output = true;
7378 output = m1.size() == m2.size() ? output : false;
7379 for(auto & it_m: m1)
7380 {
7381 size_t key = it_m.first;
7382 output = m2.find(key) != m2.end() ? output : false;
7383 if(output)
7384 {
7385 auto v1 = m1.find(key)->second;
7386 auto v2 = m2.find(key)->second;
7387 std::sort(v1.begin(), v1.end());
7388 std::sort(v2.begin(), v2.end());
7389
7390 output = v1.size() == v2.size() ? output : false;
7391 for(size_t i=0; output && i<v1.size(); i++)
7392 {
7393 output = v1[i]==v2[i] ? output : false;
7394 }
7395 }
7396 }
7397 return output;
7398 };
7399
7400 bool output = true;
7401
7402 output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7403 output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7404 output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7405 output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7406
7407 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7408
7409 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7410 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7411
7412 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7413
7414 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7415
7416 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7417 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7418 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7419 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7420 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7421 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7422 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7423 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7424 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7426
7427 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7428 // nonlocals_ isa std::map<GO, std::vector<GO> >
7429 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7430
7431 // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7432 // - since this is a HostMirror type, it should be in host memory already
7433 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7434 if(output && this->k_numAllocPerRow_.extent(0) > 0)
7435 {
7436 for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7437 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7438 }
7439
7440 // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7441 // - since this is a HostMirror type, it should be in host memory already
7442 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7443 if(output && this->k_numRowEntries_.extent(0) > 0)
7444 {
7445 for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7446 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7447 }
7448
7449 // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7450 {
7451 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7452 auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7453 output = rowPtrsThis .extent(0) == rowPtrsGraph.extent(0) ? output : false;
7454 for(size_t i=0; output && i< rowPtrsThis.extent(0); i++)
7455 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7456 }
7457
7458 // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7459 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7460 if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7461 {
7462 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7463 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7464 for(size_t i=0; output && i < indThis.extent(0); i++)
7465 output = indThis(i) == indGraph(i) ? output : false;
7466 }
7467
7468 // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7469 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7470 if(output && this->gblInds_wdv.extent(0) > 0)
7471 {
7472 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7473 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7474 for(size_t i=0; output && i<indtThis.extent(0); i++)
7475 output = indtThis(i) == indtGraph(i) ? output : false;
7476 }
7477
7478 // Check lclGraph_ isa
7479 // Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7480 // Kokkos::StaticCrsGraph has 3 data members in it:
7481 // Kokkos::View<size_type*, ...> row_map
7482 // (local_graph_device_type::row_map_type)
7483 // Kokkos::View<data_type*, ...> entries
7484 // (local_graph_device_type::entries_type)
7485 // Kokkos::View<size_type*, ...> row_block_offsets
7486 // (local_graph_device_type::row_block_type)
7487 // There is currently no Kokkos::StaticCrsGraph comparison function
7488 // that's built-in, so we will just compare
7489 // the three data items here. This can be replaced if Kokkos ever
7490 // puts in its own comparison routine.
7491 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7492 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7493
7494 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7495 ? output : false;
7496 if(output && thisLclGraph.row_map.extent(0) > 0)
7497 {
7498 auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7499 auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7500 for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7501 output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7502 ? output : false;
7503 }
7504
7505 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7506 ? output : false;
7507 if(output && thisLclGraph.entries.extent(0) > 0)
7508 {
7509 auto lclGraph_entries_host_this = thisLclGraph.entries;
7510 auto lclGraph_entries_host_graph = graphLclGraph.entries;
7511 for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7512 output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7513 ? output : false;
7514 }
7515
7516 output =
7517 thisLclGraph.row_block_offsets.extent(0) ==
7518 graphLclGraph.row_block_offsets.extent(0) ? output : false;
7519 if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7520 {
7521 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7522 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7523 for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7524 output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7525 ? output : false;
7526 }
7527
7528 // For Importer and Exporter, we don't need to explicitly check them since
7529 // they will be consistent with the maps.
7530 // Note: importer_ isa Teuchos::RCP<const import_type>
7531 // exporter_ isa Teuchos::RCP<const export_type>
7532
7533 return output;
7534 }
7535
7536
7537
7538} // namespace Tpetra
7539
7540//
7541// Explicit instantiation macros
7542//
7543// Must be expanded from within the Tpetra namespace!
7544//
7545
// TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE):
// expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes a single Import (`importer`) followed by the
// domain Map, the range Map, and a parameter list.  The template
// arguments of Import and Map are spelled through CrsGraph's own
// local_ordinal_type, global_ordinal_type, and node_type typedefs.
// The expansion ends with a semicolon, so no extra one is needed at
// the point of use.
7546#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7547 template<> \
7548 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7549 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7550 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7551 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7552 CrsGraph<LO,GO,NODE>::node_type>& importer, \
7553 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7554 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7555 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7556 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7557 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7558 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7559 const Teuchos::RCP<Teuchos::ParameterList>& params);
7560
// TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE):
// expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes two Imports (`rowImporter` and
// `domainImporter`) followed by the domain Map, the range Map, and a
// parameter list.  The template arguments of Import and Map are
// spelled through CrsGraph's own local_ordinal_type,
// global_ordinal_type, and node_type typedefs.  The expansion ends
// with a semicolon.
7561#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7562 template<> \
7563 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7564 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7565 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7566 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7567 CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7568 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7569 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7570 CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7571 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7572 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7573 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7574 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7575 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7576 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7577 const Teuchos::RCP<Teuchos::ParameterList>& params);
7578
7579
// TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE):
// expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes a single Export (`exporter`) followed by the
// domain Map, the range Map, and a parameter list.  The template
// arguments of Export and Map are spelled through CrsGraph's own
// local_ordinal_type, global_ordinal_type, and node_type typedefs.
// The expansion ends with a semicolon.
7580#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7581 template<> \
7582 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7583 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7584 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7585 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7586 CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7587 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7588 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7589 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7590 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7591 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7592 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7593 const Teuchos::RCP<Teuchos::ParameterList>& params);
7594
// TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE):
// expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the
// overload that takes two Exports (`rowExporter` and
// `domainExporter`) followed by the domain Map, the range Map, and a
// parameter list.  The template arguments of Export and Map are
// spelled through CrsGraph's own local_ordinal_type,
// global_ordinal_type, and node_type typedefs.  The expansion ends
// with a semicolon.
7595#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7596 template<> \
7597 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7598 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7599 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7600 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7601 CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7602 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7603 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7604 CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7605 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7606 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7607 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7608 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7609 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7610 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7611 const Teuchos::RCP<Teuchos::ParameterList>& params);
7612
7613
// TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE): umbrella macro for one
// (LO, GO, NODE) combination.  It explicitly instantiates the class
// CrsGraph<LO, GO, NODE> and then expands the four
// import/export-and-fill-complete macros (the single-transfer and the
// two-transfer variants of each) for the same template arguments.
7614#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7615 template class CrsGraph<LO, GO, NODE>; \
7616 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7617 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7618 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7619 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7620
7621
7622#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowPointers, const Teuchos::ArrayView< LocalOrdinal > &columnIndices_LID, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices_GID, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::ArrayView< const int > &owningPids, Teuchos::Array< int > &remotePids, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
void getPids(const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer, Teuchos::Array< int > &pids, bool use_minus_one_for_local)
Like getPidGidPairs, but just gets the PIDs, ordered by the column Map.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
::Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > import_type
The Import specialization used by this class.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indices.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
const row_ptrs_device_view_type & getRowPtrsUnpackedDevice() const
Get the unpacked row pointers on device.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indices.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
::Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > map_type
The Map specialization used by this class.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
typename device_type::execution_space execution_space
This class' Kokkos execution space.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
global_size_t globalNumEntries_
Global number of entries in the graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
::Tpetra::Export< LocalOrdinal, GlobalOrdinal, Node > export_type
The Export specialization used by this class.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. Built during fillCo...
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
const row_ptrs_host_view_type & getRowPtrsPackedHost() const
Get the packed row pointers on host. Lazily make a copy from device.
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
Kokkos::View< const size_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to its structure.
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
const row_ptrs_device_view_type & getRowPtrsPackedDevice() const
Get the packed row pointers on device.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
bool haveLocalConstants_
Whether this process has computed local constants.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Sets up and executes a communication plan for a Tpetra DistObject.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
An abstract interface for graphs accessed by rows.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called (without an intervening resumeFill()).
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes this graph's distribution of rows over processes.
virtual void getGlobalRowCopy(const GlobalOrdinal gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const =0
Get a copy of the global column indices in a given row of the graph.
virtual size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const =0
Returns the current number of entries on this node in the specified global row.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
Nonmember function that computes a residual. Computes R = B - A * X.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.