3959 const char tfecfFuncName[] =
"replaceDomainMap: ";
3960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3961 myGraph_.is_null (), std::runtime_error,
3962 "This method does not work if the matrix has a const graph. The whole "
3963 "idea of a const graph is that you are not allowed to change it, but this"
3964 " method necessarily must modify the graph, since the graph owns the "
3965 "matrix's domain Map and Import objects.");
3966 myGraph_->replaceDomainMap (newDomainMap);
3969 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3973 Teuchos::RCP<const import_type>& newImporter)
// Body of replaceDomainMapAndImporter (name taken from the tfecfFuncName
// string below). The request is forwarded to the graph held in myGraph_.
// The exception macro is handed the condition myGraph_.is_null() together
// with std::runtime_error and a message explaining that a const graph may
// not be modified.
3975 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3977 myGraph_.is_null (), std::runtime_error,
3978 "This method does not work if the matrix has a const graph. The whole "
3979 "idea of a const graph is that you are not allowed to change it, but this"
3980 " method necessarily must modify the graph, since the graph owns the "
3981 "matrix's domain Map and Import objects.");
// Delegate: the graph stores the domain Map and the Import object.
3982 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Body of replaceRangeMap (name taken from the tfecfFuncName string below).
// The request is forwarded to the graph held in myGraph_. The exception
// macro is handed the condition myGraph_.is_null() together with
// std::runtime_error; the message names the range Map and Export objects,
// since those are what this method's graph call replaces.
3990 const char tfecfFuncName[] =
"replaceRangeMap: ";
3991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3992 myGraph_.is_null (), std::runtime_error,
3993 "This method does not work if the matrix has a const graph. The whole "
3994 "idea of a const graph is that you are not allowed to change it, but this"
3995 " method necessarily must modify the graph, since the graph owns the "
3996 "matrix's range Map and Export objects.");
// Delegate to the graph.
3997 myGraph_->replaceRangeMap (newRangeMap);
4000 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4004 Teuchos::RCP<const export_type>& newExporter)
// Body of replaceRangeMapAndExporter (name taken from the tfecfFuncName
// string below). The request is forwarded to the graph held in myGraph_.
// The exception macro is handed the condition myGraph_.is_null() together
// with std::runtime_error; the message names the range Map and Export
// objects, which are exactly what is passed on to the graph below.
4006 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
4007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4008 myGraph_.is_null (), std::runtime_error,
4009 "This method does not work if the matrix has a const graph. The whole "
4010 "idea of a const graph is that you are not allowed to change it, but this"
4011 " method necessarily must modify the graph, since the graph owns the "
4012 "matrix's range Map and Export objects.");
// Delegate to the graph.
4013 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
4016 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4020 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4021 const Teuchos::ArrayView<const Scalar>& values)
// Appends the given (index, value) pairs to the buffers kept in nonlocals_
// under key globalRow.
// NOTE(review): judging by the container's name these are presumably rows
// not owned by the calling process -- confirm against the caller.
4023 using Teuchos::Array;
4024 typedef GlobalOrdinal GO;
4025 typedef typename Array<GO>::size_type size_type;
4027 const size_type numToInsert = indices.size ();
// Look up the (indices, values) pair for this row.
// NOTE(review): presumably operator[] creates an empty entry on first use
// (a log message elsewhere in this file calls nonlocals_ a std::map) --
// confirm against the member's declaration.
4030 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4031 Array<GO>& curRowInds = curRow.first;
4032 Array<Scalar>& curRowVals = curRow.second;
// Grow both buffers' capacity once, before appending.
4033 const size_type newCapacity = curRowInds.size () + numToInsert;
4034 curRowInds.reserve (newCapacity);
4035 curRowVals.reserve (newCapacity);
// NOTE(review): values is indexed up to indices.size(); nothing visible here
// checks that values is at least that long -- confirm the caller ensures it.
4036 for (size_type k = 0; k < numToInsert; ++k) {
4037 curRowInds.push_back (indices[k]);
4038 curRowVals.push_back (values[k]);
4042 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4049 using Teuchos::Comm;
4050 using Teuchos::outArg;
4053 using Teuchos::REDUCE_MAX;
4054 using Teuchos::REDUCE_MIN;
4055 using Teuchos::reduceAll;
4057 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type;
4059 typedef GlobalOrdinal GO;
4060 typedef typename Teuchos::Array<GO>::size_type size_type;
4061 const char tfecfFuncName[] =
"globalAssemble: ";
4062 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
4064 const bool verbose = Behavior::verbose(
"CrsMatrix");
4065 std::unique_ptr<std::string> prefix;
4067 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4068 std::ostringstream os;
4069 os << *prefix <<
"nonlocals_.size()=" <<
nonlocals_.size()
4071 std::cerr << os.str();
4073 RCP<const Comm<int> > comm =
getComm ();
4075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4076 (!
isFillActive (), std::runtime_error,
"Fill must be active before "
4077 "you may call this method.");
// Number of entries currently held in nonlocals_ on this process.
4079 const size_t myNumNonlocalRows =
nonlocals_.size ();
// Turn that count into a 0/1 flag and combine the flags over comm with
// REDUCE_MAX; the result is nonzero if any participating process has
// at least one entry in nonlocals_.
4086 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4087 int someoneHasNonlocalRows = 0;
4088 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4089 outArg (someoneHasNonlocalRows));
// Branch taken when no process reported any nonlocal rows.
4090 if (someoneHasNonlocalRows == 0) {
4104 RCP<const map_type> nonlocalRowMap;
4105 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4107 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4108 size_type curPos = 0;
4110 ++mapIter, ++curPos) {
4111 myNonlocalGblRows[curPos] = mapIter->first;
4114 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4115 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4122 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4123 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4124 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4125 merge2 (gblCols_newEnd, vals_newEnd,
4126 gblCols.begin (), gblCols.end (),
4127 vals.begin (), vals.end ());
4128 gblCols.erase (gblCols_newEnd, gblCols.end ());
4129 vals.erase (vals_newEnd, vals.end ());
4130 numEntPerNonlocalRow[curPos] = gblCols.size ();
4141 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4143 auto iter = std::min_element (myNonlocalGblRows.begin (),
4144 myNonlocalGblRows.end ());
4145 if (iter != myNonlocalGblRows.end ()) {
4146 myMinNonlocalGblRow = *iter;
4149 GO gblMinNonlocalGblRow = 0;
4150 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4151 outArg (gblMinNonlocalGblRow));
4152 const GO indexBase = gblMinNonlocalGblRow;
4153 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4154 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4163 std::ostringstream os;
4164 os << *prefix <<
"Create nonlocal matrix" << endl;
4165 std::cerr << os.str();
4167 RCP<crs_matrix_type> nonlocalMatrix =
4168 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4170 size_type curPos = 0;
4172 ++mapIter, ++curPos) {
4173 const GO gblRow = mapIter->first;
4175 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4176 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4178 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4191 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4193 int isLocallyComplete = 1;
4195 if (origRowMapIsOneToOne) {
4197 std::ostringstream os;
4198 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4199 std::cerr << os.str();
4201 export_type exportToOrig (nonlocalRowMap, origRowMap);
4203 isLocallyComplete = 0;
4206 std::ostringstream os;
4207 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4208 std::cerr << os.str();
4215 std::ostringstream os;
4216 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4217 std::cerr << os.str();
4224 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4226 isLocallyComplete = 0;
4234 std::ostringstream os;
4235 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4237 std::cerr << os.str();
4239 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4241 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4247 std::ostringstream os;
4248 os << *prefix <<
"Free nonlocalMatrix" << endl;
4249 std::cerr << os.str();
4251 nonlocalMatrix = Teuchos::null;
4255 std::ostringstream os;
4256 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4257 std::cerr << os.str();
4259 import_type importToOrig (oneToOneRowMap, origRowMap);
4268 std::ostringstream os;
4269 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4270 std::cerr << os.str();
// Combine the per-process isLocallyComplete flags over comm with REDUCE_MIN:
// the result is 1 only if every process contributed 1. Any other result is
// passed to the exception macro with std::runtime_error and the message
// below about a global row index that is in no process's row Map.
4282 int isGloballyComplete = 0;
4283 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4284 outArg (isGloballyComplete));
4285 TEUCHOS_TEST_FOR_EXCEPTION
4286 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4287 "you called insertGlobalValues with a global row index which is not in "
4288 "the matrix's row Map on any process in its communicator.");
4291 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4294 resumeFill (
const Teuchos::RCP<Teuchos::ParameterList>& params)
4297 myGraph_->resumeFill (params);
4302 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4306 return getCrsGraphRef ().haveGlobalConstants ();
4309 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4312 fillComplete (
const Teuchos::RCP<Teuchos::ParameterList>& params)
4314 const char tfecfFuncName[] =
"fillComplete(params): ";
4316 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4317 (this->
getCrsGraph ().is_null (), std::logic_error,
4318 "getCrsGraph() returns null. This should not happen at this point. "
4319 "Please report this bug to the Tpetra developers.");
4329 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4330 Teuchos::RCP<const map_type> domainMap = rangeMap;
4335 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4338 fillComplete (
const Teuchos::RCP<const map_type>& domainMap,
4339 const Teuchos::RCP<const map_type>& rangeMap,
4340 const Teuchos::RCP<Teuchos::ParameterList>& params)
4344 using Teuchos::ArrayRCP;
4348 const char tfecfFuncName[] =
"fillComplete: ";
4349 ProfilingRegion regionFillComplete
4350 (
"Tpetra::CrsMatrix::fillComplete");
4351 const bool verbose = Behavior::verbose(
"CrsMatrix");
4352 std::unique_ptr<std::string> prefix;
4354 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4355 std::ostringstream os;
4356 os << *prefix << endl;
4357 std::cerr << os.str ();
// Region label; spelled "fillComplete" to match the ProfilingRegion label
// "Tpetra::CrsMatrix::fillComplete" used earlier in this function.
4360 "Tpetra::CrsMatrix::fillComplete",
4363 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4365 "Matrix fill state must be active (isFillActive() "
4366 "must be true) before you may call fillComplete().");
4367 const int numProcs = this->
getComm ()->getSize ();
// Options read from params; the initializers are the values used when
// params is null.
4377 bool assertNoNonlocalInserts =
false;
4380 bool sortGhosts =
true;
4382 if (! params.is_null ()) {
// "No Nonlocal Changes": the current value is passed as the second argument
// to get() (presumably the fallback when the entry is absent -- see the
// Teuchos::ParameterList::get documentation).
4383 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4384 assertNoNonlocalInserts);
// The ghost-sorting switch is accepted under two spellings; the lower-case
// one is tested first, and the other is consulted only if it is absent.
4385 if (params->isParameter (
"sort column map ghost gids")) {
4386 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4388 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4389 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// Global assembly is needed only when nonlocal inserts have not been ruled
// out and there is more than one process.
4394 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4396 if (! this->myGraph_.is_null ()) {
4397 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4400 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4410 if (needGlobalAssemble) {
4414 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4416 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4417 "An invalid entry (i.e., with row index not in the row Map) must have "
4418 "been submitted to the CrsMatrix.");
4429#ifdef HAVE_TPETRA_DEBUG
4447 const bool domainMapsMatch =
4448 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4449 const bool rangeMapsMatch =
4450 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4452 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4453 (! domainMapsMatch, std::runtime_error,
4454 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4455 "The graph cannot be changed because it was given to the CrsMatrix "
4456 "constructor as const. You can fix this by passing in the graph's "
4457 "domain Map and range Map to the matrix's fillComplete call.");
4459 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4460 (! rangeMapsMatch, std::runtime_error,
4461 "The CrsMatrix's range Map does not match the graph's range Map. "
4462 "The graph cannot be changed because it was given to the CrsMatrix "
4463 "constructor as const. You can fix this by passing in the graph's "
4464 "domain Map and range Map to the matrix's fillComplete call.");
4477 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4480 Teuchos::Array<int> remotePIDs (0);
4481 const bool mustBuildColMap = ! this->
hasColMap ();
4482 if (mustBuildColMap) {
4483 this->myGraph_->makeColMap (remotePIDs);
4488 const std::pair<size_t, std::string> makeIndicesLocalResult =
4489 this->myGraph_->makeIndicesLocal(verbose);
4494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4495 (makeIndicesLocalResult.first != 0, std::runtime_error,
4496 makeIndicesLocalResult.second);
4498 const bool sorted = this->myGraph_->isSorted ();
4499 const bool merged = this->myGraph_->isMerged ();
4505 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
// Decide whether to ask the graph to compute its global constants: true when
// params is null; otherwise the value read from params under
// "compute global constants", with true passed as the second argument.
4511 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4512 params->get (
"compute global constants",
true);
4513 if (callGraphComputeGlobalConstants) {
4514 this->myGraph_->computeGlobalConstants ();
4517 this->myGraph_->computeLocalConstants ();
4519 this->myGraph_->fillComplete_ =
true;
4520 this->myGraph_->checkInternalState ();
// Region label; spelled "fillComplete" to match the ProfilingRegion label
// "Tpetra::CrsMatrix::fillComplete" used at the top of this function.
4528 "Tpetra::CrsMatrix::fillComplete",
"checkInternalState"
4534 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Trailing parameters of expertStaticFillComplete (name per the
// tfecfFuncName string in the body). The last one is the reference
// "params" that the body tests with params.is_null() and reads via
// params->get(...).
4538 const Teuchos::RCP<const map_type> & rangeMap,
4539 const Teuchos::RCP<const import_type>& importer,
4540 const Teuchos::RCP<const export_type>& exporter,
4541 const Teuchos::RCP<Teuchos::ParameterList> &params)
4543#ifdef HAVE_TPETRA_MMM_TIMINGS
4545 if(!params.is_null())
4546 label = params->get(
"Timer Label",label);
4547 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4548 using Teuchos::TimeMonitor;
4550 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4553 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4555 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4556 "must be true) before calling fillComplete().");
4557 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4558 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4561#ifdef HAVE_TPETRA_MMM_TIMINGS
4562 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
4565 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4569#ifdef HAVE_TPETRA_MMM_TIMINGS
4570 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4581#ifdef HAVE_TPETRA_DEBUG
// Post-conditions (these follow the #ifdef HAVE_TPETRA_DEBUG line): fill
// must no longer be active, and the matrix must report fill complete.
// Each message names the predicate that its own check actually tests.
4582 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
isFillActive(), std::logic_error,
4583 ": We're at the end of fillComplete(), but isFillActive() is true. "
4584 "Please report this bug to the Tpetra developers.");
4585 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!
isFillComplete(), std::logic_error,
4586 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4587 "Please report this bug to the Tpetra developers.");
4590#ifdef HAVE_TPETRA_MMM_TIMINGS
4591 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4598 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4604 LocalOrdinal* beg = cols;
4605 LocalOrdinal* end = cols + rowLen;
4606 LocalOrdinal* newend = beg;
4608 LocalOrdinal* cur = beg + 1;
4612 while (cur != end) {
4613 if (*cur != *newend) {
4630 return newend - beg;
4633 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4639 typedef LocalOrdinal LO;
4640 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4641 host_execution_space;
4642 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4643 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4644 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4646 if (! sorted || ! merged) {
4647 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4648 (this->
isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4649 "\"static\" (const) graph, since the matrix does not own the graph.");
4650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4651 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4652 "this matrix claims ! isStaticGraph(). "
4653 "Please report this bug to the Tpetra developers.");
4654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4656 "this method if the graph's storage has already been optimized. "
4657 "Please report this bug to the Tpetra developers.");
4661 size_t totalNumDups = 0;
4664 auto rowBegins_ = graph.rowPtrsUnpacked_host_;
4666 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
4668 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4669 [=] (
const LO lclRow,
size_t& numDups) {
4670 size_t rowBegin = rowBegins_(lclRow);
4671 size_t rowLen = rowLengths_(lclRow);
4672 LO* cols = cols_.data() + rowBegin;
4675 sort2 (cols, cols + rowLen, vals);
4679 rowLengths_(lclRow) = newRowLength;
4680 numDups += rowLen - newRowLength;
4693 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4704 using Teuchos::rcp_const_cast;
4705 using Teuchos::rcpFromRef;
4706 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4707 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4713 if (alpha ==
ZERO) {
4716 }
else if (beta != ONE) {
4730 RCP<const import_type> importer = this->
getGraph ()->getImporter ();
4731 RCP<const export_type> exporter = this->
getGraph ()->getExporter ();
4737 const bool Y_is_overwritten = (beta ==
ZERO);
4740 const bool Y_is_replicated =
4749 if (Y_is_replicated && this->
getComm ()->getRank () > 0) {
4756 RCP<const MV> X_colMap;
4757 if (importer.is_null ()) {
4767 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4772 X_colMap = rcpFromRef (X_in);
4776 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4785 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4786 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4800 if (! exporter.is_null ()) {
4801 this->
localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha,
ZERO);
4803 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4809 if (Y_is_overwritten) {
4842 this->
localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4846 this->
localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4854 if (Y_is_replicated) {
4855 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
4860 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4865 const Teuchos::ETransp mode,
4870 using Teuchos::null;
4873 using Teuchos::rcp_const_cast;
4874 using Teuchos::rcpFromRef;
4875 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4878 if (alpha ==
ZERO) {
4901 RCP<const import_type> importer = this->
getGraph ()->getImporter ();
4902 RCP<const export_type> exporter = this->
getGraph ()->getExporter ();
4908 const bool Y_is_overwritten = (beta ==
ZERO);
4909 if (Y_is_replicated && this->
getComm ()->getRank () > 0) {
4915 X = rcp (
new MV (X_in, Teuchos::Copy));
4917 X = rcpFromRef (X_in);
4921 if (importer != Teuchos::null) {
4929 if (exporter != Teuchos::null) {
4940 if (! exporter.is_null ()) {
4941 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4949 if (importer != Teuchos::null) {
4950 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4961 if (Y_is_overwritten) {
4978 MV Y (Y_in, Teuchos::Copy);
4982 this->
localApply (*X, Y_in, mode, alpha, beta);
4989 if (Y_is_replicated) {
4990 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
4995 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5000 const Teuchos::ETransp mode,
5001 const Scalar& alpha,
5002 const Scalar& beta)
const
5005 using Teuchos::NO_TRANS;
5006 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5014 const char tfecfFuncName[] =
"localApply: ";
5015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5019 const bool transpose = (mode != Teuchos::NO_TRANS);
5020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5022 getColMap ()->getLocalNumElements (), std::runtime_error,
5023 "NO_TRANS case: X has the wrong number of local rows. "
5025 "getColMap()->getLocalNumElements() = " <<
5026 getColMap ()->getLocalNumElements () <<
".");
5027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5029 getRowMap ()->getLocalNumElements (), std::runtime_error,
5030 "NO_TRANS case: Y has the wrong number of local rows. "
5032 "getRowMap()->getLocalNumElements() = " <<
5033 getRowMap ()->getLocalNumElements () <<
".");
5034 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5036 getRowMap ()->getLocalNumElements (), std::runtime_error,
5037 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5039 <<
" != getRowMap()->getLocalNumElements() = "
5040 <<
getRowMap ()->getLocalNumElements () <<
".");
// Transpose-case check against the column Map's local element count. In the
// NO_TRANS checks above X is paired with the column Map and Y with the row
// Map, and the preceding TRANS check pairs X with the row Map, so this one
// concerns Y; the message says so.
// NOTE(review): the condition line of this macro call is not visible in
// this excerpt -- confirm it tests Y.
5041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5043 getColMap ()->getLocalNumElements (), std::runtime_error,
5044 "TRANS or CONJ_TRANS case: Y has the wrong number of local "
5046 <<
" != getColMap()->getLocalNumElements() = "
5047 <<
getColMap ()->getLocalNumElements () <<
".");
5048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5050 "fill complete. You must call fillComplete() (possibly with "
5051 "domain and range Map arguments) without an intervening "
5052 "resumeFill() call before you may call this method.");
5053 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5055 std::runtime_error,
"X and Y must be constant stride.");
5060 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5061 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5062 && X_lcl.extent(0) != 0,
5063 std::runtime_error,
"X and Y may not alias one another.");
5067 LocalOrdinal maxRowImbalance = 0;
5072 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5074 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
5077 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5082 Teuchos::ETransp mode,
5087 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5089 TEUCHOS_TEST_FOR_EXCEPTION
5091 fnName <<
": Cannot call apply() until fillComplete() "
5092 "has been called.");
5094 if (mode == Teuchos::NO_TRANS) {
5095 ProfilingRegion regionNonTranspose (fnName);
5099 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5106 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5115 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5117 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5122 typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> output_matrix_type;
5123 const char tfecfFuncName[] =
"convert: ";
5125 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5126 (! this->
isFillComplete (), std::runtime_error,
"This matrix (the source "
5127 "of the conversion) is not fill complete. You must first call "
5128 "fillComplete() (possibly with the domain and range Map) without an "
5129 "intervening call to resumeFill(), before you may call this method.");
5131 RCP<output_matrix_type> newMatrix
5136 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5137 this->getLocalMatrixDevice ().values);
5147 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Consistency checks on the matrix's internal state. Every check hands
// std::logic_error and the generic "err" text to the exception macro.
5154 const char tfecfFuncName[] =
"checkInternalState: ";
5155 const char err[] =
"Internal state is not consistent. "
5156 "Please report this bug to the Tpetra developers.";
// Invariant: staticGraph_ is never null.
5160 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5161 (staticGraph_.is_null (), std::logic_error, err);
// Invariant: if myGraph_ is set, it is the same pointer as staticGraph_.
5165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5166 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5167 std::logic_error, err);
// Invariant (per the message text; the condition line is not visible in this
// excerpt): a fill-complete matrix has a fill-complete graph.
5169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5171 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5172 "but its graph is NOT fill complete.");
// Invariant: when the graph has allocated indices, a nonzero local
// allocation size and at least one local row, the values storage
// (valuesUnpacked_wdv) must not have extent 0.
5175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5176 (staticGraph_->indicesAreAllocated () &&
5177 staticGraph_->getLocalAllocationSize() > 0 &&
5178 staticGraph_->getLocalNumRows() > 0 &&
5179 valuesUnpacked_wdv.extent (0) == 0,
5180 std::logic_error, err);
5184 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5189 std::ostringstream os;
5191 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
5192 if (this->getObjectLabel () !=
"") {
5193 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5196 os <<
"isFillComplete: true"
5203 os <<
"isFillComplete: false"
5210 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5213 describe (Teuchos::FancyOStream &out,
5214 const Teuchos::EVerbosityLevel verbLevel)
const
5218 using Teuchos::ArrayView;
5219 using Teuchos::Comm;
5221 using Teuchos::TypeNameTraits;
5222 using Teuchos::VERB_DEFAULT;
5223 using Teuchos::VERB_NONE;
5224 using Teuchos::VERB_LOW;
5225 using Teuchos::VERB_MEDIUM;
5226 using Teuchos::VERB_HIGH;
5227 using Teuchos::VERB_EXTREME;
5229 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5231 if (vl == VERB_NONE) {
5236 Teuchos::OSTab tab0 (out);
5238 RCP<const Comm<int> > comm = this->
getComm();
5239 const int myRank = comm->getRank();
5240 const int numProcs = comm->getSize();
5245 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5255 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5257 Teuchos::OSTab tab1 (out);
5260 if (this->getObjectLabel () !=
"") {
5261 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5264 out <<
"Template parameters:" << endl;
5265 Teuchos::OSTab tab2 (out);
5266 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5267 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5268 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5269 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
5272 out <<
"isFillComplete: true" << endl
5276 << endl <<
"Global max number of entries in a row: "
5280 out <<
"isFillComplete: false" << endl
5286 if (vl < VERB_MEDIUM) {
5292 out << endl <<
"Row Map:" << endl;
5296 out <<
"null" << endl;
5308 out <<
"Column Map: ";
5312 out <<
"null" << endl;
5316 out <<
"same as row Map" << endl;
5327 out <<
"Domain Map: ";
5331 out <<
"null" << endl;
5335 out <<
"same as row Map" << endl;
5339 out <<
"same as column Map" << endl;
5350 out <<
"Range Map: ";
5354 out <<
"null" << endl;
5358 out <<
"same as domain Map" << endl;
5362 out <<
"same as row Map" << endl;
5372 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5373 if (myRank == curRank) {
5374 out <<
"Process rank: " << curRank << endl;
5375 Teuchos::OSTab tab2 (out);
5376 if (! staticGraph_->indicesAreAllocated ()) {
5377 out <<
"Graph indices not allocated" << endl;
5380 out <<
"Number of allocated entries: "
5381 << staticGraph_->getLocalAllocationSize () << endl;
5393 if (vl < VERB_HIGH) {
5398 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5399 if (myRank == curRank) {
5400 out << std::setw(width) <<
"Proc Rank"
5401 << std::setw(width) <<
"Global Row"
5402 << std::setw(width) <<
"Num Entries";
5403 if (vl == VERB_EXTREME) {
5404 out << std::setw(width) <<
"(Index,Value)";
5409 GlobalOrdinal gid =
getRowMap()->getGlobalElement(r);
5410 out << std::setw(width) << myRank
5411 << std::setw(width) << gid
5412 << std::setw(width) << nE;
5413 if (vl == VERB_EXTREME) {
5415 global_inds_host_view_type rowinds;
5416 values_host_view_type rowvals;
5418 for (
size_t j = 0; j < nE; ++j) {
5419 out <<
" (" << rowinds[j]
5420 <<
", " << rowvals[j]
5425 local_inds_host_view_type rowinds;
5426 values_host_view_type rowvals;
5428 for (
size_t j=0; j < nE; ++j) {
5429 out <<
" (" <<
getColMap()->getGlobalElement(rowinds[j])
5430 <<
", " << rowvals[j]
5446 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5460 return (srcRowMat !=
nullptr);
5463 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5467 const typename crs_graph_type::padding_type& padding,
5474 using row_ptrs_type =
5475 typename local_graph_device_type::row_map_type::non_const_type;
5476 using range_policy =
5477 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5478 const char tfecfFuncName[] =
"applyCrsPadding";
5479 const char suffix[] =
5480 ". Please report this bug to the Tpetra developers.";
5481 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5483 std::unique_ptr<std::string> prefix;
5485 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5486 std::ostringstream os;
5487 os << *prefix <<
"padding: ";
5490 std::cerr << os.str();
5492 const int myRank = ! verbose ? -1 : [&] () {
5493 auto map = this->getMap();
5494 if (map.is_null()) {
5497 auto comm = map->getComm();
5498 if (comm.is_null()) {
5501 return comm->getRank();
5505 if (! myGraph_->indicesAreAllocated()) {
5507 std::ostringstream os;
5508 os << *prefix <<
"Call allocateIndices" << endl;
5509 std::cerr << os.str();
5511 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5523 std::ostringstream os;
5524 os << *prefix <<
"Allocate row_ptrs_beg: "
5525 << myGraph_->rowPtrsUnpacked_host_.extent(0) << endl;
5526 std::cerr << os.str();
5528 using Kokkos::view_alloc;
5529 using Kokkos::WithoutInitializing;
5530 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5531 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5533 Kokkos::deep_copy(
execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
5535 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5536 size_t(row_ptr_beg.extent(0) - 1);
5538 std::ostringstream os;
5539 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5540 std::cerr << os.str();
5542 row_ptrs_type row_ptr_end(
5543 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5545 row_ptrs_type num_row_entries_d;
5547 const bool refill_num_row_entries =
5548 myGraph_->k_numRowEntries_.extent(0) != 0;
5550 if (refill_num_row_entries) {
5553 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5554 myGraph_->k_numRowEntries_);
5555 Kokkos::parallel_for
5556 (
"Fill end row pointers", range_policy(0, N),
5557 KOKKOS_LAMBDA (
const size_t i) {
5558 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
5565 Kokkos::parallel_for
5566 (
"Fill end row pointers", range_policy(0, N),
5567 KOKKOS_LAMBDA (
const size_t i) {
5568 row_ptr_end(i) = row_ptr_beg(i+1);
5572 if (myGraph_->isGloballyIndexed()) {
5574 myGraph_->gblInds_wdv,
5575 valuesUnpacked_wdv, padding, myRank, verbose);
5576 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5577 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5579 (newValuesLen != newColIndsLen, std::logic_error,
5580 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5581 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5586 myGraph_->lclIndsUnpacked_wdv,
5587 valuesUnpacked_wdv, padding, myRank, verbose);
5588 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5589 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5590 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5591 (newValuesLen != newColIndsLen, std::logic_error,
5592 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5593 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
5597 if (refill_num_row_entries) {
5598 Kokkos::parallel_for
5599 (
"Fill num entries", range_policy(0, N),
5600 KOKKOS_LAMBDA (
const size_t i) {
5601 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5603 Kokkos::deep_copy(myGraph_->k_numRowEntries_, num_row_entries_d);
5607 std::ostringstream os;
5608 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5609 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5610 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5611 std::cerr << os.str();
5612 TEUCHOS_ASSERT( myGraph_->rowPtrsUnpacked_host_.extent(0) ==
5613 row_ptr_beg.extent(0) );
5615 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// copyAndPermuteStaticGraph (name per tfecfFuncName below).
//
// Copies rows of srcMat into this matrix and combines them with
// combineGlobalValues():
//   1. the first numSameIDs rows, whose target global row index equals the
//      source global row index;
//   2. numPermutes further rows, where source local row permuteFromLIDs[p]
//      (source row Map) goes to target local row permuteToLIDs[p] (this
//      matrix's row Map).
//
// When the source is locally indexed, each row is copied into the reusable
// host buffers rowInds / rowVals (grown on demand) with getGlobalRowCopy;
// otherwise the row is accessed in place with getGlobalRowView.
//
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy
// disagree on a row's length.
5618 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5623 const size_t numSameIDs,
5624 const LocalOrdinal permuteToLIDs[],
5625 const LocalOrdinal permuteFromLIDs[],
5626 const size_t numPermutes)
5629 using Teuchos::Array;
5630 using Teuchos::ArrayView;
5632 using LO = LocalOrdinal;
5633 using GO = GlobalOrdinal;
// This string has no trailing ": " and is also passed to createPrefix()
// below, so the exception messages in this function supply their own
// leading ": " (TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC prepends
// tfecfFuncName directly to the message).
5634 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5635 const char suffix[] =
5636 " Please report this bug to the Tpetra developers.";
5637 ProfilingRegion regionCAP
5638 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5642 std::unique_ptr<std::string> prefix;
// NOTE(review): the class label here is "CrsGraph", while copyAndPermute()
// and the pack routines use "CrsMatrix" -- confirm which is intended.
5644 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5645 std::ostringstream os;
5646 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for callees; null when not verbose.
5648 const char*
const prefix_raw =
5649 verbose ? prefix.get()->c_str() :
nullptr;
5651 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5656 const map_type& srcRowMap = * (srcMat.getRowMap ());
// Scratch buffers shared by both loops; only ever grown, never shrunk.
5657 nonconst_global_inds_host_view_type rowInds;
5658 nonconst_values_host_view_type rowVals;
5659 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
// Phase 1: rows that have the same global index in source and target.
5660 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5664 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5665 const GO targetGID = sourceGID;
5667 ArrayView<const GO>rowIndsConstView;
5668 ArrayView<const Scalar> rowValsConstView;
5670 if (sourceIsLocallyIndexed) {
5671 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5672 if (rowLength >
static_cast<size_t> (rowInds.size())) {
5673 Kokkos::resize(rowInds,rowLength);
5674 Kokkos::resize(rowVals,rowLength);
// Restrict the scratch buffers to exactly rowLength entries.
5678 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5679 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5684 size_t checkRowLength = 0;
5685 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5686 rowValsView, checkRowLength);
5688 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5689 (rowLength != checkRowLength, std::logic_error,
": For "
5690 "global row index " << sourceGID <<
", the source "
5691 "matrix's getNumEntriesInGlobalRow returns a row length "
5692 "of " << rowLength <<
", but getGlobalRowCopy reports "
5693 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the copied data in non-owning Teuchos views for the combine call.
5700 rowIndsConstView = Teuchos::ArrayView<const GO> (
5701 rowIndsView.data(), rowIndsView.extent(0),
5702 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5703 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5704 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5705 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Source not locally indexed: view the row in place instead of copying.
5710 global_inds_host_view_type rowIndsView;
5711 values_host_view_type rowValsView;
5712 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5717 rowIndsConstView = Teuchos::ArrayView<const GO> (
5718 rowIndsView.data(), rowIndsView.extent(0),
5719 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5720 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5721 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5722 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5730 combineGlobalValues(targetGID, rowIndsConstView,
5732 prefix_raw, debug, verbose);
5736 std::ostringstream os;
5737 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows; source and target rows use different Maps.
5740 const map_type& tgtRowMap = * (this->getRowMap ());
5741 for (
size_t p = 0; p < numPermutes; ++p) {
5742 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5743 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5745 ArrayView<const GO> rowIndsConstView;
5746 ArrayView<const Scalar> rowValsConstView;
5748 if (sourceIsLocallyIndexed) {
5749 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5750 if (rowLength >
static_cast<size_t> (rowInds.size ())) {
5751 Kokkos::resize(rowInds,rowLength);
5752 Kokkos::resize(rowVals,rowLength);
5756 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5757 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5762 size_t checkRowLength = 0;
5763 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5764 rowValsView, checkRowLength);
5766 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5767 (rowLength != checkRowLength, std::logic_error,
": For "
5768 "source matrix global row index " << sourceGID <<
", "
5769 "getNumEntriesInGlobalRow returns a row length of " <<
5770 rowLength <<
", but getGlobalRowCopy a row length of "
5771 << checkRowLength <<
"." << suffix);
5778 rowIndsConstView = Teuchos::ArrayView<const GO> (
5779 rowIndsView.data(), rowIndsView.extent(0),
5780 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5781 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5782 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5783 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5788 global_inds_host_view_type rowIndsView;
5789 values_host_view_type rowValsView;
5790 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5795 rowIndsConstView = Teuchos::ArrayView<const GO> (
5796 rowIndsView.data(), rowIndsView.extent(0),
5797 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5798 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5799 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5800 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5805 combineGlobalValues(targetGID, rowIndsConstView,
5807 prefix_raw, debug, verbose);
5811 std::ostringstream os;
5812 os << *prefix <<
"Done" << endl;
// copyAndPermuteNonStaticGraph (name per tfecfFuncName below).
//
// Variant of copy-and-permute that first pads this matrix's storage and then
// inserts source rows with insertGlobalValuesFilteredChecked():
//   - myGraph_->computeCrsPadding(...) is evaluated against the source graph
//     and the result is applied with applyCrsPadding(*padding, verbose);
//   - the first numSameIDs rows are inserted with target global row index
//     equal to the source global row index;
//   - numPermutes further rows are inserted, taking source local row
//     permuteFromLIDs[p] to target local row permuteToLIDs[p] (read from the
//     host views of the two DualViews).
//
// When the source is locally indexed, rows are copied into the scratch
// buffers rowInds / rowVals via getGlobalRowCopy; otherwise they are viewed
// in place via getGlobalRowView.
//
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy
// disagree on a row's length.
5816 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5821 const size_t numSameIDs,
5822 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5823 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5824 const size_t numPermutes)
5827 using Teuchos::Array;
5828 using Teuchos::ArrayView;
5830 using LO = LocalOrdinal;
5831 using GO = GlobalOrdinal;
// This string has no trailing ": " and is also passed to createPrefix()
// below, so exception messages in this function begin with ": "
// (TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC prepends tfecfFuncName directly).
5832 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5833 const char suffix[] =
5834 " Please report this bug to the Tpetra developers.";
5835 ProfilingRegion regionCAP
5836 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5840 std::unique_ptr<std::string> prefix;
// NOTE(review): the class label here is "CrsGraph", while copyAndPermute()
// and the pack routines use "CrsMatrix" -- confirm which is intended.
5842 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5843 std::ostringstream os;
5844 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for callees; null when not verbose.
5846 const char*
const prefix_raw =
5847 verbose ? prefix.get()->c_str() :
nullptr;
5851 const row_graph_type& srcGraph = *(srcMat.getGraph());
// Compute the padding needed for the incoming rows, then apply it before
// any values are inserted.
5853 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5854 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5855 applyCrsPadding(*padding, verbose);
5857 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5862 const map_type& srcRowMap = * (srcMat.getRowMap ());
5863 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5864 using gids_type = nonconst_global_inds_host_view_type;
5865 using vals_type = nonconst_values_host_view_type;
// Phase 1: rows that have the same global index in source and target.
5868 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5872 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5873 const GO targetGID = sourceGID;
5875 ArrayView<const GO> rowIndsConstView;
5876 ArrayView<const Scalar> rowValsConstView;
5878 if (sourceIsLocallyIndexed) {
5880 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
// Grow the scratch buffers only when the current row does not fit.
5881 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
5882 Kokkos::resize(rowInds,rowLength);
5883 Kokkos::resize(rowVals,rowLength);
5887 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5888 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5893 size_t checkRowLength = 0;
5894 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5897 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5898 (rowLength != checkRowLength, std::logic_error,
": For "
5899 "global row index " << sourceGID <<
", the source "
5900 "matrix's getNumEntriesInGlobalRow returns a row length "
5901 "of " << rowLength <<
", but getGlobalRowCopy reports "
5902 "a row length of " << checkRowLength <<
"." << suffix);
5904 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5905 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Source not locally indexed: view the row in place instead of copying.
5908 global_inds_host_view_type rowIndsView;
5909 values_host_view_type rowValsView;
5910 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5916 rowIndsConstView = Teuchos::ArrayView<const GO> (
5917 rowIndsView.data(), rowIndsView.extent(0),
5918 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5919 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5920 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5921 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5927 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5928 rowValsConstView, prefix_raw, debug, verbose);
5932 std::ostringstream os;
5933 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows. The LID lists are read through the host views.
5935 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5936 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5938 const map_type& tgtRowMap = * (this->getRowMap ());
5939 for (
size_t p = 0; p < numPermutes; ++p) {
5940 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5941 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5943 ArrayView<const GO> rowIndsConstView;
5944 ArrayView<const Scalar> rowValsConstView;
5946 if (sourceIsLocallyIndexed) {
5947 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5948 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
5949 Kokkos::resize(rowInds,rowLength);
5950 Kokkos::resize(rowVals,rowLength);
5954 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5955 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5960 size_t checkRowLength = 0;
5961 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5962 rowValsView, checkRowLength);
5964 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5965 (rowLength != checkRowLength, std::logic_error,
": For "
5966 "source matrix global row index " << sourceGID <<
", "
5967 "getNumEntriesInGlobalRow returns a row length of " <<
5968 rowLength <<
", but getGlobalRowCopy a row length of "
5969 << checkRowLength <<
"." << suffix);
5971 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5972 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5975 global_inds_host_view_type rowIndsView;
5976 values_host_view_type rowValsView;
5977 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5983 rowIndsConstView = Teuchos::ArrayView<const GO> (
5984 rowIndsView.data(), rowIndsView.extent(0),
5985 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5986 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5987 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5988 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5994 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5995 rowValsConstView, prefix_raw, debug, verbose);
5999 std::ostringstream os;
6000 os << *prefix <<
"Done" << endl;
// copyAndPermute (name per tfecfFuncName below).
//
// Entry point that validates the permutation lists and forwards to
// copyAndPermuteStaticGraph() or copyAndPermuteNonStaticGraph().
//
// - Throws std::invalid_argument if permuteToLIDs and permuteFromLIDs have
//   different extents.
// - srcObj is dynamic_cast to a reference of type RMT, so a source of any
//   other type makes the cast throw.
// - The static-graph path is given raw host pointers and therefore asserts
//   that neither DualView needs a host sync; the non-static path is given
//   the DualViews themselves.
// NOTE(review): the condition that selects between the two paths is not on
// the lines shown here; presumably isStaticGraph() -- confirm.
6004 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6009 const size_t numSameIDs,
6010 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6011 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6020 const char tfecfFuncName[] =
"copyAndPermute: ";
6021 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6023 const bool verbose = Behavior::verbose(
"CrsMatrix");
6024 std::unique_ptr<std::string> prefix;
// Verbose trace of the inputs, written to std::cerr in one piece.
6026 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6027 std::ostringstream os;
6028 os << *prefix << endl
6029 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6030 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6033 << dualViewStatusToString (permuteToLIDs,
"permuteToLIDs")
6036 << dualViewStatusToString (permuteFromLIDs,
"permuteFromLIDs")
6039 <<
"isStaticGraph: " << (
isStaticGraph() ?
"true" :
"false")
6041 std::cerr << os.str ();
// Both permutation lists must have the same length.
6044 const auto numPermute = permuteToLIDs.extent (0);
6045 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6046 (numPermute != permuteFromLIDs.extent (0),
6047 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6048 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6049 << permuteFromLIDs.extent (0) <<
".");
// Reference cast: fails by throwing rather than yielding a null pointer.
6054 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static-graph path: hand over raw host pointers, so the host copies must
// already be current.
6056 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6057 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6058 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6059 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6061 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6062 permuteToLIDs_h.data(),
6063 permuteFromLIDs_h.data(),
// Non-static-graph path: pass the DualViews through unchanged.
6067 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6068 permuteFromLIDs, numPermute);
6072 std::ostringstream os;
6073 os << *prefix <<
"Done" << endl;
6074 std::cerr << os.str();
// packAndPrepare (name per tfecfFuncName below).
//
// Packs the rows listed in exportLIDs from the source object into the
// 'exports' byte buffer and records per-row sizes in numPacketsPerLID.
//
// Two source kinds are handled:
//   - a crs_matrix_type source: delegates to srcCrsMat->packNew(...);
//   - otherwise a row_matrix_type source: calls srcRowMat->pack(...) into a
//     temporary Teuchos::Array<char>, then copies that array into the host
//     view of 'exports' (reallocating 'exports' first if it is too small).
//     A source that is neither raises std::invalid_argument.
//
// std::exception thrown by packNew()/pack() is caught and its text saved in
// 'msg', tagged with this process's rank; the error state is then combined
// across the communicator with reduceAll(REDUCE_MAX) and reported as
// std::logic_error.
6078 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6083 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6084 Kokkos::DualView<char*, buffer_device_type>& exports,
6085 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6086 size_t& constantNumPackets)
6091 using Teuchos::outArg;
6092 using Teuchos::REDUCE_MAX;
6093 using Teuchos::reduceAll;
6095 typedef LocalOrdinal LO;
6096 typedef GlobalOrdinal GO;
6097 const char tfecfFuncName[] =
"packAndPrepare: ";
6098 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6100 const bool debug = Behavior::debug(
"CrsMatrix");
6101 const bool verbose = Behavior::verbose(
"CrsMatrix");
// A null communicator is checked for before anything else is done.
6104 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6105 if (pComm.is_null ()) {
6108 const Teuchos::Comm<int>& comm = *pComm;
// Rank of this process (not the communicator size): used to label the
// "Proc N: ..." error messages collected below.
6109 const int myRank = comm.getRank ();
6111 std::unique_ptr<std::string> prefix;
6113 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6114 std::ostringstream os;
6115 os << *prefix <<
"Start" << endl
6117 << dualViewStatusToString (exportLIDs,
"exportLIDs")
6120 << dualViewStatusToString (exports,
"exports")
6123 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6125 std::cerr << os.str ();
// Collects the text of any exception caught while packing.
6148 std::ostringstream msg;
// Case 1: the source has the same CrsMatrix type as the target.
6152 const crs_matrix_type* srcCrsMat =
6153 dynamic_cast<const crs_matrix_type*
> (&source);
6154 if (srcCrsMat !=
nullptr) {
6156 std::ostringstream os;
6157 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6158 "calling packNew" << endl;
6159 std::cerr << os.str ();
6162 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6163 constantNumPackets);
6165 catch (std::exception& e) {
6167 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Case 2: fall back to the RowMatrix interface.
6171 using Kokkos::HostSpace;
6172 using Kokkos::subview;
6173 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6174 using range_type = Kokkos::pair<size_t, size_t>;
6177 std::ostringstream os;
6178 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6180 std::cerr << os.str ();
6183 const row_matrix_type* srcRowMat =
6184 dynamic_cast<const row_matrix_type*
> (&source);
6185 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6186 (srcRowMat ==
nullptr, std::invalid_argument,
6187 "The source object of the Import or Export operation is neither a "
6188 "CrsMatrix (with the same template parameters as the target object), "
6189 "nor a RowMatrix (with the same first four template parameters as the "
// pack() takes Teuchos array types, so wrap the host data of the DualViews.
6200 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6201 auto exportLIDs_h = exportLIDs.view_host ();
6202 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6203 exportLIDs_h.size ());
// Temporary buffer that pack() fills; copied into 'exports' afterwards.
6207 Teuchos::Array<char> exports_a;
// numPacketsPerLID is written on host by pack().
6213 numPacketsPerLID.clear_sync_state ();
6214 numPacketsPerLID.modify_host ();
6215 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6216 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6217 numPacketsPerLID_h.size ());
6222 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6223 constantNumPackets);
6225 catch (std::exception& e) {
6227 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Reallocate 'exports' only if it cannot hold the packed data, keeping its
// existing label when it has one.
6231 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6232 if (
static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6233 const std::string oldLabel = exports.d_view.label ();
6234 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6235 exports = exports_type (newLabel, newAllocSize);
6240 exports.modify_host();
// Copy the packed bytes into the leading newAllocSize bytes of the host view.
6242 auto exports_h = exports.view_host ();
6243 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6247 typedef typename exports_type::t_host::execution_space HES;
6248 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6249 Kokkos::View<const char*, host_device_type>
6250 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6252 Kokkos::deep_copy (exports_h_sub, exports_a_kv);
// Combine the local error flag across all processes of the communicator.
6257 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6260 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6261 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6262 "one or more participating processes.");
6266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6267 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6268 "or more participating processes. Here is this process' error "
6269 "message: " << msg.str ());
6273 std::ostringstream os;
6274 os << *prefix <<
"packAndPrepare: Done!" << endl
6284 std::cerr << os.str ();
// packRow (name per the "packRow: " error messages below).
//
// Serialises one matrix row into the byte buffer 'exports', starting at byte
// 'offset'. The layout written is, in order:
//   [ entry count, packed as one LO ][ numEnt global column indices (GO) ]
//   [ numEnt values, numBytesPerValue bytes each ]
//
// Throws std::logic_error if the number of bytes actually written differs
// from numEntLen + gidsLen + valsLen, and std::runtime_error if any
// PackTraits::packArray call reported a nonzero error code.
6288 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6292 const size_t offset,
6293 const size_t numEnt,
6294 const GlobalOrdinal gidsIn[],
6296 const size_t numBytesPerValue)
const
6299 using Kokkos::subview;
6300 using Tpetra::Details::PackTraits;
6301 typedef LocalOrdinal LO;
6302 typedef GlobalOrdinal GO;
// The entry count travels in the buffer as an LO, so convert it explicitly
// to LO (not to size_t followed by an implicit narrowing).
6311 const LO numEntLO =
static_cast<LO> (numEnt);
// Byte offsets and lengths of the three sections of the packed row.
6313 const size_t numEntBeg = offset;
6314 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
6315 const size_t gidsBeg = numEntBeg + numEntLen;
6316 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6317 const size_t valsBeg = gidsBeg + gidsLen;
6318 const size_t valsLen = numEnt * numBytesPerValue;
6320 char*
const numEntOut = exports + numEntBeg;
6321 char*
const gidsOut = exports + gidsBeg;
6322 char*
const valsOut = exports + valsBeg;
// Running total of bytes written, cross-checked against the expected size.
6324 size_t numBytesOut = 0;
6326 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
// packArray returns (error code, bytes written); accumulate both.
6329 Kokkos::pair<int, size_t> p;
6330 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6331 errorCode += p.first;
6332 numBytesOut += p.second;
6334 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6335 errorCode += p.first;
6336 numBytesOut += p.second;
6339 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6340 TEUCHOS_TEST_FOR_EXCEPTION
6341 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6342 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6343 << expectedNumBytes <<
".");
6344 TEUCHOS_TEST_FOR_EXCEPTION
6345 (errorCode != 0, std::runtime_error,
"packRow: "
6346 "PackTraits::packArray returned a nonzero error code");
// unpackRow (name per the "unpackRow" messages below).
//
// Reads one packed row from the byte buffer 'imports', starting at byte
// 'offset'. The layout read mirrors what the "packRow" messages describe:
//   [ entry count as one LO ][ numEnt global indices (GO) ]
//   [ numEnt values, numBytesPerValue bytes each ]
//
// Consistency checks, all raising std::logic_error unless noted:
//   - numBytes == 0 combined with a nonzero numEnt (per the message text);
//   - numEnt == 0 while numBytes != 0;
//   - the entry count stored in the buffer differs from numEnt, or is zero;
//   - bytes consumed differ from numBytes, or from
//     numEntLen + gidsLen + valsLen;
//   - std::runtime_error if PackTraits::unpackArray reported an error code.
6351 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6356 const char imports[],
6357 const size_t offset,
6358 const size_t numBytes,
6359 const size_t numEnt,
6360 const size_t numBytesPerValue)
6363 using Kokkos::subview;
6364 using Tpetra::Details::PackTraits;
6365 typedef LocalOrdinal LO;
6366 typedef GlobalOrdinal GO;
6370 "Tpetra::CrsMatrix::unpackRow",
// Nothing was sent for this row.
6374 if (numBytes == 0) {
6377 const int myRank = this->getMap ()->getComm ()->getRank ();
6378 TEUCHOS_TEST_FOR_EXCEPTION
6379 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6380 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6381 "number of entries to unpack (as reported by numPacketsPerLID) "
6382 "for this row numEnt=" << numEnt <<
" != 0.");
// Bytes were sent but the caller expects zero entries: inconsistent input.
6387 if (numEnt == 0 && numBytes != 0) {
6388 const int myRank = this->getMap ()->getComm ()->getRank ();
6389 TEUCHOS_TEST_FOR_EXCEPTION
6390 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6391 "unpackRow: The number of entries to unpack (as reported by "
6392 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6393 "numBytes=" << numBytes <<
" != 0.");
// Byte offsets and lengths of the three sections of the packed row.
6399 const size_t numEntBeg = offset;
6400 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
6401 const size_t gidsBeg = numEntBeg + numEntLen;
6402 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6403 const size_t valsBeg = gidsBeg + gidsLen;
6404 const size_t valsLen = numEnt * numBytesPerValue;
6406 const char*
const numEntIn = imports + numEntBeg;
6407 const char*
const gidsIn = imports + gidsBeg;
6408 const char*
const valsIn = imports + valsBeg;
// Running total of bytes consumed, cross-checked at the end.
6410 size_t numBytesOut = 0;
6413 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
// The count stored in the buffer must equal numEnt and must be nonzero.
6414 if (
static_cast<size_t> (numEntOut) != numEnt ||
6415 numEntOut ==
static_cast<LO
> (0)) {
6416 const int myRank = this->getMap ()->getComm ()->getRank ();
6417 std::ostringstream os;
6418 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
// Tracks whether the first diagnostic was already written to 'os'.
6419 bool firstErrorCondition =
false;
6420 if (
static_cast<size_t> (numEntOut) != numEnt) {
6421 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6422 <<
" does not equal number of entries unpacked from imports "
6423 "buffer numEntOut=" << numEntOut <<
".";
6424 firstErrorCondition =
true;
6426 if (numEntOut ==
static_cast<LO
> (0)) {
6427 if (firstErrorCondition) {
6430 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6431 "but number of bytes to unpack for this row numBytes=" << numBytes
6432 <<
" != 0. This should never happen, since packRow should only "
6433 "ever pack rows with a nonzero number of entries. In this case, "
6434 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6437 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// unpackArray returns (error code, bytes read); accumulate both.
6441 Kokkos::pair<int, size_t> p;
6442 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6443 errorCode += p.first;
6444 numBytesOut += p.second;
6446 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6447 errorCode += p.first;
6448 numBytesOut += p.second;
// Bytes consumed must match both the caller's count and the computed layout.
6451 TEUCHOS_TEST_FOR_EXCEPTION
6452 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6453 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6455 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6456 TEUCHOS_TEST_FOR_EXCEPTION
6457 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6458 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6459 << expectedNumBytes <<
".");
6461 TEUCHOS_TEST_FOR_EXCEPTION
6462 (errorCode != 0, std::runtime_error,
"unpackRow: "
6463 "PackTraits::unpackArray returned a nonzero error code");
// allocatePackSpaceNew (name per the createPrefix() call below).
//
// Sizes the 'exports' pack buffer for the rows listed in exportLIDs.
//   - totalNumEntries (output) is reset to 0 and then accumulates
//     getNumEntriesInLocalRow(lclRow) over all export rows.
//   - The required size is
//       numExportLIDs * sizeof(LO) + totalNumEntries * (sizeof(IST) + sizeof(GO))
//     bytes; 'exports' is replaced by a new DualView of that size only if it
//     is currently smaller. The old label is kept unless it is empty, in
//     which case "exports" is used.
// The host view of exportLIDs is read, so it must not need a host sync.
6468 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6472 size_t& totalNumEntries,
6473 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6479 typedef LocalOrdinal LO;
6480 typedef GlobalOrdinal GO;
6486 const bool verbose = Behavior::verbose(
"CrsMatrix");
6487 std::unique_ptr<std::string> prefix;
6489 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6490 std::ostringstream os;
6491 os << *prefix <<
"Before:"
6499 std::cerr << os.str ();
6504 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6506 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6507 auto exportLIDs_h = exportLIDs.view_host ();
// Count the entries in all rows to be exported.
6510 totalNumEntries = 0;
6511 for (LO i = 0; i < numExportLIDs; ++i) {
6512 const LO lclRow = exportLIDs_h[i];
6513 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
// NOTE(review): the handling of an "invalid" row count is on a line not
// shown here; presumably it is treated as zero entries -- confirm.
6516 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6519 totalNumEntries += curNumEntries;
// One LO (the entry count) per row plus one value and one GO per entry.
6530 const size_t allocSize =
6531 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6532 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
// Grow only; an already large enough buffer is left untouched.
6533 if (
static_cast<size_t> (exports.extent (0)) < allocSize) {
6534 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6536 const std::string oldLabel = exports.d_view.label ();
6537 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6538 exports = exports_type (newLabel, allocSize);
6542 std::ostringstream os;
6543 os << *prefix <<
"After:"
6551 std::cerr << os.str ();
// packNew: packs the rows listed in exportLIDs into 'exports', choosing
// between two implementations:
//   - packCrsMatrixNew(*this, ...), a free function taking the matrix, or
//   - this->packNonStaticNew(...).
// NOTE(review): the condition that selects between them is on a line not
// shown here; presumably it depends on whether the graph is static --
// confirm.
6555 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6558 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6559 Kokkos::DualView<char*, buffer_device_type>& exports,
6560 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6561 size_t& constantNumPackets)
const
// Note the argument order differs between the two callees.
6567 packCrsMatrixNew (*
this, exports, numPacketsPerLID, exportLIDs,
6568 constantNumPackets);
6571 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6572 constantNumPackets);
// packNonStaticNew: host-side, row-by-row packing of the rows listed in
// exportLIDs.
//
// Steps visible below:
//   1. Require exportLIDs and numPacketsPerLID to have equal extents
//      (std::invalid_argument otherwise).
//   2. Set constantNumPackets to 0.
//   3. Size 'exports' via allocatePackSpaceNew() and mark it modified on host.
//   4. For each export row, obtain global column indices and values --
//      converting local indices through the column Map when the matrix is
//      locally indexed, or viewing the row directly when it is globally
//      indexed -- and serialise it with packRow().
//   5. Check that the packed row stays inside the buffer (std::logic_error
//      otherwise) and store its byte count in numPacketsPerLID_h[i].
6576 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6579 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6580 Kokkos::DualView<char*, buffer_device_type>& exports,
6581 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6582 size_t& constantNumPackets)
const
6590 using LO = LocalOrdinal;
6591 using GO = GlobalOrdinal;
6593 const char tfecfFuncName[] =
"packNonStaticNew: ";
6595 const bool verbose = Behavior::verbose(
"CrsMatrix");
6596 std::unique_ptr<std::string> prefix;
6598 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6599 std::ostringstream os;
6600 os << *prefix <<
"Start" << endl;
6601 std::cerr << os.str ();
// One packet count is written per export row, so the extents must agree.
6604 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6605 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6606 (numExportLIDs !=
static_cast<size_t> (numPacketsPerLID.extent (0)),
6607 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6608 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6614 constantNumPackets = 0;
// Size the buffer; bufSize is the upper bound used in the offset checks.
6619 size_t totalNumEntries = 0;
6620 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6621 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
// Packing writes into the host view.
6624 exports.clear_sync_state();
6625 exports.modify_host();
6626 auto exports_h = exports.view_host ();
6628 std::ostringstream os;
6629 os << *prefix <<
"After marking exports as modified on host, "
6630 << dualViewStatusToString (exports,
"exports") << endl;
6631 std::cerr << os.str ();
6635 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID is taken by const reference but written here, hence the
// const_cast to update its sync state.
6638 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6639 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6640 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6645 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch storage for converted global indices (locally indexed case only).
6649 typename global_inds_host_view_type::non_const_type gidsIn_k;
6650 if (this->isLocallyIndexed()) {
6652 typename global_inds_host_view_type::non_const_type(
"packGids",
6657 for (
size_t i = 0; i < numExportLIDs; ++i) {
6658 const LO lclRow = exportLIDs_h[i];
6660 size_t numBytes = 0;
6661 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6668 numPacketsPerLID_h[i] = 0;
// Locally indexed: translate each local column index to a global one.
6672 if (this->isLocallyIndexed ()) {
6673 typename global_inds_host_view_type::non_const_type gidsIn;
6674 values_host_view_type valsIn;
6678 local_inds_host_view_type lidsIn;
6679 this->getLocalRowView (lclRow, lidsIn, valsIn);
6680 const map_type& colMap = * (this->getColMap ());
6681 for (
size_t k = 0; k < numEnt; ++k) {
6682 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6684 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
// Reads valsIn[0], so this relies on the row being non-empty at this point.
// NOTE(review): the empty-row guard is presumably on lines not shown -- confirm.
6686 const size_t numBytesPerValue =
6687 PackTraits<ST>::packValueCount (valsIn[0]);
6688 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
6689 gidsIn.data (), valsIn.data (),
// Globally indexed: the row's global indices can be viewed directly.
6692 else if (this->isGloballyIndexed ()) {
6693 global_inds_host_view_type gidsIn;
6694 values_host_view_type valsIn;
6700 const map_type& rowMap = * (this->getRowMap ());
6701 const GO gblRow = rowMap.getGlobalElement (lclRow);
6702 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6704 const size_t numBytesPerValue =
6705 PackTraits<ST>::packValueCount (valsIn[0]);
6706 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
6707 gidsIn.data (), valsIn.data (),
// The packed row must lie entirely within the buffer.
6714 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6715 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6716 "First invalid offset into 'exports' pack buffer at index i = " << i
6717 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6718 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// Record the number of bytes packed for this row.
6723 numPacketsPerLID_h[i] = numBytes;
6728 std::ostringstream os;
6729 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6736 std::cerr << os.str ();
// Raw-pointer front end to combineGlobalValues().
// NOTE(review): the line naming this function is not shown here; it appears
// to be the raw-array variant of combineGlobalValues -- confirm the name.
//
// Converts the local row index lclRow to a global row index through
// myGraph_->rowMap_, wraps the 'cols' and 'vals' arrays (numEnt entries
// each) in non-owning Teuchos::ArrayView objects, and forwards everything
// to combineGlobalValues(). When numEnt is 0 the views are built from
// nullptr instead of the caller's pointers.
6740 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6744 const LocalOrdinal numEnt,
6746 const GlobalOrdinal cols[],
6748 const char*
const prefix,
6752 using GO = GlobalOrdinal;
6756 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
// Use nullptr for empty rows rather than passing the caller's pointer on.
6757 Teuchos::ArrayView<const GO> cols_av
6758 (numEnt == 0 ?
nullptr : cols, numEnt);
6759 Teuchos::ArrayView<const Scalar> vals_av
6760 (numEnt == 0 ?
nullptr :
reinterpret_cast<const Scalar*
> (vals), numEnt);
6765 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6766 prefix, debug, verbose);
// combineGlobalValues (name per tfecfFuncName below).
//
// Applies one row of incoming (columnIndices, values) data to global row
// globalRowIndex according to combineMode.
//
// Static graph:
//   ADD     -> sumIntoGlobalValues
//   REPLACE -> replaceGlobalValues
//   ABSMAX  -> transformGlobalValues with Details::AbsMax<Scalar>
//   INSERT  -> std::invalid_argument (forbidden with a static graph)
//   other   -> std::logic_error ("Invalid combine mode")
//
// Non-static graph:
//   ADD or INSERT -> insertGlobalValuesFilteredChecked
//   ABSMAX        -> std::logic_error (message says not yet supported)
//   REPLACE       -> std::logic_error (message says not yet supported)
//   other         -> std::logic_error ("Should never get here!")
6770 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6774 const GlobalOrdinal globalRowIndex,
6775 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6776 const Teuchos::ArrayView<const Scalar>& values,
6778 const char*
const prefix,
6782 const char tfecfFuncName[] =
"combineGlobalValues: ";
// Static graph: the structure cannot change, so only existing entries are
// modified.
6784 if (isStaticGraph ()) {
6788 if (combineMode ==
ADD) {
6789 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6791 else if (combineMode ==
REPLACE) {
6792 replaceGlobalValues (globalRowIndex, columnIndices, values);
6794 else if (combineMode ==
ABSMAX) {
6795 using ::Tpetra::Details::AbsMax;
6797 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
// The test below repeats conditions already established by the enclosing
// branches, so it always throws when reached.
6801 else if (combineMode ==
INSERT) {
6802 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6803 (isStaticGraph() && combineMode ==
INSERT,
6804 std::invalid_argument,
"INSERT combine mode is forbidden "
6805 "if the matrix has a static (const) graph (i.e., was "
6806 "constructed with the CrsMatrix constructor that takes a "
6807 "const CrsGraph pointer).");
6810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6811 (
true, std::logic_error,
"Invalid combine mode; should "
6813 "Please report this bug to the Tpetra developers.");
// Non-static graph: ADD and INSERT share the same insertion routine.
6817 if (combineMode ==
ADD || combineMode ==
INSERT) {
6824 insertGlobalValuesFilteredChecked(globalRowIndex,
6825 columnIndices, values, prefix, debug, verbose);
// As above, the two tests below always throw when reached.
6836 else if (combineMode ==
ABSMAX) {
6837 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6838 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6839 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6842 else if (combineMode ==
REPLACE) {
6843 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6844 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6845 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6849 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6850 true, std::logic_error,
"Should never get here! Please report this "
6851 "bug to the Tpetra developers.");
// unpackAndCombine: argument-checking front end for unpackAndCombineImpl.
// NOTE(review): this excerpt omits some of the original lines (including
// the line that names the function and any parameters that follow
// constantNumPackets), so the comments below describe only the
// statements that are visible here.
//
// Visible parameters:
//   importLIDs         - DualView of local ordinals; its extent(0) must
//                        equal numPacketsPerLID.extent(0).
//   imports            - DualView<char*> buffer, forwarded to
//                        unpackAndCombineImpl.
//   numPacketsPerLID   - DualView<size_t*>, forwarded to
//                        unpackAndCombineImpl.
//   constantNumPackets - forwarded to unpackAndCombineImpl.
//
// Raises std::invalid_argument when combineMode is not found in
// validModes or when the two extents above differ, and std::logic_error
// on the path that collects exceptions thrown by unpackAndCombineImpl.
6856 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6860 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6861 Kokkos::DualView<char*, buffer_device_type> imports,
6862 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6863 const size_t constantNumPackets,
6870 const char tfecfFuncName[] =
"unpackAndCombine: ";
6871 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6873 const bool debug = Behavior::debug(
"CrsMatrix");
6874 const bool verbose = Behavior::verbose(
"CrsMatrix");
6875 constexpr int numValidModes = 5;
// Human-readable names of the accepted combine modes; in the visible
// code they are used only to build the "Invalid combine mode" message.
6878 const char* validModeNames[numValidModes] =
6879 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
// Diagnostic output: report the sync status of each DualView and the
// value of constantNumPackets on std::cerr.
// NOTE(review): the guard around this output is not visible in this
// excerpt (presumably `if (verbose)`) -- confirm.
6881 std::unique_ptr<std::string> prefix;
6883 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6884 std::ostringstream os;
6885 os << *prefix <<
"Start:" << endl
6887 << dualViewStatusToString (importLIDs,
"importLIDs")
6890 << dualViewStatusToString (imports,
"imports")
6893 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6895 << *prefix <<
" constantNumPackets: " << constantNumPackets
6899 std::cerr << os.str ();
// Reject a combine mode that does not appear in validModes; the
// exception text lists every entry of validModeNames.
6903 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6904 validModes+numValidModes) {
6905 std::ostringstream os;
6906 os <<
"Invalid combine mode. Valid modes are {";
6907 for (
int k = 0; k < numValidModes; ++k) {
6908 os << validModeNames[k];
6909 if (k < numValidModes - 1) {
6914 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6915 (
true, std::invalid_argument, os.str ());
// importLIDs and numPacketsPerLID must have the same number of entries.
6917 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6918 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6919 std::invalid_argument,
"importLIDs.extent(0)="
6920 << importLIDs.extent(0)
6921 <<
" != numPacketsPerLID.extent(0)="
6922 << numPacketsPerLID.extent(0) <<
".");
// NOTE(review): the body of this ZERO-mode branch is not visible in
// this excerpt (presumably an early return) -- confirm.
6925 if (combineMode ==
ZERO) {
// Exception-collecting path: unpackAndCombineImpl is called, a
// std::exception is caught, lclBad is max-reduced into gblBad over the
// matrix's communicator, and a std::logic_error carrying the collected
// message is raised.
// NOTE(review): the condition that selects this path, and how lclBad
// is set, are not visible here -- confirm against the full source.
6930 using Teuchos::reduceAll;
6931 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6934 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6935 constantNumPackets, combineMode,
6937 }
catch (std::exception& e) {
6942 const Teuchos::Comm<int>& comm = * (this->
getComm ());
6943 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6944 lclBad, Teuchos::outArg (gblBad));
6950 std::ostringstream os;
6951 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6952 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6955 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6956 "threw an exception on one or more participating processes: "
6957 << endl << msg->str ());
// Second call site of unpackAndCombineImpl; no try/catch or reduction
// is visible around it in this excerpt.
6961 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6962 constantNumPackets, combineMode,
// Closing diagnostic output: report the DualView sync status again.
6967 std::ostringstream os;
6968 os << *prefix <<
"Done!" << endl
6970 << dualViewStatusToString (importLIDs,
"importLIDs")
6973 << dualViewStatusToString (imports,
"imports")
6976 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6978 std::cerr << os.str ();
// unpackAndCombineImpl: dispatches the unpack on isStaticGraph().
//   - Static graph: calls Details::unpackCrsMatrixAndCombineNew on *this.
//   - Otherwise: asks myGraph_ for padding
//     (computePaddingForCrsMatrixUnpack), applies it with
//     applyCrsPadding, then calls unpackAndCombineImplNonStatic.
// An exception thrown while computing the padding is re-raised as
// std::runtime_error with the process rank prepended (-1 when no
// communicator is available).
// NOTE(review): this excerpt omits some of the original lines (the
// function-name line, the first parameter, guards around the diagnostic
// output), so only the visible statements are described.
6982 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6988 Kokkos::DualView<char*, buffer_device_type> imports,
6989 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6990 const size_t constantNumPackets,
6995 "Tpetra::CrsMatrix::unpackAndCombineImpl",
6999 const char tfecfFuncName[] =
"unpackAndCombineImpl";
// Diagnostic output: graph kind and the extents of the three buffers.
7000 std::unique_ptr<std::string> prefix;
7002 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7003 std::ostringstream os;
7004 os << *prefix <<
"isStaticGraph(): "
7005 << (isStaticGraph() ?
"true" :
"false")
7006 <<
", importLIDs.extent(0): "
7007 << importLIDs.extent(0)
7008 <<
", imports.extent(0): "
7009 << imports.extent(0)
7010 <<
", numPacketsPerLID.extent(0): "
7011 << numPacketsPerLID.extent(0)
7013 std::cerr << os.str();
// Static-graph case: delegate entirely to the Details helper.
7016 if (isStaticGraph ()) {
7017 using Details::unpackCrsMatrixAndCombineNew;
7018 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7019 importLIDs, constantNumPackets,
// Non-static case, step 1: have the graph compute the padding needed
// for the incoming data.
7024 using padding_type =
typename crs_graph_type::padding_type;
7025 std::unique_ptr<padding_type> padding;
7027 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7028 importLIDs, imports, numPacketsPerLID, verbose);
// Translate any failure into a std::runtime_error tagged with the rank.
7030 catch (std::exception& e) {
7031 const auto rowMap = getRowMap();
7032 const auto comm = rowMap.is_null() ? Teuchos::null :
7034 const int myRank = comm.is_null() ? -1 : comm->getRank();
7035 TEUCHOS_TEST_FOR_EXCEPTION
7036 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7037 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7038 "threw an exception: " << e.what());
// Step 2: apply the computed padding to this matrix.
7041 std::ostringstream os;
7042 os << *prefix <<
"Call applyCrsPadding" << endl;
7043 std::cerr << os.str();
7045 applyCrsPadding(*padding, verbose);
// Step 3: unpack and combine row by row.
7048 std::ostringstream os;
7049 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7050 std::cerr << os.str();
7052 unpackAndCombineImplNonStatic(importLIDs, imports,
7059 std::ostringstream os;
7060 os << *prefix <<
"Done" << endl;
7061 std::cerr << os.str();
// unpackAndCombineImplNonStatic: reads packed rows from the host view
// of `imports` and combines each one into this matrix through
// combineGlobalValuesRaw.
//
// Visible structure:
//   1. Sync imports and numPacketsPerLID to host if they need it;
//      importLIDs is asserted to need no host sync.
//   2. First loop: for each import LID with a nonzero byte count, check
//      the byte counts and track the largest per-row entry count
//      (maxRowNumEnt).
//   3. Allocate scratch arrays of length maxRowNumEnt.
//   4. Second loop: unpack each row with unpackRow and pass it to
//      combineGlobalValuesRaw.
// The consistency checks raise std::logic_error.
// NOTE(review): this excerpt omits some of the original lines (the
// function-name line, some parameters, the statements that declare and
// advance `offset`, and the bodies of the `numBytes == 0` branches), so
// only the visible statements are described.
7065 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7071 Kokkos::DualView<char*, buffer_device_type> imports,
7072 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7073 const size_t constantNumPackets,
7077 using Kokkos::subview;
7078 using Kokkos::MemoryUnmanaged;
7085 using LO = LocalOrdinal;
7086 using GO = GlobalOrdinal;
7088 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7090 typename View<int*, device_type>::HostMirror::execution_space;
7091 using pair_type = std::pair<typename View<int*, HES>::size_type,
7092 typename View<int*, HES>::size_type>;
7093 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7094 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7095 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7097 const bool debug = Behavior::debug(
"CrsMatrix");
7098 const bool verbose = Behavior::verbose(
"CrsMatrix");
7099 std::unique_ptr<std::string> prefix;
7101 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7102 std::ostringstream os;
7103 os << *prefix << endl;
7104 std::cerr << os.str ();
// Raw C string form of the prefix for combineGlobalValuesRaw; nullptr
// when verbose output is off.
7106 const char*
const prefix_raw =
7107 verbose ? prefix.get()->c_str() :
nullptr;
7109 const size_type numImportLIDs = importLIDs.extent (0);
// NOTE(review): the body of this branch is not visible in this excerpt
// (presumably an early return when there is nothing to unpack) --
// confirm.
7110 if (combineMode ==
ZERO || numImportLIDs == 0) {
7115 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// Bring the host copies up to date before reading them on host.
7120 if (imports.need_sync_host()) {
7121 imports.sync_host ();
7123 auto imports_h = imports.view_host();
7126 if (numPacketsPerLID.need_sync_host()) {
7127 numPacketsPerLID.sync_host ();
7129 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
// importLIDs is const here, so it is only checked, never synced.
7131 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7132 auto importLIDs_h = importLIDs.view_host();
7134 size_t numBytesPerValue;
7145 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// First pass: validate each row's packed size and find the largest
// entry count over all rows.
7150 size_t maxRowNumEnt = 0;
7151 for (size_type i = 0; i < numImportLIDs; ++i) {
7152 const size_t numBytes = numPacketsPerLID_h[i];
7153 if (numBytes == 0) {
// The row's bytes must fit inside the imports buffer.
7158 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7159 (offset + numBytes >
size_t(imports_h.extent (0)),
7160 std::logic_error,
": At local row index importLIDs_h[i="
7161 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7162 <<
") + numBytes (=" << numBytes <<
") > "
7163 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
// The packed entry count alone must not need more bytes than the row
// provides.
7168 const size_t theNumBytes =
7169 PackTraits<LO>::packValueCount (numEntLO);
7170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7171 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7172 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
// Read the row's entry count (numEntLO) from the start of its bytes.
7174 const char*
const inBuf = imports_h.data () + offset;
7175 const size_t actualNumBytes =
7176 PackTraits<LO>::unpackValue (numEntLO, inBuf);
7179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7180 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7181 <<
", actualNumBytes=" << actualNumBytes
7182 <<
" > numBytes=" << numBytes <<
".");
// A row with a nonzero byte count must contain at least one entry.
7183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7184 (numEntLO == 0, std::logic_error,
": At local row index "
7185 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7186 "the number of entries read from the packed data is "
7187 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7191 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays sized for the longest row found above.
7199 View<GO*, HES> gblColInds;
7200 View<LO*, HES> lclColInds;
7201 View<ST*, HES> vals;
7214 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7215 gid, maxRowNumEnt,
"gids");
7216 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7217 lid, maxRowNumEnt,
"lids");
7218 vals = ScalarViewTraits<ST, HES>::allocateArray(
7219 val, maxRowNumEnt,
"vals");
// Second pass: unpack each row into the scratch arrays and combine it
// into the matrix.
7223 for (size_type i = 0; i < numImportLIDs; ++i) {
7224 const size_t numBytes = numPacketsPerLID_h[i];
7225 if (numBytes == 0) {
// Re-read the entry count; the byte count returned by unpackValue is
// deliberately ignored here.
7229 const char*
const inBuf = imports_h.data () + offset;
7230 (void) PackTraits<LO>::unpackValue (numEntLO, inBuf);
7232 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7233 const LO lclRow = importLIDs_h[i];
// Unmanaged views over the first numEnt slots of the scratch arrays.
7235 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7236 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
// unpackRow must report exactly the number of bytes the row holds.
7238 const size_t numBytesOut =
7239 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7240 offset, numBytes, numEnt, numBytesPerValue);
7241 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7242 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7243 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7244 << numBytesOut <<
".");
// Hand the unpacked values and global column indices to the combine
// routine as const raw pointers.
7246 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7247 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7248 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7249 combineMode, prefix_raw, debug, verbose);
7255 std::ostringstream os;
7256 os << *prefix <<
"Done" << endl;
7257 std::cerr << os.str();
// getColumnMapMultiVector: when the graph has an Import object, or
// `force` is true, creates a MultiVector over the matrix's column Map
// with numVecs columns.
// Raises std::runtime_error if the matrix has no column Map. A second
// check reports that the graph must be fill complete.
// NOTE(review): this excerpt omits some of the original lines (the
// function-name line, the other parameters, the fill-complete
// condition, the declaration of X_colMap and the return statement), so
// only the visible statements are described.
7261 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7262 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7265 const bool force)
const
7267 using Teuchos::null;
// A column Map is a precondition of this method.
7271 TEUCHOS_TEST_FOR_EXCEPTION(
7272 ! this->
hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7273 "MapMultiVector: You may only call this method if the matrix has a "
7274 "column Map. If the matrix does not yet have a column Map, you should "
7275 "first call fillComplete (with domain and range Map if necessary).");
7279 TEUCHOS_TEST_FOR_EXCEPTION(
7281 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7282 "this matrix's graph is fill complete.");
7285 RCP<const import_type> importer = this->
getGraph ()->getImporter ();
7286 RCP<const map_type> colMap = this->
getColMap ();
// Allocate only when an Import exists or the caller forces allocation.
7299 if (! importer.is_null () || force) {
7301 X_colMap = rcp (
new MV (colMap, numVecs));
// getRowMapMultiVector: when the graph has an Export object, or `force`
// is true, creates a MultiVector over the matrix's row Map with numVecs
// columns.
// A check reports that the graph must be fill complete.
// NOTE(review): this excerpt omits some of the original lines (the
// function-name line, the other parameters, the fill-complete
// condition, the declaration of Y_rowMap and the return statement), so
// only the visible statements are described.
7318 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7319 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7322 const bool force)
const
7324 using Teuchos::null;
7330 TEUCHOS_TEST_FOR_EXCEPTION(
7332 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7333 "matrix's graph is fill complete.");
7336 RCP<const export_type> exporter = this->
getGraph ()->getExporter ();
7340 RCP<const map_type> rowMap = this->
getRowMap ();
// Allocate only when an Export exists or the caller forces allocation.
7352 if (! exporter.is_null () || force) {
7354 Y_rowMap = rcp (
new MV (rowMap, numVecs));
// removeEmptyProcessesInPlace: forwards to the owned graph's
// removeEmptyProcessesInPlace(newMap), then re-points staticGraph_ at
// myGraph_ as a pointer-to-const.
// Raises std::logic_error when myGraph_ is null; per the message below,
// that is the case of a matrix created with a constant graph.
// NOTE(review): this excerpt omits some of the original lines
// (including the function-name line and signature), so only the visible
// statements are described.
7364 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7369 TEUCHOS_TEST_FOR_EXCEPTION(
7370 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7371 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7372 "was created with a constant graph (that is, when it was created using "
7373 "the version of its constructor that takes an RCP<const CrsGraph>). "
7374 "This is because the matrix is not allowed to modify the graph in that "
7375 "case, but removing empty processes requires modifying the graph.");
7376 myGraph_->removeEmptyProcessesInPlace (newMap);
// Refresh the const view of the graph after the call above.
7384 staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
// add: builds a new CrsMatrix C from the input matrix A and this matrix
// (called B below) and returns it as a RowMatrix. The diagnostic text
// describes the operation as "C = alpha*A + beta*B".
//
// Visible parameters:
//   alpha     - scalar applied to A's values; when it equals zero, A's
//               rows are neither counted nor inserted.
//   domainMap - domain Map for C; when null, B's is used, else A's.
//   rangeMap  - range Map for C; when null, B's is used, else A's.
//   params    - optional ParameterList. Reads "Call fillComplete"
//               (default true) and the sublists "Constructor parameters"
//               and "fillComplete parameters".
//
// Raises std::invalid_argument for missing or mismatched Maps and
// std::logic_error if C is still null after construction.
// NOTE(review): this excerpt omits some of the original lines (the
// class-qualifier line, the A and beta parameters, several guards, the
// declarations of ind/val, the copy of A's rows and the beta scaling),
// so only the visible statements are described.
7387 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7388 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7390 add (
const Scalar& alpha,
7393 const Teuchos::RCP<const map_type>& domainMap,
7394 const Teuchos::RCP<const map_type>& rangeMap,
7395 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7397 using Teuchos::Array;
7398 using Teuchos::ArrayView;
7399 using Teuchos::ParameterList;
7402 using Teuchos::rcp_implicit_cast;
7403 using Teuchos::sublist;
7407 using crs_matrix_type =
7408 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
7409 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7413 std::unique_ptr<std::string> prefix;
7415 prefix = this->createPrefix(
"CrsMatrix",
"add");
7416 std::ostringstream os;
7417 os << *prefix <<
"Start" << endl;
7418 std::cerr << os.str ();
// B is this matrix; ZERO and ONE are the Scalar constants used for
// comparisons below.
7421 const crs_matrix_type& B = *
this;
7422 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7423 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7430 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7431 RCP<const map_type> B_domainMap = B.getDomainMap ();
7432 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7434 RCP<const map_type> theDomainMap = domainMap;
7435 RCP<const map_type> theRangeMap = rangeMap;
// Choose C's domain Map: the caller's if given, else B's, else A's;
// raise if all three are null.
7437 if (domainMap.is_null ()) {
7438 if (B_domainMap.is_null ()) {
7439 TEUCHOS_TEST_FOR_EXCEPTION(
7440 A_domainMap.is_null (), std::invalid_argument,
7441 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7442 "then you must supply a nonnull domain Map to this method.");
7443 theDomainMap = A_domainMap;
7445 theDomainMap = B_domainMap;
// Same selection rule for C's range Map.
7448 if (rangeMap.is_null ()) {
7449 if (B_rangeMap.is_null ()) {
7450 TEUCHOS_TEST_FOR_EXCEPTION(
7451 A_rangeMap.is_null (), std::invalid_argument,
7452 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7453 "then you must supply a nonnull range Map to this method.");
7454 theRangeMap = A_rangeMap;
7456 theRangeMap = B_rangeMap;
// Consistency checks. When both A and B have domain and range Maps they
// must agree (isSameAs), and any Map supplied by the caller must agree
// with B's.
7464 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7465 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7466 TEUCHOS_TEST_FOR_EXCEPTION
7467 (! B_domainMap->isSameAs(*A_domainMap),
7468 std::invalid_argument,
7469 errPfx <<
"The input RowMatrix A must have a domain Map "
7470 "which is the same as (isSameAs) this RowMatrix's "
7472 TEUCHOS_TEST_FOR_EXCEPTION
7473 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7474 errPfx <<
"The input RowMatrix A must have a range Map "
7475 "which is the same as (isSameAs) this RowMatrix's range "
7477 TEUCHOS_TEST_FOR_EXCEPTION
7478 (! domainMap.is_null() &&
7479 ! domainMap->isSameAs(*B_domainMap),
7480 std::invalid_argument,
7481 errPfx <<
"The input domain Map must be the same as "
7482 "(isSameAs) this RowMatrix's domain Map.");
7483 TEUCHOS_TEST_FOR_EXCEPTION
7484 (! rangeMap.is_null() &&
7485 ! rangeMap->isSameAs(*B_rangeMap),
7486 std::invalid_argument,
7487 errPfx <<
"The input range Map must be the same as "
7488 "(isSameAs) this RowMatrix's range Map.");
// Only B has both Maps: check the caller's Maps against B's.
7491 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7492 TEUCHOS_TEST_FOR_EXCEPTION
7493 (! domainMap.is_null() &&
7494 ! domainMap->isSameAs(*B_domainMap),
7495 std::invalid_argument,
7496 errPfx <<
"The input domain Map must be the same as "
7497 "(isSameAs) this RowMatrix's domain Map.");
7498 TEUCHOS_TEST_FOR_EXCEPTION
7499 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7500 std::invalid_argument,
7501 errPfx <<
"The input range Map must be the same as "
7502 "(isSameAs) this RowMatrix's range Map.");
// Remaining case: the caller must supply both Maps.
7505 TEUCHOS_TEST_FOR_EXCEPTION
7506 (domainMap.is_null() || rangeMap.is_null(),
7507 std::invalid_argument, errPfx <<
"If neither A nor B "
7508 "have a domain and range Map, then you must supply a "
7509 "nonnull domain and range Map to this method.");
// Options read from params; fillComplete is called by default.
7516 bool callFillComplete =
true;
7517 RCP<ParameterList> constructorSublist;
7518 RCP<ParameterList> fillCompleteSublist;
7519 if (! params.is_null()) {
7521 params->get(
"Call fillComplete", callFillComplete);
7522 constructorSublist = sublist(params,
"Constructor parameters");
7523 fillCompleteSublist = sublist(params,
"fillComplete parameters");
// C uses B's row Map.
7526 RCP<const map_type> A_rowMap = A.
getRowMap ();
7527 RCP<const map_type> B_rowMap = B.getRowMap ();
7528 RCP<const map_type> C_rowMap = B_rowMap;
7529 RCP<crs_matrix_type> C;
// Same row Map in A and B: size each row of C as the sum of the entry
// counts of the corresponding rows (A counted only when alpha != ZERO).
7535 if (A_rowMap->isSameAs (*B_rowMap)) {
7536 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7537 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7540 if (alpha !=
ZERO) {
7541 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7543 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7548 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7549 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7550 C_maxNumEntriesPerRow[localRow] += B_numEntries;
// Construct C, passing the constructor sublist when one was supplied.
7554 if (constructorSublist.is_null ()) {
7555 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7557 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7558 constructorSublist));
// NOTE(review): the branch structure around this unconditional throw is
// not visible in this excerpt; per its message it presumably covers
// differing row Maps -- confirm.
7569 TEUCHOS_TEST_FOR_EXCEPTION
7570 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7571 "be the same for statically allocated matrices, to ensure "
7572 "that there is sufficient space to do the addition.");
7575 TEUCHOS_TEST_FOR_EXCEPTION
7576 (C.is_null (), std::logic_error,
7577 errPfx <<
"C should not be null at this point. "
7578 "Please report this bug to the Tpetra developers.");
7581 std::ostringstream os;
7582 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7583 std::cerr << os.str ();
7585 using gids_type = nonconst_global_inds_host_view_type;
7586 using vals_type = nonconst_values_host_view_type;
// Contribution of A: for each local row, grow the scratch views when
// needed, scale the values by alpha and insert them into C by global
// row index.
// NOTE(review): the statement that copies A's row into indView/valView
// is not visible in this excerpt.
7590 if (alpha !=
ZERO) {
7591 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7592 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7594 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7595 if (A_numEntries >
static_cast<size_t> (ind.size ())) {
7596 Kokkos::resize(ind,A_numEntries);
7597 Kokkos::resize(val,A_numEntries);
7599 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7600 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7604 for (
size_t k = 0; k < A_numEntries; ++k) {
7605 valView[k] *= alpha;
7608 C->insertGlobalValues (globalRow, A_numEntries,
7609 reinterpret_cast<Scalar *
>(valView.data()),
// Contribution of B: copy each local row with getGlobalRowCopy and
// insert it into C.
// NOTE(review): the guard around this section and the body of the
// scaling loop are not visible in this excerpt.
7615 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7616 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7617 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7618 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7619 if (B_numEntries >
static_cast<size_t> (ind.size ())) {
7620 Kokkos::resize(ind,B_numEntries);
7621 Kokkos::resize(val,B_numEntries);
7623 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7624 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7625 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7628 for (
size_t k = 0; k < B_numEntries; ++k) {
7632 C->insertGlobalValues (globalRow, B_numEntries,
7633 reinterpret_cast<Scalar *
>(valView.data()),
// Finish C with the chosen domain and range Maps unless the caller
// turned fillComplete off.
7638 if (callFillComplete) {
7640 std::ostringstream os;
7641 os << *prefix <<
"Call fillComplete on C" << endl;
7642 std::cerr << os.str ();
7644 if (fillCompleteSublist.is_null ()) {
7645 C->fillComplete (theDomainMap, theRangeMap);
7647 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7651 std::ostringstream os;
7652 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7653 std::cerr << os.str ();
7657 std::ostringstream os;
7658 os << *prefix <<
"Done" << endl;
7659 std::cerr << os.str ();
// Return C through its RowMatrix base type.
7661 return rcp_implicit_cast<row_matrix_type> (C);
7666 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7670 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7671 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7672 const Teuchos::RCP<const map_type>& domainMap,
7673 const Teuchos::RCP<const map_type>& rangeMap,
7674 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7681 using Teuchos::ArrayRCP;
7682 using Teuchos::ArrayView;
7683 using Teuchos::Comm;
7684 using Teuchos::ParameterList;
7687 typedef LocalOrdinal LO;
7688 typedef GlobalOrdinal GO;
7694 const bool debug = Behavior::debug(
"CrsMatrix");
7695 const bool verbose = Behavior::verbose(
"CrsMatrix");
7696 int MyPID = getComm ()->getRank ();
7698 std::unique_ptr<std::string> verbosePrefix;
7701 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7702 std::ostringstream os;
7703 os <<
"Start" << endl;
7704 std::cerr << os.str();
7711 bool reverseMode =
false;
7712 bool restrictComm =
false;
7714 int mm_optimization_core_count =
7715 Behavior::TAFC_OptimizationCoreCount();
7716 RCP<ParameterList> matrixparams;
7717 bool overrideAllreduce =
false;
7718 if (! params.is_null ()) {
7719 matrixparams = sublist (params,
"CrsMatrix");
7720 reverseMode = params->get (
"Reverse Mode", reverseMode);
7721 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7722 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7723 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7724 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7726 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7727 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7728 if(reverseMode) isMM =
false;
7732 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7734 int reduced_mismatch = 0;
7735 if (isMM && !overrideAllreduce) {
7738 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7739 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7740 rowTransfer.getRemoteLIDs ().size() == 0);
7741 mismatch = (source_vals != target_vals) ? 1 : 0;
7743 ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
7744 Teuchos::REDUCE_MAX, * (getComm ()));
7747#ifdef HAVE_TPETRA_MMM_TIMINGS
7748 using Teuchos::TimeMonitor;
7750 if(!params.is_null())
7751 label = params->get(
"Timer Label",label);
7752 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7755 std::ostringstream os;
7756 if(isMM) os<<
":MMOpt";
7757 else os<<
":MMLegacy";
7761 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7771 TEUCHOS_TEST_FOR_EXCEPTION(
7772 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7773 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7774 "argument must be either an Import or an Export, and its template "
7775 "parameters must match the corresponding template parameters of the "
7783 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
7784 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
7786 if(! domainTransfer.is_null()) {
7787 TEUCHOS_TEST_FOR_EXCEPTION(
7788 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7789 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7790 "argument must be either an Import or an Export, and its template "
7791 "parameters must match the corresponding template parameters of the "
7794 TEUCHOS_TEST_FOR_EXCEPTION(
7795 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7796 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7797 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7798 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7799 "arguments must be of the same type (either Import or Export).");
7801 TEUCHOS_TEST_FOR_EXCEPTION(
7802 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7803 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7804 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7805 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7806 "arguments must be of the same type (either Import or Export).");
7812 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7816 RCP<const map_type> MyRowMap = reverseMode ?
7817 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7818 RCP<const map_type> MyColMap;
7819 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7820 domainMap : getDomainMap ();
7821 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7822 rangeMap : getRangeMap ();
7823 RCP<const map_type> BaseRowMap = MyRowMap;
7824 RCP<const map_type> BaseDomainMap = MyDomainMap;
7832 if (! destMat.is_null ()) {
7843 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7844 ! destMat->getGraph ()->isGloballyIndexed ();
7845 TEUCHOS_TEST_FOR_EXCEPTION(
7846 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7847 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7848 "to be nonnull, if its graph is empty (neither locally nor globally "
7857 TEUCHOS_TEST_FOR_EXCEPTION(
7858 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7859 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7860 "input argument 'destMat' is not the same as the (row) Map specified "
7861 "by the input argument 'rowTransfer'.");
7862 TEUCHOS_TEST_FOR_EXCEPTION(
7863 ! destMat->checkSizes (*
this), std::invalid_argument,
7864 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7865 "destination matrix, but checkSizes() indicates that it is not a legal "
7866 "legal target for redistribution from the source matrix (*this). This "
7867 "may mean that they do not have the same dimensions.");
7881 TEUCHOS_TEST_FOR_EXCEPTION(
7882 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7883 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7884 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7885 TEUCHOS_TEST_FOR_EXCEPTION(
7886 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7887 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7888 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7891 TEUCHOS_TEST_FOR_EXCEPTION(
7892 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7893 std::invalid_argument,
7894 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7895 "argument must be the same as the rebalanced domain map 'domainMap'");
7897 TEUCHOS_TEST_FOR_EXCEPTION(
7898 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7899 std::invalid_argument,
7900 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7901 "argument must be the same as the rebalanced domain map 'domainMap'");
7914 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7915 ArrayView<const LO> ExportLIDs = reverseMode ?
7916 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7917 ArrayView<const LO> RemoteLIDs = reverseMode ?
7918 rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
7919 ArrayView<const LO> PermuteToLIDs = reverseMode ?
7920 rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
7921 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7922 rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
7923 Distributor& Distor = rowTransfer.getDistributor ();
7926 Teuchos::Array<int> SourcePids;
7927 Teuchos::Array<int> TargetPids;
7930 RCP<const map_type> ReducedRowMap, ReducedColMap,
7931 ReducedDomainMap, ReducedRangeMap;
7932 RCP<const Comm<int> > ReducedComm;
7936 if (destMat.is_null ()) {
7937 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7944#ifdef HAVE_TPETRA_MMM_TIMINGS
7945 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7947 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7948 ReducedComm = ReducedRowMap.is_null () ?
7950 ReducedRowMap->getComm ();
7951 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7953 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7955 MyDomainMap->replaceCommWithSubset (ReducedComm);
7956 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7958 MyRangeMap->replaceCommWithSubset (ReducedComm);
7961 MyRowMap = ReducedRowMap;
7962 MyDomainMap = ReducedDomainMap;
7963 MyRangeMap = ReducedRangeMap;
7966 if (! ReducedComm.is_null ()) {
7967 MyPID = ReducedComm->getRank ();
7974 ReducedComm = MyRowMap->getComm ();
7983 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
7986 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
7988 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
7989#ifdef HAVE_TPETRA_MMM_TIMINGS
7990 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8000 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8003#ifdef HAVE_TPETRA_MMM_TIMINGS
8004 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8006 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8007 IntVectorType SourceCol_pids(getColMap());
8009 SourceDomain_pids.putScalar(MyPID);
8011 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8012 SourcePids.resize (getColMap ()->getLocalNumElements ());
8013 SourceCol_pids.get1dCopy (SourcePids ());
8015 else if (MyImporter.is_null ()) {
8017#ifdef HAVE_TPETRA_MMM_TIMINGS
8018 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8020 SourcePids.resize (getColMap ()->getLocalNumElements ());
8021 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8023 else if ( ! MyImporter.is_null () &&
8024 ! domainTransfer.is_null () ) {
8029#ifdef HAVE_TPETRA_MMM_TIMINGS
8030 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8034 IntVectorType TargetDomain_pids (domainMap);
8035 TargetDomain_pids.putScalar (MyPID);
8038 IntVectorType SourceDomain_pids (getDomainMap ());
8041 IntVectorType SourceCol_pids (getColMap ());
8043 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8044 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8046 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8047 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8049 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8050 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8052 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8053 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8056 TEUCHOS_TEST_FOR_EXCEPTION(
8057 true, std::logic_error,
"Tpetra::CrsMatrix::"
8058 "transferAndFillComplete: Should never get here! "
8059 "Please report this bug to a Tpetra developer.");
8061 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8062 SourcePids.resize (getColMap ()->getLocalNumElements ());
8063 SourceCol_pids.get1dCopy (SourcePids ());
8065 else if ( ! MyImporter.is_null () &&
8066 BaseDomainMap->isSameAs (*BaseRowMap) &&
8067 getDomainMap ()->isSameAs (*getRowMap ())) {
8069#ifdef HAVE_TPETRA_MMM_TIMINGS
8070 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8073 IntVectorType TargetRow_pids (domainMap);
8074 IntVectorType SourceRow_pids (getRowMap ());
8075 IntVectorType SourceCol_pids (getColMap ());
8077 TargetRow_pids.putScalar (MyPID);
8078 if (! reverseMode && xferAsImport !=
nullptr) {
8079 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8081 else if (reverseMode && xferAsExport !=
nullptr) {
8082 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8084 else if (! reverseMode && xferAsExport !=
nullptr) {
8085 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8087 else if (reverseMode && xferAsImport !=
nullptr) {
8088 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8091 TEUCHOS_TEST_FOR_EXCEPTION(
8092 true, std::logic_error,
"Tpetra::CrsMatrix::"
8093 "transferAndFillComplete: Should never get here! "
8094 "Please report this bug to a Tpetra developer.");
8097 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8098 SourcePids.resize (getColMap ()->getLocalNumElements ());
8099 SourceCol_pids.get1dCopy (SourcePids ());
8102 TEUCHOS_TEST_FOR_EXCEPTION(
8103 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8104 "transferAndFillComplete: This method only allows either domainMap == "
8105 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8106 "getDomainMap () == getRowMap ()).");
8110 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8112#ifdef HAVE_TPETRA_MMM_TIMINGS
8113 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8115 if (constantNumPackets == 0) {
8116 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8117 RemoteLIDs.size ());
8124 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
8125 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8131#ifdef HAVE_TPETRA_MMM_TIMINGS
8132 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8135 using Teuchos::outArg;
8136 using Teuchos::REDUCE_MAX;
8137 using Teuchos::reduceAll;
8140 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8141 const int myRank = comm->getRank ();
8143 std::ostringstream errStrm;
8147 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8150 destMat->numExportPacketsPerLID_.modify_host ();
8151 numExportPacketsPerLID =
8154 catch (std::exception& e) {
8155 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8156 << e.what () << std::endl;
8160 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8161 "an exception not a subclass of std::exception" << std::endl;
8165 if (! comm.is_null ()) {
8166 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8170 TEUCHOS_TEST_FOR_EXCEPTION(
8171 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8172 "exception on at least one process.");
8176 std::ostringstream os;
8177 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8179 std::cerr << os.str ();
8184 numExportPacketsPerLID,
8187 constantNumPackets);
8189 catch (std::exception& e) {
8190 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8191 << e.what () << std::endl;
8195 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8196 "an exception not a subclass of std::exception" << std::endl;
8201 std::ostringstream os;
8202 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8204 std::cerr << os.str ();
8207 if (! comm.is_null ()) {
8208 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8212 TEUCHOS_TEST_FOR_EXCEPTION(
8213 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8214 "exception on at least one process.");
8219 destMat->numExportPacketsPerLID_.modify_host ();
8220 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8223 std::ostringstream os;
8224 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8226 std::cerr << os.str ();
8230 numExportPacketsPerLID,
8233 constantNumPackets);
8235 std::ostringstream os;
8236 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8238 std::cerr << os.str ();
8245#ifdef HAVE_TPETRA_MMM_TIMINGS
8246 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8248 if (! communication_needed) {
8250 std::ostringstream os;
8251 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8252 std::cerr << os.str ();
8257 if (constantNumPackets == 0) {
8259 std::ostringstream os;
8260 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8262 std::cerr << os.str ();
8267 destMat->numExportPacketsPerLID_.sync_host ();
8268 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8270 destMat->numImportPacketsPerLID_.sync_host ();
8271 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8275 std::ostringstream os;
8276 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8278 std::cerr << os.str ();
8280 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8281 destMat->numImportPacketsPerLID_.view_host());
8283 std::ostringstream os;
8284 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8286 std::cerr << os.str ();
8289 size_t totalImportPackets = 0;
8290 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8291 totalImportPackets += numImportPacketsPerLID[i];
8296 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8297 verbosePrefix.get ());
8298 destMat->imports_.modify_host ();
8299 auto hostImports = destMat->imports_.view_host();
8302 destMat->exports_.sync_host ();
8303 auto hostExports = destMat->exports_.view_host();
8305 std::ostringstream os;
8306 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8308 std::cerr << os.str ();
8310 Distor.doReversePostsAndWaits (hostExports,
8311 numExportPacketsPerLID,
8313 numImportPacketsPerLID);
8315 std::ostringstream os;
8316 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8318 std::cerr << os.str ();
8323 std::ostringstream os;
8324 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8326 std::cerr << os.str ();
8328 destMat->imports_.modify_host ();
8329 auto hostImports = destMat->imports_.view_host();
8332 destMat->exports_.sync_host ();
8333 auto hostExports = destMat->exports_.view_host();
8335 std::ostringstream os;
8336 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8338 std::cerr << os.str ();
8340 Distor.doReversePostsAndWaits (hostExports,
8344 std::ostringstream os;
8345 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8347 std::cerr << os.str ();
8352 if (constantNumPackets == 0) {
8354 std::ostringstream os;
8355 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8357 std::cerr << os.str ();
8362 destMat->numExportPacketsPerLID_.sync_host ();
8363 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8365 destMat->numImportPacketsPerLID_.sync_host ();
8366 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8369 std::ostringstream os;
8370 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8372 std::cerr << os.str ();
8374 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8375 destMat->numImportPacketsPerLID_.view_host());
8377 std::ostringstream os;
8378 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8380 std::cerr << os.str ();
8383 size_t totalImportPackets = 0;
8384 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8385 totalImportPackets += numImportPacketsPerLID[i];
8390 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8391 verbosePrefix.get ());
8392 destMat->imports_.modify_host ();
8393 auto hostImports = destMat->imports_.view_host();
8396 destMat->exports_.sync_host ();
8397 auto hostExports = destMat->exports_.view_host();
8399 std::ostringstream os;
8400 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8402 std::cerr << os.str ();
8404 Distor.doPostsAndWaits (hostExports,
8405 numExportPacketsPerLID,
8407 numImportPacketsPerLID);
8409 std::ostringstream os;
8410 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8412 std::cerr << os.str ();
8417 std::ostringstream os;
8418 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8420 std::cerr << os.str ();
8422 destMat->imports_.modify_host ();
8423 auto hostImports = destMat->imports_.view_host();
8426 destMat->exports_.sync_host ();
8427 auto hostExports = destMat->exports_.view_host();
8429 std::ostringstream os;
8430 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8432 std::cerr << os.str ();
8434 Distor.doPostsAndWaits (hostExports,
8438 std::ostringstream os;
8439 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8441 std::cerr << os.str ();
8453#ifdef HAVE_TPETRA_MMM_TIMINGS
8454 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize"))));
8456 destMat->numImportPacketsPerLID_.sync_host ();
8457 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
8459 destMat->imports_.sync_host ();
8460 Teuchos::ArrayView<const char> hostImports =
8464 std::ostringstream os;
8465 os << *verbosePrefix <<
"Calling unpackAndCombineWithOwningPIDsCount"
8467 std::cerr << os.str ();
8473 numImportPacketsPerLID,
8480 std::ostringstream os;
8481 os << *verbosePrefix <<
"unpackAndCombineWithOwningPIDsCount returned "
8482 << mynnz << std::endl;
8483 std::cerr << os.str ();
8485 size_t N = BaseRowMap->getLocalNumElements ();
8488 ArrayRCP<size_t> CSR_rowptr(N+1);
8489 ArrayRCP<GO> CSR_colind_GID;
8490 ArrayRCP<LO> CSR_colind_LID;
8491 ArrayRCP<Scalar> CSR_vals;
8492 CSR_colind_GID.resize (mynnz);
8493 CSR_vals.resize (mynnz);
8497 if (
typeid (LO) ==
typeid (GO)) {
8498 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8501 CSR_colind_LID.resize (mynnz);
8503#ifdef HAVE_TPETRA_MMM_TIMINGS
8504 tmCopySPRdata = Teuchos::null;
8505 tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC copy same-perm-remote data"))));
8509 std::ostringstream os;
8510 os << *verbosePrefix <<
"Calling unpackAndCombineIntoCrsArrays"
8512 std::cerr << os.str ();
8522 numImportPacketsPerLID,
8533 Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
8539 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8541 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8543#ifdef HAVE_TPETRA_MMM_TIMINGS
8544 tmCopySPRdata = Teuchos::null;
8552 Teuchos::Array<int> RemotePids;
8554 std::ostringstream os;
8555 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8557 std::cerr << os.str ();
8560#ifdef HAVE_TPETRA_MMM_TIMINGS
8561 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8573 std::ostringstream os;
8574 os << *verbosePrefix <<
"restrictComm="
8575 << (restrictComm ?
"true" :
"false") << std::endl;
8576 std::cerr << os.str ();
8583#ifdef HAVE_TPETRA_MMM_TIMINGS
8584 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8587 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8589 MyColMap->replaceCommWithSubset (ReducedComm);
8590 MyColMap = ReducedColMap;
8595 std::ostringstream os;
8596 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8597 std::cerr << os.str ();
8599 destMat->replaceColMap (MyColMap);
8606 if (ReducedComm.is_null ()) {
8608 std::ostringstream os;
8609 os << *verbosePrefix <<
"I am no longer in the communicator; "
8610 "returning" << std::endl;
8611 std::cerr << os.str ();
8620 if ((! reverseMode && xferAsImport !=
nullptr) ||
8621 (reverseMode && xferAsExport !=
nullptr)) {
8623 std::ostringstream os;
8624 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8625 std::cerr << os.str ();
8627#ifdef HAVE_TPETRA_MMM_TIMINGS
8628 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8634 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8635 (reverseMode && xferAsImport !=
nullptr)) {
8637 std::ostringstream os;
8638 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8640 std::cerr << os.str();
8642#ifdef HAVE_TPETRA_MMM_TIMINGS
8643 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8648 if (CSR_rowptr[N] != mynnz) {
8649 CSR_colind_LID.resize (CSR_rowptr[N]);
8650 CSR_vals.resize (CSR_rowptr[N]);
8654 TEUCHOS_TEST_FOR_EXCEPTION(
8655 true, std::logic_error,
"Tpetra::CrsMatrix::"
8656 "transferAndFillComplete: Should never get here! "
8657 "Please report this bug to a Tpetra developer.");
8664 std::ostringstream os;
8665 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8666 std::cerr << os.str ();
8675#ifdef HAVE_TPETRA_MMM_TIMINGS
8676 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8678 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8684#ifdef HAVE_TPETRA_MMM_TIMINGS
8685 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8688 Teuchos::ParameterList esfc_params;
8690 RCP<import_type> MyImport;
8693 if (iallreduceRequest.get () !=
nullptr) {
8695 std::ostringstream os;
8696 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8698 std::cerr << os.str ();
8700 iallreduceRequest->wait ();
8701 if (reduced_mismatch != 0) {
8707#ifdef HAVE_TPETRA_MMM_TIMINGS
8708 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8713 std::ostringstream os;
8714 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8715 std::cerr << os.str ();
8718 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8719 Teuchos::ArrayRCP<int> type3PIDs;
8720 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8721 auto colind = getCrsGraph()->getLocalIndicesHost();
8724 std::ostringstream os;
8725 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8726 std::cerr << os.str ();
8730#ifdef HAVE_TPETRA_MMM_TIMINGS
8731 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8733 Import_Util::reverseNeighborDiscovery(*
this,
8745 std::ostringstream os;
8746 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8747 std::cerr << os.str ();
8750 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8751 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8753 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8754 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8756 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8758 std::vector<bool> IsOwned(numCols,
true);
8759 std::vector<int> SentTo(numCols,-1);
8760 if (! MyImporter.is_null ()) {
8761 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8762 IsOwned[rlid]=
false;
8766 std::vector<std::pair<int,GO> > usrtg;
8767 usrtg.reserve(TEPID2.size());
8770 const auto& colMap = * (this->getColMap ());
8772 const LO row = TELID2[i];
8773 const int pid = TEPID2[i];
8774 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8775 const int col = colind[j];
8776 if (IsOwned[col] && SentTo[col] != pid) {
8778 GO gid = colMap.getGlobalElement (col);
8779 usrtg.push_back (std::pair<int,GO> (pid, gid));
8786 std::sort(usrtg.begin(),usrtg.end());
8787 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8789 usrtg.erase(eopg,usrtg.end());
8792 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8793 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8796 for(
auto && p : usrtg) {
8797 EPID2[pos]= p.first;
8798 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8802 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8803 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8804 GO InfGID = std::numeric_limits<GO>::max();
8805 int InfPID = INT_MAX;
8809#define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
8810 int i1=0, i2=0, i3=0;
8811 int Len1 = EPID1.size();
8812 int Len2 = EPID2.size();
8813 int Len3 = EPID3.size();
8815 int MyLen=Len1+Len2+Len3;
8816 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
8817 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
8820 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
8821 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
8822 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
8823 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
8825 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
8826 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
8827 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
8829 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
8830 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
8834 bool added_entry=
false;
8836 if(PID1 == MIN_PID && GID1 == MIN_GID){
8837 userExportLIDs[iloc]=ELID1[i1];
8838 userExportPIDs[iloc]=EPID1[i1];
8843 if(PID2 == MIN_PID && GID2 == MIN_GID){
8845 userExportLIDs[iloc]=ELID2[i2];
8846 userExportPIDs[iloc]=EPID2[i2];
8852 if(PID3 == MIN_PID && GID3 == MIN_GID){
8854 userExportLIDs[iloc]=ELID3[i3];
8855 userExportPIDs[iloc]=EPID3[i3];
8863 std::ostringstream os;
8864 os << *verbosePrefix <<
"Create Import" << std::endl;
8865 std::cerr << os.str ();
8868#ifdef HAVE_TPETRA_MMM_TIMINGS
8869 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
8871 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
8873 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8877 userExportLIDs.view(0,iloc).getConst(),
8878 userExportPIDs.view(0,iloc).getConst(),
8883 std::ostringstream os;
8884 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
8885 std::cerr << os.str ();
8889#ifdef HAVE_TPETRA_MMM_TIMINGS
8890 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
8891 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
8893 if(!params.is_null())
8894 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
8895 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
8901#ifdef HAVE_TPETRA_MMM_TIMINGS
8902 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
8905 std::ostringstream os;
8906 os << *verbosePrefix <<
"Create Import" << std::endl;
8907 std::cerr << os.str ();
8910#ifdef HAVE_TPETRA_MMM_TIMINGS
8911 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
8913 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
8914 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
8915 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8916 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
8919 std::ostringstream os;
8920 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
8921 std::cerr << os.str ();
8924#ifdef HAVE_TPETRA_MMM_TIMINGS
8925 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
8926 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
8928 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
8931 if (!params.is_null ()) {
8932 esfc_params.set (
"compute global constants",
8933 params->get (
"compute global constants",
true));
8935 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
8936 MyImport, Teuchos::null,
8937 rcp (
new Teuchos::ParameterList (esfc_params)));
8940#ifdef HAVE_TPETRA_MMM_TIMINGS
8941 tmIESFC = Teuchos::null;
8945 std::ostringstream os;
8946 os << *verbosePrefix <<
"Done" << endl;
8947 std::cerr << os.str ();
// importAndFillComplete, single-transfer form.
//
// Thin const wrapper: its whole visible body is one call to
// transferAndFillComplete.  destMatrix, domainMap, rangeMap and params
// are passed through unchanged.
//
// destMatrix is taken by non-const reference to an RCP.
// NOTE(review): presumably the callee may (re)assign it -- confirm
// against transferAndFillComplete.
8952 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8955 importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
8957 const Teuchos::RCP<const map_type>& domainMap,
8958 const Teuchos::RCP<const map_type>& rangeMap,
8959 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// `importer` goes in the second argument slot and Teuchos::null in the
// third.  The two-importer overload in this file passes its
// domainImporter in that third slot, so null here presumably means
// "no separate domain transfer" -- TODO confirm.
8961 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// importAndFillComplete, two-transfer form (rowImporter + domainImporter).
//
// Thin const wrapper: its whole visible body is one call to
// transferAndFillComplete.  destMatrix, domainMap, rangeMap and params
// are passed through unchanged.
8964 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8967 importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
8970 const Teuchos::RCP<const map_type>& domainMap,
8971 const Teuchos::RCP<const map_type>& rangeMap,
8972 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// rowImporter is passed directly; domainImporter is wrapped in an RCP
// with Teuchos::rcpFromRef before being passed as the third argument.
// NOTE(review): rcpFromRef is documented by Teuchos as non-owning --
// confirm the callee does not keep the RCP beyond this call.
8974 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// exportAndFillComplete, single-transfer form.
//
// Export counterpart of the single-importer importAndFillComplete:
// a const wrapper whose whole visible body is one call to
// transferAndFillComplete.  destMatrix, domainMap, rangeMap and params
// are passed through unchanged.
8977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8980 exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
8982 const Teuchos::RCP<const map_type>& domainMap,
8983 const Teuchos::RCP<const map_type>& rangeMap,
8984 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// `exporter` goes in the second argument slot and Teuchos::null in the
// third.  The two-exporter overload in this file passes its
// domainExporter in that third slot, so null here presumably means
// "no separate domain transfer" -- TODO confirm.
8986 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// exportAndFillComplete, two-transfer form (rowExporter + domainExporter).
//
// Thin const wrapper: its whole visible body is one call to
// transferAndFillComplete.  destMatrix, domainMap, rangeMap and params
// are passed through unchanged.
8989 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8992 exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
8995 const Teuchos::RCP<const map_type>& domainMap,
8996 const Teuchos::RCP<const map_type>& rangeMap,
8997 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// rowExporter is passed directly; domainExporter is wrapped in an RCP
// with Teuchos::rcpFromRef before being passed as the third argument.
// NOTE(review): rcpFromRef is documented by Teuchos as non-owning --
// confirm the callee does not keep the RCP beyond this call.
8999 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit instantiation of the CrsMatrix class template itself for one
// (SCALAR, LO, GO, NODE) combination.  The expansion already ends in
// ';', so use sites need not add one.
9010#define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9012 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Explicit instantiation of the member template convert<SO>() of
// CrsMatrix<SI, LO, GO, NODE>.  Per the signature below, SI is the
// scalar type of the matrix being converted and SO the scalar type of
// the returned Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>>.
9014#define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9016 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9017 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Emits the signature of the nonmember function
// importAndFillCompleteCrsMatrix (single-Import form) for one
// (SCALAR, LO, GO, NODE) combination.
//
// Parameters, in order: sourceMatrix, importer, domainMap, rangeMap,
// params.  The Import and Map template arguments are spelled through
// CrsMatrix<SCALAR, LO, GO, NODE>'s own local_ordinal_type /
// global_ordinal_type / node_type typedefs rather than LO / GO / NODE
// directly.
// NOTE(review): presumably an explicit instantiation, by analogy with
// the other *_INSTANT macros in this file -- confirm.
9019#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9021 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9022 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9023 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9024 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9025 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9026 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9027 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9028 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9029 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9030 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9031 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9032 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Emits the signature of the nonmember function
// importAndFillCompleteCrsMatrix (two-Import form: rowImporter plus
// domainImporter) for one (SCALAR, LO, GO, NODE) combination.
//
// Parameters, in order: sourceMatrix, rowImporter, domainImporter,
// domainMap, rangeMap, params.  Import and Map template arguments are
// spelled through CrsMatrix<SCALAR, LO, GO, NODE>'s own typedefs.
// NOTE(review): presumably an explicit instantiation, by analogy with
// the other *_INSTANT macros in this file -- confirm.
9034#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9036 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9037 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9038 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9039 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9040 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9041 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9042 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9043 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9044 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9045 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9046 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9047 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9048 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9049 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9050 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Emits the signature of the nonmember function
// exportAndFillCompleteCrsMatrix (single-Export form) for one
// (SCALAR, LO, GO, NODE) combination.
//
// Parameters, in order: sourceMatrix, exporter, domainMap, rangeMap,
// params.  Export and Map template arguments are spelled through
// CrsMatrix<SCALAR, LO, GO, NODE>'s own typedefs.
// NOTE(review): presumably an explicit instantiation, by analogy with
// the other *_INSTANT macros in this file -- confirm.
9053#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9055 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9056 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9057 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9058 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9059 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9060 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9061 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9062 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9063 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9064 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9065 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9066 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Emits the signature of the nonmember function
// exportAndFillCompleteCrsMatrix (two-Export form: rowExporter plus
// domainExporter) for one (SCALAR, LO, GO, NODE) combination.
//
// Parameters, in order: sourceMatrix, rowExporter, domainExporter,
// domainMap, rangeMap, params.  Export and Map template arguments are
// spelled through CrsMatrix<SCALAR, LO, GO, NODE>'s own typedefs.
// NOTE(review): presumably an explicit instantiation, by analogy with
// the other *_INSTANT macros in this file -- confirm.
9068#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9070 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9071 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9072 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9073 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9074 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9075 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9076 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9077 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9078 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9079 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9080 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9081 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9082 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9083 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9084 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination, expands to
// the class instantiation macro followed by the four
// import/export-and-fill-complete macros (single- and two-transfer
// forms).  TPETRA_CRSMATRIX_CONVERT_INSTANT is NOT part of this
// expansion; it takes a different argument list (SO, SI, LO, GO, NODE)
// and has to be invoked separately.
9087#define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9088 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9089 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9090 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9091 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9092 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)