1032 std::cerr << os.str ();
1040 if (constantNumPackets == 0) {
1042 std::ostringstream os;
1043 os << *prefix <<
"3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1045 std::cerr << os.str ();
1049 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1050 remoteLIDs.extent (0));
1054 std::ostringstream os;
1055 os << *prefix <<
"4. packAndPrepare: before, "
1056 << dualViewStatusToString (this->exports_,
"exports_")
1058 std::cerr << os.str ();
1061 doPackAndPrepare(src, exportLIDs, constantNumPackets,
execution_space());
1063 this->exports_.sync_host();
1066 this->exports_.sync_device();
1070 std::ostringstream os;
1071 os << *prefix <<
"5.1. After packAndPrepare, "
1074 std::cerr << os.str ();
1080 if (constantNumPackets != 0) {
1085 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1086 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1090 bool needCommunication =
true;
1093 const this_type* srcDistObj =
dynamic_cast<const this_type*
> (&src);
1095 if (revOp == DoReverse && ! this->isDistributed ()) {
1096 needCommunication =
false;
1105 else if (revOp == DoForward && srcDistObj != NULL &&
1106 ! srcDistObj->isDistributed ()) {
1107 needCommunication =
false;
1110 if (! needCommunication) {
1112 std::ostringstream os;
1113 os << *prefix <<
"Comm not needed; skipping" << endl;
1114 std::cerr << os.str ();
1118 ProfilingRegion region_dpw
1119 (
"Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1120#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1123 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1127 std::ostringstream os;
1128 os << *prefix <<
"7.0. "
1129 << (revOp == DoReverse ?
"Reverse" :
"Forward")
1131 std::cerr << os.str ();
1134 doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
// Transfer routine of DistObject: checks Maps against the given transfer
// object, sizes the receive buffer, waits on communication, and unpacks.
// NOTE(review): the declarator naming this function is not shown here; the
// profiling label and verbose prefix below both say "doTransfer" -- confirm
// the actual member name.
//
// transfer       -- supplies source/target Maps and the LID lists used below.
// modeString     -- inserted into the exception messages.
// revOp          -- DoForward or DoReverse; selects plan direction and which
//                   LID list plays which role.
// restrictedMode -- when true the Map checks use isLocallyFitted instead of
//                   isSameAs, and (in debug mode) permutes are rejected.
1139 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1143 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1144 const char modeString[],
1145 const ReverseOption revOp,
1147 bool restrictedMode)
1153 using Kokkos::Compat::getArrayView;
1154 using Kokkos::Compat::getConstArrayView;
1155 using Kokkos::Compat::getKokkosViewDeepCopy;
1156 using Kokkos::Compat::create_const_view;
1160 const char funcName[] =
"Tpetra::DistObject::doTransfer";
1162 ProfilingRegion region_doTransfer(funcName);
1163 const bool verbose = Behavior::verbose(
"DistObject");
1164 std::shared_ptr<std::string> prefix;
// Verbose output: report the source and target types on std::cerr.
1166 std::ostringstream os;
1167 prefix = this->
createPrefix(
"DistObject",
"doTransfer");
1168 os << *prefix <<
"Source type: " << Teuchos::typeName(src)
1169 <<
", Target type: " << Teuchos::typeName(*
this) << endl;
1170 std::cerr << os.str();
1183 const bool debug = Behavior::debug(
"DistObject");
// Check this object's Map against the transfer's Maps. Which Map and which
// predicate (isSameAs vs. isLocallyFitted) depends on restrictedMode and revOp.
1185 if (! restrictedMode && revOp == DoForward) {
1186 const bool myMapSameAsTransferTgtMap =
1187 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1188 TEUCHOS_TEST_FOR_EXCEPTION
1189 (! myMapSameAsTransferTgtMap, std::invalid_argument,
1190 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1191 "communication, the target DistObject's Map must be the same "
1192 "(in the sense of Tpetra::Map::isSameAs) as the input "
1193 "Export/Import object's target Map.");
1195 else if (! restrictedMode && revOp == DoReverse) {
1196 const bool myMapSameAsTransferSrcMap =
1197 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1198 TEUCHOS_TEST_FOR_EXCEPTION
1199 (! myMapSameAsTransferSrcMap, std::invalid_argument,
1200 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1201 "communication, the target DistObject's Map must be the same "
1202 "(in the sense of Tpetra::Map::isSameAs) as the input "
1203 "Export/Import object's source Map.");
1205 else if (restrictedMode && revOp == DoForward) {
1206 const bool myMapLocallyFittedTransferTgtMap =
1207 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1208 TEUCHOS_TEST_FOR_EXCEPTION
1209 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1210 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1211 "communication using restricted mode, Export/Import object's "
1212 "target Map must be locally fitted (in the sense of "
1213 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1216 const bool myMapLocallyFittedTransferSrcMap =
1217 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1218 TEUCHOS_TEST_FOR_EXCEPTION
1219 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1220 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1221 "communication using restricted mode, Export/Import object's "
1222 "source Map must be locally fitted (in the sense of "
1223 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// If src can be viewed as this_type, also check the source object's Map
// against the transfer (source Map for forward, target Map for reverse).
1229 const this_type* srcDistObj =
dynamic_cast<const this_type*
> (&src);
1230 if (srcDistObj !=
nullptr) {
1231 if (revOp == DoForward) {
1232 const bool srcMapSameAsImportSrcMap =
1233 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1234 TEUCHOS_TEST_FOR_EXCEPTION
1235 (! srcMapSameAsImportSrcMap, std::invalid_argument,
1236 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1237 "communication, the source DistObject's Map must be the same "
1238 "as the input Export/Import object's source Map.");
1241 const bool srcMapSameAsImportTgtMap =
1242 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1243 TEUCHOS_TEST_FOR_EXCEPTION
1244 (! srcMapSameAsImportTgtMap, std::invalid_argument,
1245 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1246 "communication, the source DistObject's Map must be the same "
1247 "as the input Export/Import object's target Map.");
// Forward transfers use distor's plan directly; any other revOp uses the
// plan's reverse plan.
1253 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
// In debug mode, a restricted-mode transfer must not carry any permutes.
1255 TEUCHOS_TEST_FOR_EXCEPTION
1256 (debug && restrictedMode &&
1257 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1258 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1259 std::invalid_argument,
1260 "Tpetra::DistObject::" << modeString <<
": Transfer object "
1261 "cannot have permutes in restricted mode.");
// commOnHost is the negation of Behavior::assumeMpiIsGPUAware().
1264 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1266 std::ostringstream os;
1267 os << *prefix <<
"doTransfer: Use new interface; "
1268 "commOnHost=" << (commOnHost ?
"true" :
"false") << endl;
1269 std::cerr << os.str ();
// Pick the LID lists for this direction: for anything other than DoForward
// the permute "to"/"from" lists and the remote/export lists trade places.
1272 using const_lo_dv_type =
1273 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1274 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1275 transfer.getPermuteToLIDs_dv () :
1276 transfer.getPermuteFromLIDs_dv ();
1277 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1278 transfer.getPermuteFromLIDs_dv () :
1279 transfer.getPermuteToLIDs_dv ();
1280 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1281 transfer.getRemoteLIDs_dv () :
1282 transfer.getExportLIDs_dv ();
1283 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1284 transfer.getExportLIDs_dv () :
1285 transfer.getRemoteLIDs_dv ();
1286 const bool canTryAliasing = (revOp == DoForward) ?
1287 transfer.areRemoteLIDsContiguous() :
1288 transfer.areExportLIDsContiguous();
1290 size_t constantNumPackets = this->constantNumberOfPackets ();
// A nonzero constantNumPackets gives the receive-buffer length directly:
// (number of remote LIDs) * constantNumPackets.
1294 if (constantNumPackets != 0) {
1299 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1300 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
// Communication is marked unnecessary for a reverse transfer when this object
// is not distributed, or for a forward transfer when src is a this_type that
// is not distributed.
1304 bool needCommunication =
true;
1307 const this_type* srcDistObj =
dynamic_cast<const this_type*
> (&src);
1309 if (revOp == DoReverse && ! this->isDistributed ()) {
1310 needCommunication =
false;
// NOTE(review): this comparison uses NULL while the Map check above uses
// nullptr; consider making them consistent.
1319 else if (revOp == DoForward && srcDistObj != NULL &&
1320 ! srcDistObj->isDistributed ()) {
1321 needCommunication =
false;
1324 if (! needCommunication) {
1326 std::ostringstream os;
1327 os << *prefix <<
"Comm not needed; skipping" << endl;
1328 std::cerr << os.str ();
// Wait on the communication described by distributorPlan, then unpack the
// received data for remoteLIDs using combine mode CM.
1332 distributorActor_.doWaits(distributorPlan);
1335 std::ostringstream os;
1336 os << *prefix <<
"8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) <<
", constantNumPackets " << constantNumPackets << endl;
1337 std::cerr << os.str ();
1339 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM,
execution_space());
1344 std::ostringstream os;
1345 os << *prefix <<
"9. Done!" << endl;
1346 std::cerr << os.str ();
1350 std::ostringstream os;
1351 os << *prefix <<
"Tpetra::DistObject::doTransfer: Done!" << endl;
1352 std::cerr << os.str ();
// Posts the communication for a transfer through distributorActor_.
// NOTE(review): the declarator is not shown; the visible parameters match the
// doPosts(...) call made elsewhere in this file -- confirm the name.
//
// constantNumPackets -- 0 selects the variable-packets-per-LID path (per-LID
//                       counts are exchanged first, then the data); any other
//                       value selects the constant-packets path.
// prefix             -- string prepended to every verbose message.
// canTryAliasing     -- passed through to reallocImportsIfNeeded.
1356 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1360 size_t constantNumPackets,
1362 std::shared_ptr<std::string> prefix,
1363 const bool canTryAliasing,
1368 using Kokkos::Compat::create_const_view;
1373 if (constantNumPackets == 0) {
1375 std::ostringstream os;
1376 os << *prefix <<
"7.1. Variable # packets / LID: first comm "
1377 <<
"(commOnHost = " << (commOnHost ?
"true" :
"false") <<
")"
1379 std::cerr << os.str ();
1381 size_t totalImportPackets = 0;
// Count exchange using host views: sync each per-LID count DualView to host
// only if it reports need_sync_host(), then flag the import counts as
// modified on host.
1383 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1384 this->numExportPacketsPerLID_.sync_host ();
1386 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1387 this->numImportPacketsPerLID_.sync_host ();
1389 this->numImportPacketsPerLID_.modify_host ();
1391 create_const_view (this->numExportPacketsPerLID_.view_host ());
1392 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1396 std::ostringstream os;
1397 os << *prefix <<
"Call doPostsAndWaits"
1399 std::cerr << os.str ();
1401 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1404 std::ostringstream os;
1405 os << *prefix <<
"Count totalImportPackets" << std::endl;
1406 std::cerr << os.str ();
1408 using the_dev_type =
typename decltype (numImp_h)::device_type;
1409 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
// Same count exchange using device views.
// NOTE(review): the condition selecting host vs. device is not shown here;
// presumably it is commOnHost -- confirm.
1412 this->numExportPacketsPerLID_.sync_device ();
1413 this->numImportPacketsPerLID_.sync_device ();
1414 this->numImportPacketsPerLID_.modify_device ();
1415 auto numExp_d = create_const_view
1416 (this->numExportPacketsPerLID_.view_device ());
1417 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1421 std::ostringstream os;
1422 os << *prefix <<
"Call doPostsAndWaits"
1424 std::cerr << os.str ();
1427 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1430 std::ostringstream os;
1431 os << *prefix <<
"Count totalImportPackets" << std::endl;
1432 std::cerr << os.str ();
1434 using the_dev_type =
typename decltype (numImp_d)::device_type;
1435 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1439 std::ostringstream os;
1440 os << *prefix <<
"totalImportPackets=" << totalImportPackets << endl;
1441 std::cerr << os.str ();
// Size imports_ for the total number of incoming packets just counted.
1443 this->reallocImportsIfNeeded (totalImportPackets, verbose,
1444 prefix.get (), canTryAliasing, CM);
1446 std::ostringstream os;
1447 os << *prefix <<
"7.3. Second comm" << std::endl;
1448 std::cerr << os.str ();
// Both per-LID count DualViews are synced to host before the data exchange.
1454 this->numExportPacketsPerLID_.sync_host ();
1455 this->numImportPacketsPerLID_.sync_host ();
1464 auto numExportPacketsPerLID_av =
1466 auto numImportPacketsPerLID_av =
// NOTE(review): clear_sync_state() is called before imports_ is marked
// modified below -- presumably to reset the DualView's modified flags so the
// following modify_host()/modify_device() cannot conflict; confirm.
1474 this->imports_.clear_sync_state ();
1477 std::ostringstream os;
1478 os << *prefix <<
"Comm on "
1479 << (commOnHost ?
"host" :
"device")
1480 <<
"; call doPosts" << endl;
1481 std::cerr << os.str ();
// Data exchange with host views of exports_ / imports_ and per-LID counts.
1485 this->imports_.modify_host ();
1486 distributorActor_.doPosts
1488 create_const_view (this->exports_.view_host ()),
1489 numExportPacketsPerLID_av,
1490 this->imports_.view_host (),
1491 numImportPacketsPerLID_av);
// Data exchange with device views of exports_ / imports_ and per-LID counts.
1495 this->imports_.modify_device ();
1496 distributorActor_.doPosts
1498 create_const_view (this->exports_.view_device ()),
1499 numExportPacketsPerLID_av,
1500 this->imports_.view_device (),
1501 numImportPacketsPerLID_av);
// Constant number of packets per LID: a single data exchange, no count
// exchange.
1506 std::ostringstream os;
1507 os << *prefix <<
"7.1. Const # packets per LID: " << endl
1514 std::cerr << os.str ();
1521 this->imports_.clear_sync_state ();
1524 std::ostringstream os;
1525 os << *prefix <<
"7.2. Comm on "
1526 << (commOnHost ?
"host" :
"device")
1527 <<
"; call doPosts" << endl;
1528 std::cerr << os.str ();
// Host views.
1531 this->imports_.modify_host ();
1532 distributorActor_.doPosts
1534 create_const_view (this->exports_.view_host ()),
1536 this->imports_.view_host ());
// Device views.
1540 this->imports_.modify_device ();
1541 distributorActor_.doPosts
1543 create_const_view (this->exports_.view_device ()),
1545 this->imports_.view_device ());
// Wrapper around packAndPrepare that packs into exports_ and
// numExportPacketsPerLID_.
// NOTE(review): the declarator is not shown; the profiling label below names
// this "Tpetra::DistObject::doPackAndPrepare".
//
// exportLIDs         -- forwarded unchanged to packAndPrepare.
// constantNumPackets -- taken by non-const reference and forwarded, so
//                       packAndPrepare may presumably update it; confirm.
1550 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1554 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1555 size_t& constantNumPackets,
1562 ProfilingRegion region_pp
1563 (
"Tpetra::DistObject::doPackAndPrepare");
// The timer is only compiled in when HAVE_TPETRA_TRANSFER_TIMERS is defined.
1564#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1567 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
// Error-checked path: anything thrown by packAndPrepare is caught and turned
// into a local message, which is then handed to Details::checkGlobalError
// together with the Map's communicator.
1587 std::ostringstream lclErrStrm;
1588 bool lclSuccess =
false;
1590 this->packAndPrepare (src, exportLIDs, this->exports_,
1591 this->numExportPacketsPerLID_,
1592 constantNumPackets, space);
1595 catch (std::exception& e) {
1596 lclErrStrm <<
"packAndPrepare threw an exception: "
1597 << endl << e.what();
1600 lclErrStrm <<
"packAndPrepare threw an exception "
1601 "not a subclass of std::exception.";
1603 const char gblErrMsgHeader[] =
"Tpetra::DistObject "
1604 "threw an exception in packAndPrepare on "
1605 "one or more processes in the DistObject's communicator.";
1606 auto comm = getMap()->getComm();
1607 Details::checkGlobalError(std::cerr, lclSuccess,
1608 lclErrStrm.str().c_str(),
1609 gblErrMsgHeader, *comm);
// Direct call with no exception handling.
// NOTE(review): the condition choosing between the two paths is not shown.
1612 this->packAndPrepare (src, exportLIDs, this->exports_,
1613 this->numExportPacketsPerLID_,
1614 constantNumPackets, space);
// Wrapper used to unpack received data; the profiling label is
// "Tpetra::DistObject::doUnpackAndCombine".
//
// remoteLIDs         -- LIDs whose received data is unpacked.
// constantNumPackets -- forwarded together with numImportPacketsPerLID_, CM
//                       and space to the underlying unpack call.
1618 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1621 doUnpackAndCombine(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1622 size_t constantNumPackets,
1630 ProfilingRegion region_uc
1631 (
"Tpetra::DistObject::doUnpackAndCombine");
// The timer is only compiled in when HAVE_TPETRA_TRANSFER_TIMERS is defined.
1632#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1635 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
// Error-checked path: exceptions are caught and turned into a local message,
// which is then handed to Details::checkGlobalError together with the Map's
// communicator.
1639 std::ostringstream lclErrStrm;
1640 bool lclSuccess =
false;
1643 this->numImportPacketsPerLID_,
1644 constantNumPackets, CM, space);
1647 catch (std::exception& e) {
1648 lclErrStrm <<
"doUnpackAndCombine threw an exception: "
1649 << endl << e.what();
1652 lclErrStrm <<
"doUnpackAndCombine threw an exception "
1653 "not a subclass of std::exception.";
1655 const char gblErrMsgHeader[] =
"Tpetra::DistObject "
1656 "threw an exception in unpackAndCombine on "
1657 "one or more processes in the DistObject's communicator.";
1658 auto comm = getMap()->getComm();
1659 Details::checkGlobalError(std::cerr, lclSuccess,
1660 lclErrStrm.str().c_str(),
1661 gblErrMsgHeader, *comm);
// Direct call with no exception handling.
// NOTE(review): the condition choosing between the two paths is not shown.
1665 this->numImportPacketsPerLID_,
1666 constantNumPackets, CM, space);
1670 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1676 const Kokkos::DualView<
1679 const Kokkos::DualView<
// Overload whose visible body forwards to
// copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs, ...).
// NOTE(review): the declarator and the remaining call arguments are not shown;
// presumably this is the copyAndPermute overload with an extra trailing
// parameter -- confirm.
1686template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1689 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1691 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1705 copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1713 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1718 const Kokkos::DualView<
// Overload taking an exports buffer (by non-const reference) and a
// numPacketsPerLID DualView (by value); its visible body is a call whose last
// argument is constantNumPackets.
// NOTE(review): the declarator and the callee name are not shown; presumably
// this is a packAndPrepare overload forwarding to another one -- confirm.
1731template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1734 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1736 Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1737 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1756 constantNumPackets);
1764 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1768 (
const Kokkos::DualView<
// Overload whose visible body forwards to
// unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, ...).
// imports and numPacketsPerLID are DualViews taken by value;
// constantNumPackets and combineMode are taken by const value.
// NOTE(review): the declarator and the remaining call arguments are not shown;
// presumably this is the unpackAndCombine overload with an extra trailing
// parameter -- confirm.
1782template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1784 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1786 Kokkos::DualView<packet_type *, buffer_device_type> imports,
1787 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1788 const size_t constantNumPackets,
const CombineMode combineMode,
1793 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
// Const member that writes a description of this object to a std::ostream.
// It wraps os in a Teuchos::FancyOStream (via a non-owning RCP from
// rcpFromRef) and calls describe() with Teuchos::VERB_DEFAULT.
// NOTE(review): the declarator naming this member is not shown -- confirm.
1802template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1804 std::ostream &os)
const {
1806 using Teuchos::FancyOStream;
1807 using Teuchos::getFancyOStream;
1809 using Teuchos::rcpFromRef;
1811 RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1812 this->
describe(*out, Teuchos::VERB_DEFAULT);
// Creates the prefix string that callers in this file stream at the start of
// each verbose message (e.g. createPrefix("DistObject", "doTransfer")).
//
// className, methodName -- C strings identifying the caller.
// Returns a std::unique_ptr<std::string>.
//
// The communicator is looked up through the Map; when getMap() is null,
// comm is Teuchos::null instead.
// NOTE(review): how className, methodName and comm are combined into the
// result is not shown here.
1815template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1816std::unique_ptr<std::string>
1817DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::createPrefix(
1818 const char className[],
const char methodName[])
const {
1819 auto map = this->getMap();
1820 auto comm = map.is_null() ? Teuchos::null : map->getComm();
// Free function template over any DistObjectType: calls
// input->removeEmptyProcessesInPlace(newMap), then resets input to
// Teuchos::null when newMap is null.
//
// input  -- RCP taken by non-const reference; may be set to null on return.
// newMap -- Map built from DistObjectType's local_ordinal_type,
//           global_ordinal_type and node_type.
// NOTE(review): the declarator naming this function is not shown -- confirm.
1824template <
class DistObjectType>
1826 Teuchos::RCP<DistObjectType> &input,
1827 const Teuchos::RCP<
const Map<
typename DistObjectType::local_ordinal_type,
1828 typename DistObjectType::global_ordinal_type,
1829 typename DistObjectType::node_type>> &newMap) {
1830 input->removeEmptyProcessesInPlace(newMap);
1831 if (newMap.is_null()) {
1832 input = Teuchos::null;
// Free function template over any DistObjectType: obtains the replacement Map
// by calling removeEmptyProcesses() on input's current Map.
// NOTE(review): the declarator and what is done with newMap afterwards are not
// shown -- confirm.
1836template <
class DistObjectType>
1838 auto newMap = input->getMap()->removeEmptyProcesses();
// Explicit-instantiation macro: expands to an explicit instantiation of
// DistObject<SCALAR, LO, GO, NODE>.
1843#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1844 template class DistObject<SCALAR, LO, GO, NODE>;
// Explicit-instantiation macro for the char packet type: expands to an
// explicit instantiation of DistObject<char, LO, GO, NODE>.
1848#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1849 template class DistObject<char, LO, GO, NODE>;