228#ifdef HAVE_MUELU_CUDA
229 if (
parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStart();
232 std::string timerLabel;
234 timerLabel =
"MueLu RefMaxwell: compute (reuse)";
236 timerLabel =
"MueLu RefMaxwell: compute";
237 RCP<Teuchos::TimeMonitor> tmCompute =
getTimer(timerLabel);
244 RCP<ParameterList> params = rcp(
new ParameterList());;
245 params->set(
"printLoadBalancingInfo",
true);
246 params->set(
"printCommInfo",
true);
280 RCP<MultiVector> CoordsSC;
290 ArrayRCP<ArrayRCP<const Scalar> > localNullspace(
Nullspace_->getNumVectors());
291 for (
size_t i = 0; i <
Nullspace_->getNumVectors(); i++)
293 coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
294 coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
295 coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
296 for (
size_t j=0; j <
Nullspace_->getMap()->getLocalNumElements(); j++) {
297 Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
298 for (
size_t i=0; i <
Nullspace_->getNumVectors(); i++)
299 lenSC += localNullspace[i][j]*localNullspace[i][j];
300 coordinateType len = Teuchos::as<coordinateType>(Teuchos::ScalarTraits<Scalar>::real(Teuchos::ScalarTraits<Scalar>::squareroot(lenSC)));
301 localMinLen = std::min(localMinLen, len);
302 localMaxLen = std::max(localMaxLen, len);
306 RCP<const Teuchos::Comm<int> > comm =
Nullspace_->getMap()->getComm();
310 meanLen /=
Nullspace_->getMap()->getGlobalNumElements();
314 GetOStream(
Statistics0) <<
"Edge length (min/mean/max): " << minLen <<
" / " << meanLen <<
" / " << maxLen << std::endl;
321 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
323 Array<Scalar> normsSC(
Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
328 GetOStream(
Errors) <<
"MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
343 std::string label(
"D0*Ms*D0");
354 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building special prolongator" << std::endl;
359 bool doRebalancing =
false;
361 int rebalanceStriding =
parameterList_.get<
int>(
"refmaxwell: subsolves striding", -1);
362 int numProcsAH, numProcsA22;
363 int numProcs =
SM_Matrix_->getDomainMap()->getComm()->getSize();
365 doRebalancing =
false;
384 ParameterList repartheurParams;
385 repartheurParams.set(
"repartition: start level", 0);
387 int defaultTargetRows = 10000;
388 repartheurParams.set(
"repartition: min rows per proc",
precList11_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
389 repartheurParams.set(
"repartition: target rows per proc",
precList11_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
390 repartheurParams.set(
"repartition: min rows per thread",
precList11_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
391 repartheurParams.set(
"repartition: target rows per thread",
precList11_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
392 repartheurParams.set(
"repartition: max imbalance",
precList11_.get<
double>(
"repartition: max imbalance", 1.1));
393 repartheurFactory->SetParameterList(repartheurParams);
395 level.
Request(
"number of partitions", repartheurFactory.get());
396 repartheurFactory->Build(level);
397 numProcsAH = level.
Get<
int>(
"number of partitions", repartheurFactory.get());
398 numProcsAH = std::min(numProcsAH,numProcs);
411 ParameterList repartheurParams;
412 repartheurParams.set(
"repartition: start level", 0);
413 repartheurParams.set(
"repartition: use map",
true);
415 int defaultTargetRows = 10000;
416 repartheurParams.set(
"repartition: min rows per proc",
precList22_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
417 repartheurParams.set(
"repartition: target rows per proc",
precList22_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
418 repartheurParams.set(
"repartition: min rows per thread",
precList22_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
419 repartheurParams.set(
"repartition: target rows per thread",
precList22_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
421 repartheurFactory->SetParameterList(repartheurParams);
423 level.
Request(
"number of partitions", repartheurFactory.get());
424 repartheurFactory->Build(level);
425 numProcsA22 = level.
Get<
int>(
"number of partitions", repartheurFactory.get());
426 numProcsA22 = std::min(numProcsA22,numProcs);
429 if (rebalanceStriding >= 1) {
430 TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
431 TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
432 if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
433 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Disabling striding = " << rebalanceStriding <<
", since AH needs " << numProcsAH
434 <<
" procs and A22 needs " << numProcsA22 <<
" procs."<< std::endl;
435 rebalanceStriding = -1;
441 if (
parameterList_.get<
bool>(
"refmaxwell: fill communicator",
false)) {
443 temp = std::nearbyint(Teuchos::as<double>(numProcsAH * numProcs) / Teuchos::as<double>(numProcsAH+numProcsA22));
444 numProcsA22 = std::nearbyint(Teuchos::as<double>(numProcsA22 * numProcs) / Teuchos::as<double>(numProcsAH+numProcsA22));
446 if (numProcsAH+numProcsA22 == numProcs+1)
448 TEUCHOS_ASSERT(numProcsAH + numProcsA22 == numProcs);
451 if ((numProcsAH < 0) || (numProcsA22 < 0) || (numProcsAH + numProcsA22 > numProcs)) {
452 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Disabling rebalancing of subsolves, since partition heuristic resulted "
453 <<
"in undesirable number of partitions: " << numProcsAH <<
", " << numProcsA22 << std::endl;
454 doRebalancing =
false;
460 RCP<Teuchos::TimeMonitor> tm =
getTimer(
"MueLu RefMaxwell: Rebalance AH");
462 Level fineLevel, coarseLevel;
473 coarseLevel.
Set(
"number of partitions", numProcsAH);
474 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
476 coarseLevel.
setlib(
AH_->getDomainMap()->lib());
477 fineLevel.
setlib(
AH_->getDomainMap()->lib());
478 coarseLevel.setObjectLabel(
"RefMaxwell coarse (1,1)");
479 fineLevel.setObjectLabel(
"RefMaxwell coarse (1,1)");
481 std::string partName =
precList11_.get<std::string>(
"repartition: partitioner",
"zoltan2");
482 RCP<Factory> partitioner;
483 if (partName ==
"zoltan") {
484#ifdef HAVE_MUELU_ZOLTAN
491 }
else if (partName ==
"zoltan2") {
492#ifdef HAVE_MUELU_ZOLTAN2
494 ParameterList partParams;
495 RCP<const ParameterList> partpartParams = rcp(
new ParameterList(
precList11_.sublist(
"repartition: params",
false)));
496 partParams.set(
"ParameterList", partpartParams);
497 partitioner->SetParameterList(partParams);
505 ParameterList repartParams;
506 repartParams.set(
"repartition: print partition distribution",
precList11_.get<
bool>(
"repartition: print partition distribution",
false));
507 repartParams.set(
"repartition: remap parts",
precList11_.get<
bool>(
"repartition: remap parts",
true));
508 if (rebalanceStriding >= 1) {
509 bool acceptPart = (
SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
510 if (
SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
512 repartParams.set(
"repartition: remap accept partition", acceptPart);
514 repartFactory->SetParameterList(repartParams);
516 repartFactory->SetFactory(
"Partition", partitioner);
519 ParameterList newPparams;
520 newPparams.set(
"type",
"Interpolation");
521 newPparams.set(
"repartition: rebalance P and R",
precList11_.get<
bool>(
"repartition: rebalance P and R",
false));
522 newPparams.set(
"repartition: use subcommunicators",
true);
523 newPparams.set(
"repartition: rebalance Nullspace", !
NullspaceH_.is_null());
527 newP->SetParameterList(newPparams);
528 newP->SetFactory(
"Importer", repartFactory);
531 ParameterList rebAcParams;
532 rebAcParams.set(
"repartition: use subcommunicators",
true);
533 newA->SetParameterList(rebAcParams);
534 newA->SetFactory(
"Importer", repartFactory);
536 coarseLevel.
Request(
"P", newP.get());
537 coarseLevel.
Request(
"Importer", repartFactory.get());
538 coarseLevel.
Request(
"A", newA.get());
539 coarseLevel.
Request(
"Coordinates", newP.get());
541 coarseLevel.
Request(
"Nullspace", newP.get());
542 repartFactory->Build(coarseLevel);
544 if (!
precList11_.get<
bool>(
"repartition: rebalance P and R",
false))
545 ImporterH_ = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
546 P11_ = coarseLevel.
Get< RCP<Matrix> >(
"P", newP.get());
547 AH_ = coarseLevel.
Get< RCP<Matrix> >(
"A", newA.get());
548 CoordsH_ = coarseLevel.
Get< RCP<RealValuedMultiVector> >(
"Coordinates", newP.get());
550 NullspaceH_ = coarseLevel.
Get< RCP<MultiVector> >(
"Nullspace", newP.get());
557 RCP<const Import> ImporterH = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
558 RCP<const Map> targetMap = ImporterH->getTargetMap();
559 ParameterList XpetraList;
560 XpetraList.set(
"Restrict Communicator",
true);
571 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
572 <<
"support BlockSize > 1 and drop tol != 0.0" << std::endl;
584 if (!
AH_.is_null()) {
587 AH_->SetFixedBlockSize(dim);
588 AH_->setObjectLabel(
"RefMaxwell coarse (1,1)");
592 RCP<ParameterList> params = rcp(
new ParameterList());;
593 params->set(
"printLoadBalancingInfo",
true);
594 params->set(
"printCommInfo",
true);
597#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
598 if (
precList11_.isType<std::string>(
"Preconditioner Type")) {
600 if (
precList11_.get<std::string>(
"Preconditioner Type") ==
"MueLu") {
601 ParameterList& userParamList =
precList11_.sublist(
"Preconditioner Types").sublist(
"MueLu").sublist(
"user data");
602 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
CoordsH_);
614 ParameterList& userParamList =
precList11_.sublist(
"user data");
615 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
CoordsH_);
617 userParamList.set<RCP<MultiVector> >(
"Nullspace",
NullspaceH_);
620 RCP<MueLu::Level> level0 =
HierarchyH_->GetLevel(0);
621 level0->Set(
"A",
AH_);
636 if (
D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
637 replaceWith= Teuchos::ScalarTraits<SC>::eps();
639 replaceWith = Teuchos::ScalarTraits<SC>::zero();
647 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
650 RCP<Teuchos::TimeMonitor> tm =
getTimer(
"MueLu RefMaxwell: Build A22");
652 Level fineLevel, coarseLevel;
664 coarseLevel.setObjectLabel(
"RefMaxwell (2,2)");
665 fineLevel.setObjectLabel(
"RefMaxwell (2,2)");
667 RCP<RAPFactory> rapFact = rcp(
new RAPFactory());
668 ParameterList rapList = *(rapFact->GetValidParameterList());
669 rapList.set(
"transpose: use implicit",
true);
670 rapList.set(
"rap: fix zero diagonals",
parameterList_.get<
bool>(
"rap: fix zero diagonals",
true));
671 rapList.set(
"rap: fix zero diagonals threshold",
parameterList_.get<
double>(
"rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
672 rapList.set(
"rap: triple product",
parameterList_.get<
bool>(
"rap: triple product",
false));
673 rapFact->SetParameterList(rapList);
676 coarseLevel.
AddKeepFlag(
"AP reuse data", rapFact.get());
677 coarseLevel.
Set<Teuchos::RCP<Teuchos::ParameterList> >(
"AP reuse data",
A22_AP_reuse_data_, rapFact.get());
680 coarseLevel.
AddKeepFlag(
"RAP reuse data", rapFact.get());
681 coarseLevel.
Set<Teuchos::RCP<Teuchos::ParameterList> >(
"RAP reuse data",
A22_RAP_reuse_data_, rapFact.get());
687 coarseLevel.
Set(
"number of partitions", numProcsA22);
688 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
690 std::string partName =
precList22_.get<std::string>(
"repartition: partitioner",
"zoltan2");
691 RCP<Factory> partitioner;
692 if (partName ==
"zoltan") {
693#ifdef HAVE_MUELU_ZOLTAN
695 partitioner->SetFactory(
"A", rapFact);
701 }
else if (partName ==
"zoltan2") {
702#ifdef HAVE_MUELU_ZOLTAN2
704 ParameterList partParams;
705 RCP<const ParameterList> partpartParams = rcp(
new ParameterList(
precList22_.sublist(
"repartition: params",
false)));
706 partParams.set(
"ParameterList", partpartParams);
707 partitioner->SetParameterList(partParams);
708 partitioner->SetFactory(
"A", rapFact);
716 ParameterList repartParams;
717 repartParams.set(
"repartition: print partition distribution",
precList22_.get<
bool>(
"repartition: print partition distribution",
false));
718 repartParams.set(
"repartition: remap parts",
precList22_.get<
bool>(
"repartition: remap parts",
true));
719 if (rebalanceStriding >= 1) {
720 bool acceptPart = ((
SM_Matrix_->getDomainMap()->getComm()->getSize()-1-
SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
721 if (
SM_Matrix_->getDomainMap()->getComm()->getSize()-1-
SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
724 TEUCHOS_ASSERT(
AH_.is_null());
725 repartParams.set(
"repartition: remap accept partition", acceptPart);
727 repartParams.set(
"repartition: remap accept partition",
AH_.is_null());
728 repartFactory->SetParameterList(repartParams);
729 repartFactory->SetFactory(
"A", rapFact);
731 repartFactory->SetFactory(
"Partition", partitioner);
734 ParameterList newPparams;
735 newPparams.set(
"type",
"Interpolation");
736 newPparams.set(
"repartition: rebalance P and R",
precList22_.get<
bool>(
"repartition: rebalance P and R",
false));
737 newPparams.set(
"repartition: use subcommunicators",
true);
738 newPparams.set(
"repartition: rebalance Nullspace",
false);
740 newP->SetParameterList(newPparams);
741 newP->SetFactory(
"Importer", repartFactory);
744 ParameterList rebAcParams;
745 rebAcParams.set(
"repartition: use subcommunicators",
true);
746 newA->SetParameterList(rebAcParams);
747 newA->SetFactory(
"A", rapFact);
748 newA->SetFactory(
"Importer", repartFactory);
750 coarseLevel.
Request(
"P", newP.get());
751 coarseLevel.
Request(
"Importer", repartFactory.get());
752 coarseLevel.
Request(
"A", newA.get());
753 coarseLevel.
Request(
"Coordinates", newP.get());
754 rapFact->Build(fineLevel,coarseLevel);
755 repartFactory->Build(coarseLevel);
757 if (!
precList22_.get<
bool>(
"repartition: rebalance P and R",
false))
758 Importer22_ = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
759 D0_Matrix_ = coarseLevel.
Get< RCP<Matrix> >(
"P", newP.get());
760 A22_ = coarseLevel.
Get< RCP<Matrix> >(
"A", newA.get());
761 Coords_ = coarseLevel.
Get< RCP<RealValuedMultiVector> >(
"Coordinates", newP.get());
766 coarseLevel.
Request(
"A", rapFact.get());
768 coarseLevel.
Request(
"AP reuse data", rapFact.get());
769 coarseLevel.
Request(
"RAP reuse data", rapFact.get());
772 A22_ = coarseLevel.
Get< RCP<Matrix> >(
"A", rapFact.get());
775 if (coarseLevel.
IsAvailable(
"AP reuse data", rapFact.get()))
777 if (coarseLevel.
IsAvailable(
"RAP reuse data", rapFact.get()))
782 RCP<Teuchos::TimeMonitor> tm =
getTimer(
"MueLu RefMaxwell: Build A22");
785 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
SM_Matrix_,
false,*
D0_Matrix_,
false,temp,
GetOStream(
Runtime0),
true,
true);
787 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_T_Matrix_,
false,*temp,
false,
A22_,
GetOStream(
Runtime0),
true,
true);
789 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
true,*temp,
false,
A22_,
GetOStream(
Runtime0),
true,
true);
792 RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
795 RCP<Matrix> temp, temp2;
796 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
SM_Matrix_,
false,*
D0_Matrix_,
false,temp,
GetOStream(
Runtime0),
true,
true);
798 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_T_Matrix_,
false,*temp,
false,temp2,
GetOStream(
Runtime0),
true,
true);
800 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
true,*temp,
false,temp2,
GetOStream(
Runtime0),
true,
true);
803 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(
Importer22_->getTargetMap(), D0importer);
805 ParameterList XpetraList;
806 XpetraList.set(
"Restrict Communicator",
true);
807 XpetraList.set(
"Timer Label",
"MueLu::RebalanceA22");
808 RCP<const Map> targetMap =
Importer22_->getTargetMap();
818 if (!
A22_.is_null()) {
820 A22_->setObjectLabel(
"RefMaxwell (2,2)");
823 RCP<ParameterList> params = rcp(
new ParameterList());;
824 params->set(
"printLoadBalancingInfo",
true);
825 params->set(
"printCommInfo",
true);
828#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
829 if (
precList22_.isType<std::string>(
"Preconditioner Type")) {
831 if (
precList22_.get<std::string>(
"Preconditioner Type") ==
"MueLu") {
832 ParameterList& userParamList =
precList22_.sublist(
"Preconditioner Types").sublist(
"MueLu").sublist(
"user data");
833 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
Coords_);
841 ParameterList& userParamList =
precList22_.sublist(
"user data");
842 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
Coords_);
845 std::string coarseType =
"";
847 coarseType =
precList22_.get<std::string>(
"coarse: type");
849 std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
850 std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
854 coarseType ==
"Klu" ||
855 coarseType ==
"Klu2") &&
857 !
precList22_.sublist(
"coarse: params").isParameter(
"fix nullspace")))
858 precList22_.sublist(
"coarse: params").set(
"fix nullspace",
true);
862 level0->Set(
"A",
A22_);
877 if (
D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
878 replaceWith= Teuchos::ScalarTraits<SC>::eps();
880 replaceWith = Teuchos::ScalarTraits<SC>::zero();
890 RCP<const Import> ImporterP11 = ImportFactory::Build(
ImporterH_->getTargetMap(),
P11_->getColMap());
891 rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix()->replaceDomainMapAndImporter(
ImporterH_->getTargetMap(), ImporterP11);
899 RCP<const Import> ImporterD0 = ImportFactory::Build(
Importer22_->getTargetMap(),
D0_Matrix_->getColMap());
900 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(
Importer22_->getTargetMap(), ImporterD0);
905 (!rcp_dynamic_cast<CrsMatrixWrap>(
D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
906 (!rcp_dynamic_cast<CrsMatrixWrap>(
R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
907 (
D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
908 (
R11_->getColMap()->lib() == Xpetra::UseTpetra))
913 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
920 RCP<ParameterList> matvecParams = rcpFromRef(
parameterList_.sublist(
"matvec params"));
930 RCP<ParameterList> importerParams = rcpFromRef(
parameterList_.sublist(
"refmaxwell: ImporterH params"));
931 ImporterH_->setDistributorParameters(importerParams);
934 RCP<ParameterList> importerParams = rcpFromRef(
parameterList_.sublist(
"refmaxwell: Importer22 params"));
935 Importer22_->setDistributorParameters(importerParams);
941#ifdef HAVE_MUELU_CUDA
942 if (
parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStop();
1159 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1160 const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1161 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1163 size_t numLocalRows =
SM_Matrix_->getLocalNumRows();
1165 RCP<Matrix> P_nodal;
1166 RCP<CrsMatrix> P_nodal_imported;
1167 RCP<MultiVector> Nullspace_nodal;
1171 bool read_P_from_file =
parameterList_.get(
"refmaxwell: read_P_from_file",
false);
1172 if (read_P_from_file) {
1175 std::string P_filename =
parameterList_.get(
"refmaxwell: P_filename",std::string(
"P.m"));
1176 std::string domainmap_filename =
parameterList_.get(
"refmaxwell: P_domainmap_filename",std::string(
"domainmap_P.m"));
1177 std::string colmap_filename =
parameterList_.get(
"refmaxwell: P_colmap_filename",std::string(
"colmap_P.m"));
1178 std::string coords_filename =
parameterList_.get(
"refmaxwell: CoordsH",std::string(
"coordsH.m"));
1179 RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename,
A_nodal_Matrix_->getDomainMap()->lib(),
A_nodal_Matrix_->getDomainMap()->getComm());
1180 RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename,
A_nodal_Matrix_->getDomainMap()->lib(),
A_nodal_Matrix_->getDomainMap()->getComm());
1182 CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1184 Level fineLevel, coarseLevel;
1192 fineLevel.
Set(
"DofsPerNode",1);
1195 coarseLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
1196 fineLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
1199 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(
A_nodal_Matrix_->getRowMap(),NSdim);
1200 nullSpace->putScalar(SC_ONE);
1201 fineLevel.
Set(
"Nullspace",nullSpace);
1203 RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1210 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1220 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1224 dropFact->SetFactory(
"UnAmalgamationInfo", amalgFact);
1225 double dropTol =
parameterList_.get(
"aggregation: drop tol",0.0);
1226 std::string dropScheme =
parameterList_.get(
"aggregation: drop scheme",
"classical");
1227 std::string distLaplAlgo =
parameterList_.get(
"aggregation: distance laplacian algo",
"default");
1228 dropFact->SetParameter(
"aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1229 dropFact->SetParameter(
"aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1231 dropFact->SetParameter(
"aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1233 UncoupledAggFact->SetFactory(
"Graph", dropFact);
1234 int minAggSize =
parameterList_.get(
"aggregation: min agg size",2);
1235 UncoupledAggFact->SetParameter(
"aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1236 int maxAggSize =
parameterList_.get(
"aggregation: max agg size",-1);
1237 UncoupledAggFact->SetParameter(
"aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1239 UncoupledAggFact->SetParameter(
"aggregation: match ML phase2a",Teuchos::ParameterEntry(matchMLbehavior));
1240 bool avoidSingletons =
parameterList_.get(
"aggregation: phase3 avoid singletons",
true);
1241 UncoupledAggFact->SetParameter(
"aggregation: phase3 avoid singletons",Teuchos::ParameterEntry(avoidSingletons));
1243 coarseMapFact->SetFactory(
"Aggregates", UncoupledAggFact);
1245 TentativePFact->SetFactory(
"Aggregates", UncoupledAggFact);
1246 TentativePFact->SetFactory(
"UnAmalgamationInfo", amalgFact);
1247 TentativePFact->SetFactory(
"CoarseMap", coarseMapFact);
1249 Tfact->SetFactory(
"Aggregates", UncoupledAggFact);
1250 Tfact->SetFactory(
"CoarseMap", coarseMapFact);
1252 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa") {
1253 SaPFact->SetFactory(
"P", TentativePFact);
1254 coarseLevel.
Request(
"P", SaPFact.get());
1256 coarseLevel.
Request(
"P",TentativePFact.get());
1257 coarseLevel.
Request(
"Nullspace",TentativePFact.get());
1258 coarseLevel.
Request(
"Coordinates",Tfact.get());
1260 RCP<AggregationExportFactory> aggExport;
1261 if (
parameterList_.get(
"aggregation: export visualization data",
false)) {
1263 ParameterList aggExportParams;
1264 aggExportParams.set(
"aggregation: output filename",
"aggs.vtk");
1265 aggExportParams.set(
"aggregation: output file: agg style",
"Jacks");
1266 aggExport->SetParameterList(aggExportParams);
1268 aggExport->SetFactory(
"Aggregates", UncoupledAggFact);
1269 aggExport->SetFactory(
"UnAmalgamationInfo", amalgFact);
1270 fineLevel.
Request(
"Aggregates",UncoupledAggFact.get());
1271 fineLevel.
Request(
"UnAmalgamationInfo",amalgFact.get());
1274 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1275 coarseLevel.
Get(
"P",P_nodal,SaPFact.get());
1277 coarseLevel.
Get(
"P",P_nodal,TentativePFact.get());
1278 coarseLevel.
Get(
"Nullspace",Nullspace_nodal,TentativePFact.get());
1279 coarseLevel.
Get(
"Coordinates",
CoordsH_,Tfact.get());
1282 if (
parameterList_.get(
"aggregation: export visualization data",
false))
1283 aggExport->Build(fineLevel, coarseLevel);
1285 dump(*P_nodal,
"P_nodal.m");
1286 dump(*Nullspace_nodal,
"Nullspace_nodal.m");
1289 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix();
1292 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1294 RCP<CrsMatrixWrap> P_nodal_temp;
1295 RCP<const Map> targetMap = D0Crs->getColMap();
1296 P_nodal_temp = rcp(
new CrsMatrixWrap(targetMap));
1297 RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1298 P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1299 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->
getDomainMap(),
1300 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->
getRangeMap());
1301 P_nodal_imported = P_nodal_temp->getCrsMatrix();
1302 dump(*P_nodal_temp,
"P_nodal_imported.m");
1304 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1310 using ATS = Kokkos::ArithTraits<SC>;
1311 using impl_Scalar =
typename ATS::val_type;
1312 using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1313 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1315 typedef typename Matrix::local_matrix_type KCRS;
1316 typedef typename KCRS::StaticCrsGraphType graph_t;
1317 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1318 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1319 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1321 const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
1322 const impl_Scalar impl_SC_ONE = impl_ATS::one();
1323 const impl_Scalar impl_half = impl_SC_ONE / (impl_SC_ONE + impl_SC_ONE);
1329 std::string defaultAlgo =
"mat-mat";
1330 std::string algo =
parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1335 if (algo ==
"mat-mat") {
1336 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(
SM_Matrix_->getRowMap());
1337 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
false,*P_nodal,
false,*D0_P_nodal,
true,
true);
1339#ifdef HAVE_MUELU_DEBUG
1340 TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1344 auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1347 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1348 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1350 size_t nnzEstimate = dim*localD0P.graph.entries.size();
1351 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1352 lno_nnz_view_t P11colind(
"P11_colind",nnzEstimate);
1353 scalar_view_t P11vals(
"P11_vals",nnzEstimate);
1356 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1357 KOKKOS_LAMBDA(
const size_t i) {
1358 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1362 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1363 KOKKOS_LAMBDA(
const size_t jj) {
1364 for (
size_t k = 0; k < dim; k++) {
1365 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1366 P11vals(dim*jj+k) = impl_SC_ZERO;
1370 auto localNullspace =
Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1373 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1377 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1379 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1381 auto localD0 =
D0_Matrix_->getLocalMatrixDevice();
1382 auto localP = P_nodal_imported->getLocalMatrixDevice();
1383 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1384 KOKKOS_LAMBDA(
const size_t i) {
1385 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1386 LO l = localD0.graph.entries(ll);
1387 impl_Scalar p = localD0.values(ll);
1388 if (impl_ATS::magnitude(p) < tol)
1390 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1391 LO j = localP.graph.entries(jj);
1392 impl_Scalar v = localP.values(jj);
1393 for (
size_t k = 0; k < dim; k++) {
1395 impl_Scalar n = localNullspace(i,k);
1397 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1398 if (P11colind(m) == jNew)
1400#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1401 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1403 P11vals(m) += impl_half * v * n;
1410 auto localD0 =
D0_Matrix_->getLocalMatrixDevice();
1411 auto localP = P_nodal_imported->getLocalMatrixDevice();
1412 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1413 KOKKOS_LAMBDA(
const size_t i) {
1414 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1415 LO l = localD0.graph.entries(ll);
1416 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1417 LO j = localP.graph.entries(jj);
1418 impl_Scalar v = localP.values(jj);
1419 for (
size_t k = 0; k < dim; k++) {
1421 impl_Scalar n = localNullspace(i,k);
1423 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1424 if (P11colind(m) == jNew)
1426#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1427 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1429 P11vals(m) += impl_half * v * n;
1436 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1437 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1438 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1439 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1442 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1446 auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1447 auto localNullspaceH =
NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1448 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1449 KOKKOS_LAMBDA(
const size_t i) {
1450 impl_Scalar val = localNullspace_nodal(i,0);
1451 for (
size_t j = 0; j < dim; j++)
1452 localNullspaceH(dim*i+j, j) = val;
1457 auto localD0 =
D0_Matrix_->getLocalMatrixDevice();
1461 if (algo ==
"mat-mat") {
1464 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getColMap(), dim);
1465 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getDomainMap(), dim);
1467 size_t nnzEstimate = dim*localD0.graph.entries.size();
1468 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1469 lno_nnz_view_t P11colind(
"P11_colind",nnzEstimate);
1470 scalar_view_t P11vals(
"P11_vals",nnzEstimate);
1473 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1474 KOKKOS_LAMBDA(
const size_t i) {
1475 P11rowptr(i) = dim*localD0.graph.row_map(i);
1479 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1480 KOKKOS_LAMBDA(
const size_t jj) {
1481 for (
size_t k = 0; k < dim; k++) {
1482 P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1483 P11vals(dim*jj+k) = impl_SC_ZERO;
1487 auto localNullspace =
Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1490 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1494 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1496 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1498 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1499 KOKKOS_LAMBDA(
const size_t i) {
1500 for (
size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1501 LO j = localD0.graph.entries(jj);
1502 impl_Scalar p = localD0.values(jj);
1503 if (impl_ATS::magnitude(p) < tol)
1505 for (
size_t k = 0; k < dim; k++) {
1507 impl_Scalar n = localNullspace(i,k);
1509 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1510 if (P11colind(m) == jNew)
1512#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1513 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1515 P11vals(m) += impl_half * n;
1521 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1522 KOKKOS_LAMBDA(
const size_t i) {
1523 for (
size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1524 LO j = localD0.graph.entries(jj);
1525 for (
size_t k = 0; k < dim; k++) {
1527 impl_Scalar n = localNullspace(i,k);
1529 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1530 if (P11colind(m) == jNew)
1532#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1533 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1535 P11vals(m) += impl_half * n;
1541 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1542 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1543 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1544 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1546 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1552 ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1553 ArrayRCP<ArrayView<const SC> > nullspace(dim);
1554 for(
size_t i=0; i<dim; i++) {
1556 nullspace[i] = nullspaceRCP[i]();
1560 ArrayRCP<size_t> P11rowptr_RCP;
1561 ArrayRCP<LO> P11colind_RCP;
1562 ArrayRCP<SC> P11vals_RCP;
1571 std::string defaultAlgo =
"mat-mat";
1572 std::string algo =
parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1576 if (algo ==
"mat-mat") {
1577 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(
SM_Matrix_->getRowMap());
1578 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
false,*P_nodal,
false,*D0_P_nodal,
true,
true);
1581 ArrayRCP<const size_t> D0rowptr_RCP;
1582 ArrayRCP<const LO> D0colind_RCP;
1583 ArrayRCP<const SC> D0vals_RCP;
1584 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1588 ArrayView<const size_t> D0rowptr;
1589 ArrayView<const LO> D0colind;
1590 ArrayView<const SC> D0vals;
1591 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1594 ArrayRCP<const size_t> Prowptr_RCP;
1595 ArrayRCP<const LO> Pcolind_RCP;
1596 ArrayRCP<const SC> Pvals_RCP;
1597 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1598 ArrayView<const size_t> Prowptr;
1599 ArrayView<const LO> Pcolind;
1600 ArrayView<const SC> Pvals;
1601 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1604 ArrayRCP<const size_t> D0Prowptr_RCP;
1605 ArrayRCP<const LO> D0Pcolind_RCP;
1606 ArrayRCP<const SC> D0Pvals_RCP;
1607 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1612 ArrayView<const size_t> D0Prowptr;
1613 ArrayView<const LO> D0Pcolind;
1614 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1617 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1618 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1619 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1620 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1621 size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1622 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1624 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1625 ArrayView<LO> P11colind = P11colind_RCP();
1626 ArrayView<SC> P11vals = P11vals_RCP();
1629 for (
size_t i = 0; i < numLocalRows+1; i++) {
1630 P11rowptr[i] = dim*D0Prowptr[i];
1634 for (
size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1635 for (
size_t k = 0; k < dim; k++) {
1636 P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1637 P11vals[dim*jj+k] = SC_ZERO;
1640 RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1641 RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1643 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1647 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1649 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1650 for (
size_t i = 0; i < numLocalRows; i++) {
1651 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1652 LO l = D0colind[ll];
1654 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1656 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1658 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1660 for (
size_t k = 0; k < dim; k++) {
1662 SC n = nullspace[k][i];
1664 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1665 if (P11colind[m] == jNew)
1667#ifdef HAVE_MUELU_DEBUG
1668 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1670 P11vals[m] += half * v * n;
1677 for (
size_t i = 0; i < numLocalRows; i++) {
1678 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1679 LO l = D0colind[ll];
1680 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1682 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1684 for (
size_t k = 0; k < dim; k++) {
1686 SC n = nullspace[k][i];
1688 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1689 if (P11colind[m] == jNew)
1691#ifdef HAVE_MUELU_DEBUG
1692 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1694 P11vals[m] += half * v * n;
1701 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1702 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1704 }
else if (algo ==
"gustavson") {
1705 ArrayRCP<const size_t> D0rowptr_RCP;
1706 ArrayRCP<const LO> D0colind_RCP;
1707 ArrayRCP<const SC> D0vals_RCP;
1708 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1712 ArrayView<const size_t> D0rowptr;
1713 ArrayView<const LO> D0colind;
1714 ArrayView<const SC> D0vals;
1715 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1718 ArrayRCP<const size_t> Prowptr_RCP;
1719 ArrayRCP<const LO> Pcolind_RCP;
1720 ArrayRCP<const SC> Pvals_RCP;
1721 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1722 ArrayView<const size_t> Prowptr;
1723 ArrayView<const LO> Pcolind;
1724 ArrayView<const SC> Pvals;
1725 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1727 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1728 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1729 Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1731 size_t nnz_alloc = dim*D0vals_RCP.size();
1734 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1735 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1736 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1737 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1738 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1740 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1741 ArrayView<LO> P11colind = P11colind_RCP();
1742 ArrayView<SC> P11vals = P11vals_RCP();
1745 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1749 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1751 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1754 for (
size_t i = 0; i < numLocalRows; i++) {
1756 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1757 LO l = D0colind[ll];
1759 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1761 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1764 for (
size_t k = 0; k < dim; k++) {
1766 SC n = nullspace[k][i];
1768 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1769 P11_status[jNew] = nnz;
1770 P11colind[nnz] = jNew;
1771 P11vals[nnz] = half * v * n;
1776 P11vals[P11_status[jNew]] += half * v * n;
1785 P11rowptr[numLocalRows] = nnz;
1789 for (
size_t i = 0; i < numLocalRows; i++) {
1791 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1792 LO l = D0colind[ll];
1793 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1796 for (
size_t k = 0; k < dim; k++) {
1798 SC n = nullspace[k][i];
1800 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1801 P11_status[jNew] = nnz;
1802 P11colind[nnz] = jNew;
1803 P11vals[nnz] = half * v * n;
1808 P11vals[P11_status[jNew]] += half * v * n;
1817 P11rowptr[numLocalRows] = nnz;
1820 if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1824 P11vals_RCP.resize(nnz);
1825 P11colind_RCP.resize(nnz);
1828 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1829 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1831 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1835 ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1836 ArrayView<const Scalar> ns_view = ns_rcp();
1837 for (
size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1839 for (
size_t j = 0; j < dim; j++)
1845 ArrayRCP<const size_t> D0rowptr_RCP;
1846 ArrayRCP<const LO> D0colind_RCP;
1847 ArrayRCP<const SC> D0vals_RCP;
1848 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1852 ArrayView<const size_t> D0rowptr;
1853 ArrayView<const LO> D0colind;
1854 ArrayView<const SC> D0vals;
1855 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1858 if (algo ==
"mat-mat") {
1861 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getColMap(), dim);
1862 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getDomainMap(), dim);
1863 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1864 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1865 size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1866 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1868 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1869 ArrayView<LO> P11colind = P11colind_RCP();
1870 ArrayView<SC> P11vals = P11vals_RCP();
1873 for (
size_t i = 0; i < numLocalRows+1; i++) {
1874 P11rowptr[i] = dim*D0rowptr[i];
1878 for (
size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
1879 for (
size_t k = 0; k < dim; k++) {
1880 P11colind[dim*jj+k] = dim*D0colind[jj]+k;
1881 P11vals[dim*jj+k] = SC_ZERO;
1885 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1889 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1891 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1892 for (
size_t i = 0; i < numLocalRows; i++) {
1893 for (
size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1894 LO j = D0colind[jj];
1896 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1898 for (
size_t k = 0; k < dim; k++) {
1900 SC n = nullspace[k][i];
1902 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1903 if (P11colind[m] == jNew)
1905#ifdef HAVE_MUELU_DEBUG
1906 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1908 P11vals[m] += half * n;
1914 for (
size_t i = 0; i < numLocalRows; i++) {
1915 for (
size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1916 LO j = D0colind[jj];
1918 for (
size_t k = 0; k < dim; k++) {
1920 SC n = nullspace[k][i];
1922 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1923 if (P11colind[m] == jNew)
1925#ifdef HAVE_MUELU_DEBUG
1926 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1928 P11vals[m] += half * n;
1934 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1935 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1938 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");