230#ifdef HAVE_MUELU_CUDA
231 if (
parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStart();
234 std::string timerLabel;
236 timerLabel =
"MueLu RefMaxwell: compute (reuse)";
238 timerLabel =
"MueLu RefMaxwell: compute";
239 RCP<Teuchos::TimeMonitor> tmCompute =
getTimer(timerLabel);
246 RCP<ParameterList> params = rcp(
new ParameterList());;
247 params->set(
"printLoadBalancingInfo",
true);
248 params->set(
"printCommInfo",
true);
282 RCP<MultiVector> CoordsSC;
295 ArrayRCP<ArrayRCP<const Scalar> > localNullspace(
Nullspace_->getNumVectors());
296 for (
size_t i = 0; i <
Nullspace_->getNumVectors(); i++)
298 coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
299 coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
300 coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
301 for (
size_t j=0; j <
Nullspace_->getMap()->getLocalNumElements(); j++) {
302 Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
303 for (
size_t i=0; i <
Nullspace_->getNumVectors(); i++)
304 lenSC += localNullspace[i][j]*localNullspace[i][j];
305 coordinateType len = Teuchos::as<coordinateType>(Teuchos::ScalarTraits<Scalar>::real(Teuchos::ScalarTraits<Scalar>::squareroot(lenSC)));
306 localMinLen = std::min(localMinLen, len);
307 localMaxLen = std::max(localMaxLen, len);
311 RCP<const Teuchos::Comm<int> > comm =
Nullspace_->getMap()->getComm();
315 meanLen /=
Nullspace_->getMap()->getGlobalNumElements();
319 GetOStream(
Statistics0) <<
"Edge length (min/mean/max): " << minLen <<
" / " << meanLen <<
" / " << maxLen << std::endl;
326 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
328 Array<Scalar> normsSC(
Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
333 GetOStream(
Errors) <<
"MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
351 std::string label(
"D0*Ms*D0");
365 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building special prolongator" << std::endl;
370 bool doRebalancing =
false;
372 int rebalanceStriding =
parameterList_.get<
int>(
"refmaxwell: subsolves striding", -1);
373 int numProcsAH, numProcsA22;
374 int numProcs =
SM_Matrix_->getDomainMap()->getComm()->getSize();
376 doRebalancing =
false;
395 ParameterList repartheurParams;
396 repartheurParams.set(
"repartition: start level", 0);
398 int defaultTargetRows = 10000;
399 repartheurParams.set(
"repartition: min rows per proc",
precList11_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
400 repartheurParams.set(
"repartition: target rows per proc",
precList11_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
401 repartheurParams.set(
"repartition: min rows per thread",
precList11_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
402 repartheurParams.set(
"repartition: target rows per thread",
precList11_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
403 repartheurParams.set(
"repartition: max imbalance",
precList11_.get<
double>(
"repartition: max imbalance", 1.1));
404 repartheurFactory->SetParameterList(repartheurParams);
406 level.
Request(
"number of partitions", repartheurFactory.get());
407 repartheurFactory->Build(level);
408 numProcsAH = level.
Get<
int>(
"number of partitions", repartheurFactory.get());
409 numProcsAH = std::min(numProcsAH,numProcs);
422 ParameterList repartheurParams;
423 repartheurParams.set(
"repartition: start level", 0);
424 repartheurParams.set(
"repartition: use map",
true);
426 int defaultTargetRows = 10000;
427 repartheurParams.set(
"repartition: min rows per proc",
precList22_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
428 repartheurParams.set(
"repartition: target rows per proc",
precList22_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
429 repartheurParams.set(
"repartition: min rows per thread",
precList22_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
430 repartheurParams.set(
"repartition: target rows per thread",
precList22_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
432 repartheurFactory->SetParameterList(repartheurParams);
434 level.
Request(
"number of partitions", repartheurFactory.get());
435 repartheurFactory->Build(level);
436 numProcsA22 = level.
Get<
int>(
"number of partitions", repartheurFactory.get());
437 numProcsA22 = std::min(numProcsA22,numProcs);
440 if (rebalanceStriding >= 1) {
441 TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
442 TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
443 if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
444 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Disabling striding = " << rebalanceStriding <<
", since AH needs " << numProcsAH
445 <<
" procs and A22 needs " << numProcsA22 <<
" procs."<< std::endl;
446 rebalanceStriding = -1;
452 if ((numProcsAH < 0) || (numProcsA22 < 0) || (numProcsAH + numProcsA22 > numProcs)) {
453 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Disabling rebalancing of subsolves, since partition heuristic resulted "
454 <<
"in undesirable number of partitions: " << numProcsAH <<
", " << numProcsA22 << std::endl;
455 doRebalancing =
false;
461 RCP<Teuchos::TimeMonitor> tm =
getTimer(
"MueLu RefMaxwell: Rebalance AH");
463 Level fineLevel, coarseLevel;
474 coarseLevel.
Set(
"number of partitions", numProcsAH);
475 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
477 coarseLevel.
setlib(
AH_->getDomainMap()->lib());
478 fineLevel.
setlib(
AH_->getDomainMap()->lib());
479 coarseLevel.setObjectLabel(
"RefMaxwell coarse (1,1)");
480 fineLevel.setObjectLabel(
"RefMaxwell coarse (1,1)");
482 std::string partName =
precList11_.get<std::string>(
"repartition: partitioner",
"zoltan2");
483 RCP<Factory> partitioner;
484 if (partName ==
"zoltan") {
485#ifdef HAVE_MUELU_ZOLTAN
492 }
else if (partName ==
"zoltan2") {
493#ifdef HAVE_MUELU_ZOLTAN2
495 ParameterList partParams;
496 RCP<const ParameterList> partpartParams = rcp(
new ParameterList(
precList11_.sublist(
"repartition: params",
false)));
497 partParams.set(
"ParameterList", partpartParams);
498 partitioner->SetParameterList(partParams);
506 ParameterList repartParams;
507 repartParams.set(
"repartition: print partition distribution",
precList11_.get<
bool>(
"repartition: print partition distribution",
false));
508 repartParams.set(
"repartition: remap parts",
precList11_.get<
bool>(
"repartition: remap parts",
true));
509 if (rebalanceStriding >= 1) {
510 bool acceptPart = (
SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
511 if (
SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
513 repartParams.set(
"repartition: remap accept partition", acceptPart);
515 repartFactory->SetParameterList(repartParams);
517 repartFactory->SetFactory(
"Partition", partitioner);
520 ParameterList newPparams;
521 newPparams.set(
"type",
"Interpolation");
522 newPparams.set(
"repartition: rebalance P and R",
precList11_.get<
bool>(
"repartition: rebalance P and R",
false));
523 newPparams.set(
"repartition: use subcommunicators",
true);
524 newPparams.set(
"repartition: rebalance Nullspace", !
NullspaceH_.is_null());
528 newP->SetParameterList(newPparams);
529 newP->SetFactory(
"Importer", repartFactory);
532 ParameterList rebAcParams;
533 rebAcParams.set(
"repartition: use subcommunicators",
true);
534 newA->SetParameterList(rebAcParams);
535 newA->SetFactory(
"Importer", repartFactory);
537 coarseLevel.
Request(
"P", newP.get());
538 coarseLevel.
Request(
"Importer", repartFactory.get());
539 coarseLevel.
Request(
"A", newA.get());
540 coarseLevel.
Request(
"Coordinates", newP.get());
542 coarseLevel.
Request(
"Nullspace", newP.get());
543 repartFactory->Build(coarseLevel);
545 if (!
precList11_.get<
bool>(
"repartition: rebalance P and R",
false))
546 ImporterH_ = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
547 P11_ = coarseLevel.
Get< RCP<Matrix> >(
"P", newP.get());
548 AH_ = coarseLevel.
Get< RCP<Matrix> >(
"A", newA.get());
549 CoordsH_ = coarseLevel.
Get< RCP<RealValuedMultiVector> >(
"Coordinates", newP.get());
551 NullspaceH_ = coarseLevel.
Get< RCP<MultiVector> >(
"Nullspace", newP.get());
558 RCP<const Import> ImporterH = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
559 RCP<const Map> targetMap = ImporterH->getTargetMap();
560 ParameterList XpetraList;
561 XpetraList.set(
"Restrict Communicator",
true);
572 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
573 <<
"support BlockSize > 1 and drop tol != 0.0" << std::endl;
588 if (!
AH_.is_null()) {
591 AH_->SetFixedBlockSize(dim);
592 AH_->setObjectLabel(
"RefMaxwell coarse (1,1)");
596 RCP<ParameterList> params = rcp(
new ParameterList());;
597 params->set(
"printLoadBalancingInfo",
true);
598 params->set(
"printCommInfo",
true);
601#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
602 if (
precList11_.isType<std::string>(
"Preconditioner Type")) {
604 if (
precList11_.get<std::string>(
"Preconditioner Type") ==
"MueLu") {
605 ParameterList& userParamList =
precList11_.sublist(
"Preconditioner Types").sublist(
"MueLu").sublist(
"user data");
606 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
CoordsH_);
618 ParameterList& userParamList =
precList11_.sublist(
"user data");
619 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
CoordsH_);
621 userParamList.set<RCP<MultiVector> >(
"Nullspace",
NullspaceH_);
624 RCP<MueLu::Level> level0 =
HierarchyH_->GetLevel(0);
625 level0->Set(
"A",
AH_);
640 if (
D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
641 replaceWith= Teuchos::ScalarTraits<SC>::eps();
643 replaceWith = Teuchos::ScalarTraits<SC>::zero();
655 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
658 RCP<Teuchos::TimeMonitor> tm =
getTimer(
"MueLu RefMaxwell: Build A22");
660 Level fineLevel, coarseLevel;
672 coarseLevel.setObjectLabel(
"RefMaxwell (2,2)");
673 fineLevel.setObjectLabel(
"RefMaxwell (2,2)");
675 RCP<RAPFactory> rapFact = rcp(
new RAPFactory());
676 ParameterList rapList = *(rapFact->GetValidParameterList());
677 rapList.set(
"transpose: use implicit",
true);
678 rapList.set(
"rap: fix zero diagonals",
parameterList_.get<
bool>(
"rap: fix zero diagonals",
true));
679 rapList.set(
"rap: fix zero diagonals threshold",
parameterList_.get<
double>(
"rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
680 rapList.set(
"rap: triple product",
parameterList_.get<
bool>(
"rap: triple product",
false));
681 rapFact->SetParameterList(rapList);
684 coarseLevel.
AddKeepFlag(
"AP reuse data", rapFact.get());
685 coarseLevel.
Set<Teuchos::RCP<Teuchos::ParameterList> >(
"AP reuse data",
A22_AP_reuse_data_, rapFact.get());
688 coarseLevel.
AddKeepFlag(
"RAP reuse data", rapFact.get());
689 coarseLevel.
Set<Teuchos::RCP<Teuchos::ParameterList> >(
"RAP reuse data",
A22_RAP_reuse_data_, rapFact.get());
695 coarseLevel.
Set(
"number of partitions", numProcsA22);
696 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
698 std::string partName =
precList22_.get<std::string>(
"repartition: partitioner",
"zoltan2");
699 RCP<Factory> partitioner;
700 if (partName ==
"zoltan") {
701#ifdef HAVE_MUELU_ZOLTAN
703 partitioner->SetFactory(
"A", rapFact);
709 }
else if (partName ==
"zoltan2") {
710#ifdef HAVE_MUELU_ZOLTAN2
712 ParameterList partParams;
713 RCP<const ParameterList> partpartParams = rcp(
new ParameterList(
precList22_.sublist(
"repartition: params",
false)));
714 partParams.set(
"ParameterList", partpartParams);
715 partitioner->SetParameterList(partParams);
716 partitioner->SetFactory(
"A", rapFact);
724 ParameterList repartParams;
725 repartParams.set(
"repartition: print partition distribution",
precList22_.get<
bool>(
"repartition: print partition distribution",
false));
726 repartParams.set(
"repartition: remap parts",
precList22_.get<
bool>(
"repartition: remap parts",
true));
727 if (rebalanceStriding >= 1) {
728 bool acceptPart = ((
SM_Matrix_->getDomainMap()->getComm()->getSize()-1-
SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
729 if (
SM_Matrix_->getDomainMap()->getComm()->getSize()-1-
SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
732 TEUCHOS_ASSERT(
AH_.is_null());
733 repartParams.set(
"repartition: remap accept partition", acceptPart);
735 repartParams.set(
"repartition: remap accept partition",
AH_.is_null());
736 repartFactory->SetParameterList(repartParams);
737 repartFactory->SetFactory(
"A", rapFact);
739 repartFactory->SetFactory(
"Partition", partitioner);
742 ParameterList newPparams;
743 newPparams.set(
"type",
"Interpolation");
744 newPparams.set(
"repartition: rebalance P and R",
precList22_.get<
bool>(
"repartition: rebalance P and R",
false));
745 newPparams.set(
"repartition: use subcommunicators",
true);
746 newPparams.set(
"repartition: rebalance Nullspace",
false);
748 newP->SetParameterList(newPparams);
749 newP->SetFactory(
"Importer", repartFactory);
752 ParameterList rebAcParams;
753 rebAcParams.set(
"repartition: use subcommunicators",
true);
754 newA->SetParameterList(rebAcParams);
755 newA->SetFactory(
"A", rapFact);
756 newA->SetFactory(
"Importer", repartFactory);
758 coarseLevel.
Request(
"P", newP.get());
759 coarseLevel.
Request(
"Importer", repartFactory.get());
760 coarseLevel.
Request(
"A", newA.get());
761 coarseLevel.
Request(
"Coordinates", newP.get());
762 rapFact->Build(fineLevel,coarseLevel);
763 repartFactory->Build(coarseLevel);
765 if (!
precList22_.get<
bool>(
"repartition: rebalance P and R",
false))
766 Importer22_ = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
767 D0_Matrix_ = coarseLevel.
Get< RCP<Matrix> >(
"P", newP.get());
768 A22_ = coarseLevel.
Get< RCP<Matrix> >(
"A", newA.get());
769 Coords_ = coarseLevel.
Get< RCP<RealValuedMultiVector> >(
"Coordinates", newP.get());
774 coarseLevel.
Request(
"A", rapFact.get());
776 coarseLevel.
Request(
"AP reuse data", rapFact.get());
777 coarseLevel.
Request(
"RAP reuse data", rapFact.get());
780 A22_ = coarseLevel.
Get< RCP<Matrix> >(
"A", rapFact.get());
783 if (coarseLevel.
IsAvailable(
"AP reuse data", rapFact.get()))
785 if (coarseLevel.
IsAvailable(
"RAP reuse data", rapFact.get()))
790 RCP<Teuchos::TimeMonitor> tm =
getTimer(
"MueLu RefMaxwell: Build A22");
793 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
SM_Matrix_,
false,*
D0_Matrix_,
false,temp,
GetOStream(
Runtime0),
true,
true);
795 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_T_Matrix_,
false,*temp,
false,
A22_,
GetOStream(
Runtime0),
true,
true);
797 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
true,*temp,
false,
A22_,
GetOStream(
Runtime0),
true,
true);
800 RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
803 RCP<Matrix> temp, temp2;
804 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
SM_Matrix_,
false,*
D0_Matrix_,
false,temp,
GetOStream(
Runtime0),
true,
true);
806 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_T_Matrix_,
false,*temp,
false,temp2,
GetOStream(
Runtime0),
true,
true);
808 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
true,*temp,
false,temp2,
GetOStream(
Runtime0),
true,
true);
811 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(
Importer22_->getTargetMap(), D0importer);
813 ParameterList XpetraList;
814 XpetraList.set(
"Restrict Communicator",
true);
815 XpetraList.set(
"Timer Label",
"MueLu::RebalanceA22");
816 RCP<const Map> targetMap =
Importer22_->getTargetMap();
829 if (!
A22_.is_null()) {
831 A22_->setObjectLabel(
"RefMaxwell (2,2)");
834 RCP<ParameterList> params = rcp(
new ParameterList());;
835 params->set(
"printLoadBalancingInfo",
true);
836 params->set(
"printCommInfo",
true);
839#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
840 if (
precList22_.isType<std::string>(
"Preconditioner Type")) {
842 if (
precList22_.get<std::string>(
"Preconditioner Type") ==
"MueLu") {
843 ParameterList& userParamList =
precList22_.sublist(
"Preconditioner Types").sublist(
"MueLu").sublist(
"user data");
844 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
Coords_);
852 ParameterList& userParamList =
precList22_.sublist(
"user data");
853 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates",
Coords_);
856 std::string coarseType =
"";
858 coarseType =
precList22_.get<std::string>(
"coarse: type");
860 std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
861 std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
865 coarseType ==
"Klu" ||
866 coarseType ==
"Klu2") &&
868 !
precList22_.sublist(
"coarse: params").isParameter(
"fix nullspace")))
869 precList22_.sublist(
"coarse: params").set(
"fix nullspace",
true);
873 level0->Set(
"A",
A22_);
888 if (
D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
889 replaceWith= Teuchos::ScalarTraits<SC>::eps();
891 replaceWith = Teuchos::ScalarTraits<SC>::zero();
905 RCP<const Import> ImporterP11 = ImportFactory::Build(
ImporterH_->getTargetMap(),
P11_->getColMap());
906 rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix()->replaceDomainMapAndImporter(
ImporterH_->getTargetMap(), ImporterP11);
914 RCP<const Import> ImporterD0 = ImportFactory::Build(
Importer22_->getTargetMap(),
D0_Matrix_->getColMap());
915 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(
Importer22_->getTargetMap(), ImporterD0);
918#ifdef HAVE_MUELU_TPETRA
921 (!rcp_dynamic_cast<CrsMatrixWrap>(
D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
922 (!rcp_dynamic_cast<CrsMatrixWrap>(
R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
923 (
D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
924 (
R11_->getColMap()->lib() == Xpetra::UseTpetra))
930 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
937 RCP<ParameterList> matvecParams = rcpFromRef(
parameterList_.sublist(
"matvec params"));
946 RCP<ParameterList> importerParams = rcpFromRef(
parameterList_.sublist(
"refmaxwell: ImporterH params"));
947 ImporterH_->setDistributorParameters(importerParams);
950 RCP<ParameterList> importerParams = rcpFromRef(
parameterList_.sublist(
"refmaxwell: Importer22 params"));
951 Importer22_->setDistributorParameters(importerParams);
957#ifdef HAVE_MUELU_CUDA
958 if (
parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStop();
1175 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1176 const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1177 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1179 size_t numLocalRows =
SM_Matrix_->getLocalNumRows();
1181 RCP<Matrix> P_nodal;
1182 RCP<CrsMatrix> P_nodal_imported;
1183 RCP<MultiVector> Nullspace_nodal;
1187 bool read_P_from_file =
parameterList_.get(
"refmaxwell: read_P_from_file",
false);
1188 if (read_P_from_file) {
1191 std::string P_filename =
parameterList_.get(
"refmaxwell: P_filename",std::string(
"P.m"));
1192 std::string domainmap_filename =
parameterList_.get(
"refmaxwell: P_domainmap_filename",std::string(
"domainmap_P.m"));
1193 std::string colmap_filename =
parameterList_.get(
"refmaxwell: P_colmap_filename",std::string(
"colmap_P.m"));
1194 std::string coords_filename =
parameterList_.get(
"refmaxwell: CoordsH",std::string(
"coordsH.m"));
1195 RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename,
A_nodal_Matrix_->getDomainMap()->lib(),
A_nodal_Matrix_->getDomainMap()->getComm());
1196 RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename,
A_nodal_Matrix_->getDomainMap()->lib(),
A_nodal_Matrix_->getDomainMap()->getComm());
1198 CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1200 Level fineLevel, coarseLevel;
1208 fineLevel.
Set(
"DofsPerNode",1);
1211 coarseLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
1212 fineLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
1215 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(
A_nodal_Matrix_->getRowMap(),NSdim);
1216 nullSpace->putScalar(SC_ONE);
1217 fineLevel.
Set(
"Nullspace",nullSpace);
1219 RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1226 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1236 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1240 dropFact->SetFactory(
"UnAmalgamationInfo", amalgFact);
1241 double dropTol =
parameterList_.get(
"aggregation: drop tol",0.0);
1242 std::string dropScheme =
parameterList_.get(
"aggregation: drop scheme",
"classical");
1243 std::string distLaplAlgo =
parameterList_.get(
"aggregation: distance laplacian algo",
"default");
1244 dropFact->SetParameter(
"aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1245 dropFact->SetParameter(
"aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1247 dropFact->SetParameter(
"aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1249 UncoupledAggFact->SetFactory(
"Graph", dropFact);
1250 int minAggSize =
parameterList_.get(
"aggregation: min agg size",2);
1251 UncoupledAggFact->SetParameter(
"aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1252 int maxAggSize =
parameterList_.get(
"aggregation: max agg size",-1);
1253 UncoupledAggFact->SetParameter(
"aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1255 coarseMapFact->SetFactory(
"Aggregates", UncoupledAggFact);
1257 TentativePFact->SetFactory(
"Aggregates", UncoupledAggFact);
1258 TentativePFact->SetFactory(
"UnAmalgamationInfo", amalgFact);
1259 TentativePFact->SetFactory(
"CoarseMap", coarseMapFact);
1261 Tfact->SetFactory(
"Aggregates", UncoupledAggFact);
1262 Tfact->SetFactory(
"CoarseMap", coarseMapFact);
1264 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa") {
1265 SaPFact->SetFactory(
"P", TentativePFact);
1266 coarseLevel.
Request(
"P", SaPFact.get());
1268 coarseLevel.
Request(
"P",TentativePFact.get());
1269 coarseLevel.
Request(
"Nullspace",TentativePFact.get());
1270 coarseLevel.
Request(
"Coordinates",Tfact.get());
1272 RCP<AggregationExportFactory> aggExport;
1273 if (
parameterList_.get(
"aggregation: export visualization data",
false)) {
1275 ParameterList aggExportParams;
1276 aggExportParams.set(
"aggregation: output filename",
"aggs.vtk");
1277 aggExportParams.set(
"aggregation: output file: agg style",
"Jacks");
1278 aggExport->SetParameterList(aggExportParams);
1280 aggExport->SetFactory(
"Aggregates", UncoupledAggFact);
1281 aggExport->SetFactory(
"UnAmalgamationInfo", amalgFact);
1282 fineLevel.
Request(
"Aggregates",UncoupledAggFact.get());
1283 fineLevel.
Request(
"UnAmalgamationInfo",amalgFact.get());
1286 if (
parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1287 coarseLevel.
Get(
"P",P_nodal,SaPFact.get());
1289 coarseLevel.
Get(
"P",P_nodal,TentativePFact.get());
1290 coarseLevel.
Get(
"Nullspace",Nullspace_nodal,TentativePFact.get());
1291 coarseLevel.
Get(
"Coordinates",
CoordsH_,Tfact.get());
1294 if (
parameterList_.get(
"aggregation: export visualization data",
false))
1295 aggExport->Build(fineLevel, coarseLevel);
1297 dump(*P_nodal,
"P_nodal.m");
1298 dump(*Nullspace_nodal,
"Nullspace_nodal.m");
1301 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix();
1304 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1306 RCP<CrsMatrixWrap> P_nodal_temp;
1307 RCP<const Map> targetMap = D0Crs->getColMap();
1308 P_nodal_temp = rcp(
new CrsMatrixWrap(targetMap));
1309 RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1310 P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1311 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->
getDomainMap(),
1312 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->
getRangeMap());
1313 P_nodal_imported = P_nodal_temp->getCrsMatrix();
1314 dump(*P_nodal_temp,
"P_nodal_imported.m");
1316 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1322 using ATS = Kokkos::ArithTraits<SC>;
1323 using impl_Scalar =
typename ATS::val_type;
1324 using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1325 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1327 typedef typename Matrix::local_matrix_type KCRS;
1328 typedef typename KCRS::StaticCrsGraphType graph_t;
1329 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1330 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1331 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1333 const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
1334 const impl_Scalar impl_SC_ONE = impl_ATS::one();
1335 const impl_Scalar impl_half = impl_SC_ONE / (impl_SC_ONE + impl_SC_ONE);
1341 std::string defaultAlgo =
"mat-mat";
1342 std::string algo =
parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1347 if (algo ==
"mat-mat") {
1348 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(
SM_Matrix_->getRowMap());
1349 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
false,*P_nodal,
false,*D0_P_nodal,
true,
true);
1351#ifdef HAVE_MUELU_DEBUG
1352 TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1356 auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1359 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1360 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1362 size_t nnzEstimate = dim*localD0P.graph.entries.size();
1363 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1364 lno_nnz_view_t P11colind(
"P11_colind",nnzEstimate);
1365 scalar_view_t P11vals(
"P11_vals",nnzEstimate);
1368 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1369 KOKKOS_LAMBDA(
const size_t i) {
1370 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1374 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1375 KOKKOS_LAMBDA(
const size_t jj) {
1376 for (
size_t k = 0; k < dim; k++) {
1377 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1378 P11vals(dim*jj+k) = impl_SC_ZERO;
1382 auto localNullspace =
Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1385 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1389 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1391 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1393 auto localD0 =
D0_Matrix_->getLocalMatrixDevice();
1394 auto localP = P_nodal_imported->getLocalMatrixDevice();
1395 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1396 KOKKOS_LAMBDA(
const size_t i) {
1397 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1398 LO l = localD0.graph.entries(ll);
1399 impl_Scalar p = localD0.values(ll);
1400 if (impl_ATS::magnitude(p) < tol)
1402 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1403 LO j = localP.graph.entries(jj);
1404 impl_Scalar v = localP.values(jj);
1405 for (
size_t k = 0; k < dim; k++) {
1407 impl_Scalar n = localNullspace(i,k);
1409 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1410 if (P11colind(m) == jNew)
1412#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1413 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1415 P11vals(m) += impl_half * v * n;
1422 auto localD0 =
D0_Matrix_->getLocalMatrixDevice();
1423 auto localP = P_nodal_imported->getLocalMatrixDevice();
1424 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1425 KOKKOS_LAMBDA(
const size_t i) {
1426 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1427 LO l = localD0.graph.entries(ll);
1428 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1429 LO j = localP.graph.entries(jj);
1430 impl_Scalar v = localP.values(jj);
1431 for (
size_t k = 0; k < dim; k++) {
1433 impl_Scalar n = localNullspace(i,k);
1435 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1436 if (P11colind(m) == jNew)
1438#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1439 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1441 P11vals(m) += impl_half * v * n;
1448 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1449 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1450 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1451 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1454 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1458 auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1459 auto localNullspaceH =
NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1460 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1461 KOKKOS_LAMBDA(
const size_t i) {
1462 impl_Scalar val = localNullspace_nodal(i,0);
1463 for (
size_t j = 0; j < dim; j++)
1464 localNullspaceH(dim*i+j, j) = val;
1469 auto localD0 =
D0_Matrix_->getLocalMatrixDevice();
1473 if (algo ==
"mat-mat") {
1476 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getColMap(), dim);
1477 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getDomainMap(), dim);
1479 size_t nnzEstimate = dim*localD0.graph.entries.size();
1480 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1481 lno_nnz_view_t P11colind(
"P11_colind",nnzEstimate);
1482 scalar_view_t P11vals(
"P11_vals",nnzEstimate);
1485 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1486 KOKKOS_LAMBDA(
const size_t i) {
1487 P11rowptr(i) = dim*localD0.graph.row_map(i);
1491 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1492 KOKKOS_LAMBDA(
const size_t jj) {
1493 for (
size_t k = 0; k < dim; k++) {
1494 P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1495 P11vals(dim*jj+k) = impl_SC_ZERO;
1499 auto localNullspace =
Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1502 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1506 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1508 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1510 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1511 KOKKOS_LAMBDA(
const size_t i) {
1512 for (
size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1513 LO j = localD0.graph.entries(jj);
1514 impl_Scalar p = localD0.values(jj);
1515 if (impl_ATS::magnitude(p) < tol)
1517 for (
size_t k = 0; k < dim; k++) {
1519 impl_Scalar n = localNullspace(i,k);
1521 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1522 if (P11colind(m) == jNew)
1524#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1525 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1527 P11vals(m) += impl_half * n;
1533 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1534 KOKKOS_LAMBDA(
const size_t i) {
1535 for (
size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1536 LO j = localD0.graph.entries(jj);
1537 for (
size_t k = 0; k < dim; k++) {
1539 impl_Scalar n = localNullspace(i,k);
1541 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1542 if (P11colind(m) == jNew)
1544#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1545 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1547 P11vals(m) += impl_half * n;
1553 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1554 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1555 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1556 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1558 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1564 ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1565 ArrayRCP<ArrayView<const SC> > nullspace(dim);
1566 for(
size_t i=0; i<dim; i++) {
1568 nullspace[i] = nullspaceRCP[i]();
1572 ArrayRCP<size_t> P11rowptr_RCP;
1573 ArrayRCP<LO> P11colind_RCP;
1574 ArrayRCP<SC> P11vals_RCP;
1583 std::string defaultAlgo =
"mat-mat";
1584 std::string algo =
parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1588 if (algo ==
"mat-mat") {
1589 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(
SM_Matrix_->getRowMap());
1590 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*
D0_Matrix_,
false,*P_nodal,
false,*D0_P_nodal,
true,
true);
1593 ArrayRCP<const size_t> D0rowptr_RCP;
1594 ArrayRCP<const LO> D0colind_RCP;
1595 ArrayRCP<const SC> D0vals_RCP;
1596 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1600 ArrayView<const size_t> D0rowptr;
1601 ArrayView<const LO> D0colind;
1602 ArrayView<const SC> D0vals;
1603 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1606 ArrayRCP<const size_t> Prowptr_RCP;
1607 ArrayRCP<const LO> Pcolind_RCP;
1608 ArrayRCP<const SC> Pvals_RCP;
1609 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1610 ArrayView<const size_t> Prowptr;
1611 ArrayView<const LO> Pcolind;
1612 ArrayView<const SC> Pvals;
1613 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1616 ArrayRCP<const size_t> D0Prowptr_RCP;
1617 ArrayRCP<const LO> D0Pcolind_RCP;
1618 ArrayRCP<const SC> D0Pvals_RCP;
1619 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1624 ArrayView<const size_t> D0Prowptr;
1625 ArrayView<const LO> D0Pcolind;
1626 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1629 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1630 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1631 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1632 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1633 size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1634 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1636 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1637 ArrayView<LO> P11colind = P11colind_RCP();
1638 ArrayView<SC> P11vals = P11vals_RCP();
1641 for (
size_t i = 0; i < numLocalRows+1; i++) {
1642 P11rowptr[i] = dim*D0Prowptr[i];
1646 for (
size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1647 for (
size_t k = 0; k < dim; k++) {
1648 P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1649 P11vals[dim*jj+k] = SC_ZERO;
1652 RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1653 RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1655 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1659 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1661 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1662 for (
size_t i = 0; i < numLocalRows; i++) {
1663 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1664 LO l = D0colind[ll];
1666 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1668 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1670 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1672 for (
size_t k = 0; k < dim; k++) {
1674 SC n = nullspace[k][i];
1676 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1677 if (P11colind[m] == jNew)
1679#ifdef HAVE_MUELU_DEBUG
1680 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1682 P11vals[m] += half * v * n;
1689 for (
size_t i = 0; i < numLocalRows; i++) {
1690 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1691 LO l = D0colind[ll];
1692 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1694 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1696 for (
size_t k = 0; k < dim; k++) {
1698 SC n = nullspace[k][i];
1700 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1701 if (P11colind[m] == jNew)
1703#ifdef HAVE_MUELU_DEBUG
1704 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1706 P11vals[m] += half * v * n;
1713 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1714 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1716 }
else if (algo ==
"gustavson") {
1717 ArrayRCP<const size_t> D0rowptr_RCP;
1718 ArrayRCP<const LO> D0colind_RCP;
1719 ArrayRCP<const SC> D0vals_RCP;
1720 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1724 ArrayView<const size_t> D0rowptr;
1725 ArrayView<const LO> D0colind;
1726 ArrayView<const SC> D0vals;
1727 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1730 ArrayRCP<const size_t> Prowptr_RCP;
1731 ArrayRCP<const LO> Pcolind_RCP;
1732 ArrayRCP<const SC> Pvals_RCP;
1733 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1734 ArrayView<const size_t> Prowptr;
1735 ArrayView<const LO> Pcolind;
1736 ArrayView<const SC> Pvals;
1737 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1739 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1740 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1741 Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1743 size_t nnz_alloc = dim*D0vals_RCP.size();
1746 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1747 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1748 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1749 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1750 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1752 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1753 ArrayView<LO> P11colind = P11colind_RCP();
1754 ArrayView<SC> P11vals = P11vals_RCP();
1757 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1761 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1763 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1766 for (
size_t i = 0; i < numLocalRows; i++) {
1768 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1769 LO l = D0colind[ll];
1771 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1773 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1776 for (
size_t k = 0; k < dim; k++) {
1778 SC n = nullspace[k][i];
1780 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1781 P11_status[jNew] = nnz;
1782 P11colind[nnz] = jNew;
1783 P11vals[nnz] = half * v * n;
1788 P11vals[P11_status[jNew]] += half * v * n;
1797 P11rowptr[numLocalRows] = nnz;
1801 for (
size_t i = 0; i < numLocalRows; i++) {
1803 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1804 LO l = D0colind[ll];
1805 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1808 for (
size_t k = 0; k < dim; k++) {
1810 SC n = nullspace[k][i];
1812 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1813 P11_status[jNew] = nnz;
1814 P11colind[nnz] = jNew;
1815 P11vals[nnz] = half * v * n;
1820 P11vals[P11_status[jNew]] += half * v * n;
1829 P11rowptr[numLocalRows] = nnz;
1832 if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1836 P11vals_RCP.resize(nnz);
1837 P11colind_RCP.resize(nnz);
1840 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1841 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1843 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1847 ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1848 ArrayView<const Scalar> ns_view = ns_rcp();
1849 for (
size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1851 for (
size_t j = 0; j < dim; j++)
1857 ArrayRCP<const size_t> D0rowptr_RCP;
1858 ArrayRCP<const LO> D0colind_RCP;
1859 ArrayRCP<const SC> D0vals_RCP;
1860 rcp_dynamic_cast<CrsMatrixWrap>(
D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1864 ArrayView<const size_t> D0rowptr;
1865 ArrayView<const LO> D0colind;
1866 ArrayView<const SC> D0vals;
1867 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1870 if (algo ==
"mat-mat") {
1873 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getColMap(), dim);
1874 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(
D0_Matrix_->getDomainMap(), dim);
1875 P11_ = rcp(
new CrsMatrixWrap(
SM_Matrix_->getRowMap(), blockColMap, 0));
1876 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(
P11_)->getCrsMatrix();
1877 size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1878 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1880 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1881 ArrayView<LO> P11colind = P11colind_RCP();
1882 ArrayView<SC> P11vals = P11vals_RCP();
1885 for (
size_t i = 0; i < numLocalRows+1; i++) {
1886 P11rowptr[i] = dim*D0rowptr[i];
1890 for (
size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
1891 for (
size_t k = 0; k < dim; k++) {
1892 P11colind[dim*jj+k] = dim*D0colind[jj]+k;
1893 P11vals[dim*jj+k] = SC_ZERO;
1897 if (
D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1901 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1903 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1904 for (
size_t i = 0; i < numLocalRows; i++) {
1905 for (
size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1906 LO j = D0colind[jj];
1908 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1910 for (
size_t k = 0; k < dim; k++) {
1912 SC n = nullspace[k][i];
1914 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1915 if (P11colind[m] == jNew)
1917#ifdef HAVE_MUELU_DEBUG
1918 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1920 P11vals[m] += half * n;
1926 for (
size_t i = 0; i < numLocalRows; i++) {
1927 for (
size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1928 LO j = D0colind[jj];
1930 for (
size_t k = 0; k < dim; k++) {
1932 SC n = nullspace[k][i];
1934 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1935 if (P11colind[m] == jNew)
1937#ifdef HAVE_MUELU_DEBUG
1938 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1940 P11vals[m] += half * n;
1946 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1947 P11Crs->expertStaticFillComplete(blockDomainMap,
SM_Matrix_->getRangeMap());
1950 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");