140 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
141 typedef Xpetra::MultiVector<coordinate_type,LO,GO,NO> RealValuedMultiVector;
142 typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
145 std::string nspName =
"Nullspace";
146 if(pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
149 RCP<Matrix> Ptentative;
154 if ( aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
155 Ptentative = Teuchos::null;
156 Set(coarseLevel,
"P", Ptentative);
163 RCP<RealValuedMultiVector> fineCoords;
175 TEUCHOS_TEST_FOR_EXCEPTION( A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
178 RCP<MultiVector> coarseNullspace;
179 RCP<RealValuedMultiVector> coarseCoords;
184 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
186 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
187 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
189 GO indexBase = coarseMap->getIndexBase();
190 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
191 Array<GO> nodeList(numCoarseNodes);
192 const int numDimensions = fineCoords->getNumVectors();
194 for (LO i = 0; i < numCoarseNodes; i++) {
195 nodeList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
197 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
198 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
201 fineCoords->getMap()->getComm());
202 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
205 RCP<RealValuedMultiVector> ghostedCoords;
206 if (aggregates->AggregatesCrossProcessors()) {
207 RCP<const Map> aggMap = aggregates->GetMap();
208 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
210 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
211 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
213 ghostedCoords = fineCoords;
217 int myPID = coarseCoordsMap->getComm()->getRank();
218 LO numAggs = aggregates->GetNumAggregates();
219 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
220 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
221 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
224 for (
int dim = 0; dim < numDimensions; ++dim) {
225 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
226 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
228 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
229 if (procWinner[lnode] == myPID &&
230 lnode < fineCoordsData.size() &&
231 vertex2AggID[lnode] < coarseCoordsData.size() &&
232 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) ==
false) {
233 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
236 for (LO agg = 0; agg < numAggs; agg++) {
237 coarseCoordsData[agg] /= aggSizes[agg];
242 if (!aggregates->AggregatesCrossProcessors()) {
243 if(Xpetra::Helpers<SC,LO,GO,NO>::isTpetraBlockCrs(A)) {
247 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.
GetLevelID());
251 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
261 if (A->IsView(
"stridedMaps") ==
true)
262 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
264 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
267 Set(coarseLevel,
"Coordinates", coarseCoords);
269 Set(coarseLevel,
"Nullspace", coarseNullspace);
270 Set(coarseLevel,
"P", Ptentative);
273 RCP<ParameterList> params = rcp(
new ParameterList());
274 params->set(
"printLoadBalancingInfo",
true);
281 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
282 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
292 RCP<const Map> rowMap = A->getRowMap();
293 RCP<const Map> rangeMap = A->getRangeMap();
294 RCP<const Map> colMap = A->getColMap();
296 const size_t numFineBlockRows = rowMap->getLocalNumElements();
298 typedef Teuchos::ScalarTraits<SC> STS;
300 const SC zero = STS::zero();
301 const SC one = STS::one();
302 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
304 const GO numAggs = aggregates->GetNumAggregates();
305 const size_t NSDim = fineNullspace->getNumVectors();
306 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
312 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
313 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
314 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
316 coarsePointMap->getIndexBase(),
317 coarsePointMap->getComm());
320 const bool &doQRStep = pL.get<
bool>(
"tentative: calculate qr");
321 const bool &constantColSums = pL.get<
bool>(
"tentative: constant column sums");
324 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
337 ArrayRCP<LO> aggStart;
338 ArrayRCP<LO> aggToRowMapLO;
339 ArrayRCP<GO> aggToRowMapGO;
341 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
342 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
344 throw std::runtime_error(
"TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
347 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
350 ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
351 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
352 for (
size_t i = 0; i < NSDim; i++) {
353 fineNS[i] = fineNullspace->getData(i);
354 if (coarsePointMap->getLocalNumElements() > 0)
355 coarseNS[i] = coarseNullspace->getDataNonConst(i);
361 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap,coarseBlockMap,0);
362 ArrayRCP<size_t> iaPtent;
363 ArrayRCP<LO> jaPtent;
364 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
365 ArrayView<size_t> ia = iaPtent();
366 ArrayView<LO> ja = jaPtent();
368 for (
size_t i = 0; i < numFineBlockRows; i++) {
372 ia[numCoarseBlockRows] = numCoarseBlockRows;
375 for (GO agg = 0; agg < numAggs; agg++) {
376 LO aggSize = aggStart[agg+1] - aggStart[agg];
377 Xpetra::global_size_t offset = agg;
379 for (LO j = 0; j < aggSize; j++) {
381 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
382 const size_t rowStart = ia[localRow];
383 ja[rowStart] = offset;
389 size_t ia_tmp = 0, nnz = 0;
390 for (
size_t i = 0; i < numFineBlockRows; i++) {
391 for (
size_t j = ia_tmp; j < ia[i+1]; j++)
392 if (ja[j] != INVALID) {
400 if (rowMap->lib() == Xpetra::UseTpetra) {
404 jaPtent .resize(nnz);
408 BlockGraph->setAllIndices(iaPtent, jaPtent);
412 RCP<ParameterList> FCparams;
413 if(pL.isSublist(
"matrixmatrix: kernel params"))
414 FCparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
416 FCparams= rcp(
new ParameterList);
419 FCparams->set(
"compute global constants",FCparams->get(
"compute global constants",
true));
420 std::string levelIDs =
toString(levelID);
421 FCparams->set(
"Timer Label",std::string(
"MueLu::TentativeP-")+levelIDs);
422 RCP<const Export> dummy_e;
423 RCP<const Import> dummy_i;
424 BlockGraph->expertStaticFillComplete(coarseBlockMap,rowMap,dummy_i,dummy_e,FCparams);
429 RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap,NSDim);
430 RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
431 if(P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
432 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
441 Teuchos::Array<Scalar> block(NSDim*NSDim, zero);
442 Teuchos::Array<LO> bcol(1);
445 for (LO agg = 0; agg < numAggs; agg++) {
447 const LO aggSize = aggStart[agg+1] - aggStart[agg];
448 Xpetra::global_size_t offset = agg*NSDim;
452 for (LO j = 0; j < aggSize; j++) {
453 const LO localBlockRow = aggToRowMapLO[aggStart[agg]+j];
455 for (
size_t r = 0; r < NSDim; r++) {
456 LO localPointRow = localBlockRow*NSDim + r;
457 for (
size_t c = 0; c < NSDim; c++)
458 block[r*NSDim+c] = fineNS[c][localPointRow];
461 P_tpetra->replaceLocalValues(localBlockRow,bcol(),block());
465 for (
size_t j = 0; j < NSDim; j++)
466 coarseNS[j][offset+j] = one;
475 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
476 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace)
const {
477 typedef Teuchos::ScalarTraits<SC> STS;
478 typedef typename STS::magnitudeType Magnitude;
479 const SC zero = STS::zero();
480 const SC one = STS::one();
483 GO numAggs = aggregates->GetNumAggregates();
489 ArrayRCP<LO> aggStart;
490 ArrayRCP< GO > aggToRowMap;
491 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
495 for (GO i=0; i<numAggs; ++i) {
496 LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
497 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
501 const size_t NSDim = fineNullspace->getNumVectors();
504 GO indexBase=A->getRowMap()->getIndexBase();
506 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
507 const RCP<const Map> uniqueMap = A->getDomainMap();
508 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
509 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
510 fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
513 ArrayRCP< ArrayRCP<const SC> > fineNS(NSDim);
514 for (
size_t i=0; i<NSDim; ++i)
515 fineNS[i] = fineNullspaceWithOverlap->getData(i);
518 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
520 ArrayRCP< ArrayRCP<SC> > coarseNS(NSDim);
521 for (
size_t i=0; i<NSDim; ++i)
522 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
527 RCP<const Map > rowMapForPtent = A->getRowMap();
528 const Map& rowMapForPtentRef = *rowMapForPtent;
532 RCP<const Map> colMap = A->getColMap();
534 RCP<const Map > ghostQMap;
535 RCP<MultiVector> ghostQvalues;
536 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
537 RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
538 ArrayRCP< ArrayRCP<SC> > ghostQvals;
539 ArrayRCP< ArrayRCP<GO> > ghostQcols;
540 ArrayRCP< GO > ghostQrows;
543 for (LO j=0; j<numAggs; ++j) {
544 for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
545 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) ==
false) {
546 ghostGIDs.push_back(aggToRowMap[k]);
550 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
551 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
553 indexBase, A->getRowMap()->getComm());
555 ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
559 ghostQcolumns.resize(NSDim);
560 for (
size_t i=0; i<NSDim; ++i)
561 ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
562 ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
563 if (ghostQvalues->getLocalLength() > 0) {
564 ghostQvals.resize(NSDim);
565 ghostQcols.resize(NSDim);
566 for (
size_t i=0; i<NSDim; ++i) {
567 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
568 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
570 ghostQrows = ghostQrowNums->getDataNonConst(0);
574 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
577 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
580 Array<GO> globalColPtr(maxAggSize*NSDim,0);
581 Array<LO> localColPtr(maxAggSize*NSDim,0);
582 Array<SC> valPtr(maxAggSize*NSDim,0.);
585 const Map& coarseMapRef = *coarseMap;
588 ArrayRCP<size_t> ptent_rowptr;
589 ArrayRCP<LO> ptent_colind;
590 ArrayRCP<Scalar> ptent_values;
593 ArrayView<size_t> rowptr_v;
594 ArrayView<LO> colind_v;
595 ArrayView<Scalar> values_v;
598 Array<size_t> rowptr_temp;
599 Array<LO> colind_temp;
600 Array<Scalar> values_temp;
602 RCP<CrsMatrix> PtentCrs;
604 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(
new CrsMatrixWrap(rowMapForPtent, NSDim));
605 PtentCrs = PtentCrsWrap->getCrsMatrix();
606 Ptentative = PtentCrsWrap;
612 const Map& nonUniqueMapRef = *nonUniqueMap;
614 size_t total_nnz_count=0;
616 for (GO agg=0; agg<numAggs; ++agg)
618 LO myAggSize = aggStart[agg+1]-aggStart[agg];
621 Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
622 for (
size_t j=0; j<NSDim; ++j) {
623 bool bIsZeroNSColumn =
true;
624 for (LO k=0; k<myAggSize; ++k)
629 SC nsVal = fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ];
630 localQR(k,j) = nsVal;
631 if (nsVal != zero) bIsZeroNSColumn =
false;
634 GetOStream(
Runtime1,-1) <<
"length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
635 GetOStream(
Runtime1,-1) <<
"length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
641 GetOStream(
Runtime1,-1) <<
"aggToRowMap["<<agg<<
"][" << k <<
"] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
642 GetOStream(
Runtime1,-1) <<
"id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] <<
" is global element in nonUniqueMap = " <<
643 nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
644 GetOStream(
Runtime1,-1) <<
"colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
645 GetOStream(
Runtime1,-1) <<
"fineNS...=" << fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
649 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn ==
true,
Exceptions::RuntimeError,
"MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
652 Xpetra::global_size_t offset=agg*NSDim;
654 if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
659 SC tau = localQR(0,0);
664 for (
size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
665 Magnitude tmag = STS::magnitude(localQR(k,0));
668 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
670 localQR(0,0) = dtemp;
672 qrSolver.setMatrix( Teuchos::rcp(&localQR,
false) );
679 for (
size_t j=0; j<NSDim; ++j) {
680 for (
size_t k=0; k<=j; ++k) {
682 if (coarseMapRef.isNodeLocalElement(offset+k)) {
683 coarseNS[j][offset+k] = localQR(k, j);
687 GetOStream(
Errors,-1) <<
"caught error in coarseNS insert, j="<<j<<
", offset+k = "<<offset+k<<std::endl;
697 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
701 localQR(i,0) *= dtemp ;
705 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
706 for (
size_t j=0; j<NSDim; j++) {
707 for (
size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
708 localQR(i,j) = (*qFactor)(i,j);
718 for (
size_t j = 0; j < NSDim; j++)
719 for (
size_t k = 0; k < NSDim; k++) {
721 "Caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset+k);
723 if (k < as<size_t>(myAggSize))
724 coarseNS[j][offset+k] = localQR(k,j);
726 coarseNS[j][offset+k] = (k == j ? one : zero);
730 for (
size_t i = 0; i < as<size_t>(myAggSize); i++)
731 for (
size_t j = 0; j < NSDim; j++)
732 localQR(i,j) = (j == i ? one : zero);
739 for (GO j=0; j<myAggSize; ++j) {
743 GO globalRow = aggToRowMap[aggStart[agg]+j];
746 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) ==
false ) {
747 ghostQrows[qctr] = globalRow;
748 for (
size_t k=0; k<NSDim; ++k) {
749 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
750 ghostQvals[k][qctr] = localQR(j,k);
755 for (
size_t k=0; k<NSDim; ++k) {
757 if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
758 localColPtr[nnz] = agg * NSDim + k;
759 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
760 valPtr[nnz] = localQR(j,k);
766 GetOStream(
Errors,-1) <<
"caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
771 Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
775 <<
"caught error during Ptent row insertion, global row "
776 << globalRow << std::endl;
787 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates may cross process boundaries" << std::endl;
790 RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
791 targetQrowNums->putScalar(-1);
792 targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
793 ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
796 Array<GO> gidsToImport;
797 gidsToImport.reserve(targetQrows.size());
798 for (
typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
800 gidsToImport.push_back(*r);
803 RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
804 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
805 gidsToImport, indexBase, A->getRowMap()->getComm() );
808 importer = ImportFactory::Build(ghostQMap, reducedMap);
810 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
811 for (
size_t i=0; i<NSDim; ++i) {
812 targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
813 targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
815 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
816 targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
818 ArrayRCP< ArrayRCP<SC> > targetQvals;
819 ArrayRCP<ArrayRCP<GO> > targetQcols;
820 if (targetQvalues->getLocalLength() > 0) {
821 targetQvals.resize(NSDim);
822 targetQcols.resize(NSDim);
823 for (
size_t i=0; i<NSDim; ++i) {
824 targetQvals[i] = targetQvalues->getDataNonConst(i);
825 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
829 valPtr = Array<SC>(NSDim,0.);
830 globalColPtr = Array<GO>(NSDim,0);
831 for (
typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
832 if (targetQvalues->getLocalLength() > 0) {
833 for (
size_t j=0; j<NSDim; ++j) {
834 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
835 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
837 Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
841 Ptentative->fillComplete(coarseMap, A->getDomainMap());
848 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
849 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
850 RCP<const Map> rowMap = A->getRowMap();
851 RCP<const Map> colMap = A->getColMap();
852 const size_t numRows = rowMap->getLocalNumElements();
854 typedef Teuchos::ScalarTraits<SC> STS;
855 typedef typename STS::magnitudeType Magnitude;
856 const SC zero = STS::zero();
857 const SC one = STS::one();
858 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
860 const GO numAggs = aggregates->GetNumAggregates();
861 const size_t NSDim = fineNullspace->getNumVectors();
862 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
867 const bool &doQRStep = pL.get<
bool>(
"tentative: calculate qr");
868 const bool &constantColSums = pL.get<
bool>(
"tentative: constant column sums");
871 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
882 ArrayRCP<LO> aggStart;
883 ArrayRCP<LO> aggToRowMapLO;
884 ArrayRCP<GO> aggToRowMapGO;
886 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
887 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
890 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
892 <<
"using GO->LO conversion with performance penalty" << std::endl;
894 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
897 ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
898 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
899 for (
size_t i = 0; i < NSDim; i++) {
900 fineNS[i] = fineNullspace->getData(i);
901 if (coarseMap->getLocalNumElements() > 0)
902 coarseNS[i] = coarseNullspace->getDataNonConst(i);
905 size_t nnzEstimate = numRows * NSDim;
908 Ptentative = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
909 RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
911 ArrayRCP<size_t> iaPtent;
912 ArrayRCP<LO> jaPtent;
913 ArrayRCP<SC> valPtent;
915 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
917 ArrayView<size_t> ia = iaPtent();
918 ArrayView<LO> ja = jaPtent();
919 ArrayView<SC> val = valPtent();
922 for (
size_t i = 1; i <= numRows; i++)
923 ia[i] = ia[i-1] + NSDim;
925 for (
size_t j = 0; j < nnzEstimate; j++) {
935 for (GO agg = 0; agg < numAggs; agg++) {
936 LO aggSize = aggStart[agg+1] - aggStart[agg];
938 Xpetra::global_size_t offset = agg*NSDim;
943 Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
945 for (
size_t j = 0; j < NSDim; j++)
946 for (LO k = 0; k < aggSize; k++)
947 localQR(k,j) = fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
949 for (
size_t j = 0; j < NSDim; j++)
950 for (LO k = 0; k < aggSize; k++)
951 localQR(k,j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
955 for (
size_t j = 0; j < NSDim; j++) {
956 bool bIsZeroNSColumn =
true;
958 for (LO k = 0; k < aggSize; k++)
959 if (localQR(k,j) != zero)
960 bIsZeroNSColumn =
false;
963 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
968 if (aggSize >= Teuchos::as<LO>(NSDim)) {
972 Magnitude norm = STS::magnitude(zero);
973 for (
size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
974 norm += STS::magnitude(localQR(k,0)*localQR(k,0));
975 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
978 coarseNS[0][offset] = norm;
981 for (LO i = 0; i < aggSize; i++)
982 localQR(i,0) /= norm;
985 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
986 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
990 for (
size_t j = 0; j < NSDim; j++)
991 for (
size_t k = 0; k <= j; k++)
992 coarseNS[j][offset+k] = localQR(k,j);
997 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
998 for (
size_t j = 0; j < NSDim; j++)
999 for (
size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
1000 localQR(i,j) = (*qFactor)(i,j);
1031 for (
size_t j = 0; j < NSDim; j++)
1032 for (
size_t k = 0; k < NSDim; k++)
1033 if (k < as<size_t>(aggSize))
1034 coarseNS[j][offset+k] = localQR(k,j);
1036 coarseNS[j][offset+k] = (k == j ? one : zero);
1039 for (
size_t i = 0; i < as<size_t>(aggSize); i++)
1040 for (
size_t j = 0; j < NSDim; j++)
1041 localQR(i,j) = (j == i ? one : zero);
1047 for (LO j = 0; j < aggSize; j++) {
1048 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
1050 size_t rowStart = ia[localRow];
1051 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1053 if (localQR(j,k) != zero) {
1054 ja [rowStart+lnnz] = offset + k;
1055 val[rowStart+lnnz] = localQR(j,k);
1065 GetOStream(
Warnings0) <<
"TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1075 for (GO agg = 0; agg < numAggs; agg++) {
1076 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1077 Xpetra::global_size_t offset = agg*NSDim;
1081 for (LO j = 0; j < aggSize; j++) {
1086 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
1088 const size_t rowStart = ia[localRow];
1090 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1092 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg]+j]];
1093 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1094 if (qr_jk != zero) {
1095 ja [rowStart+lnnz] = offset + k;
1096 val[rowStart+lnnz] = qr_jk;
1101 for (
size_t j = 0; j < NSDim; j++)
1102 coarseNS[j][offset+j] = one;
1106 for (GO agg = 0; agg < numAggs; agg++) {
1107 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1108 Xpetra::global_size_t offset = agg*NSDim;
1109 for (LO j = 0; j < aggSize; j++) {
1111 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]);
1113 const size_t rowStart = ia[localRow];
1115 for (
size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1117 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j])];
1118 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1119 if (qr_jk != zero) {
1120 ja [rowStart+lnnz] = offset + k;
1121 val[rowStart+lnnz] = qr_jk;
1126 for (
size_t j = 0; j < NSDim; j++)
1127 coarseNS[j][offset+j] = one;
1136 size_t ia_tmp = 0, nnz = 0;
1137 for (
size_t i = 0; i < numRows; i++) {
1138 for (
size_t j = ia_tmp; j < ia[i+1]; j++)
1139 if (ja[j] != INVALID) {
1147 if (rowMap->lib() == Xpetra::UseTpetra) {
1151 jaPtent .resize(nnz);
1152 valPtent.resize(nnz);
1155 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1157 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1161 RCP<ParameterList> FCparams;
1162 if(pL.isSublist(
"matrixmatrix: kernel params"))
1163 FCparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1165 FCparams= rcp(
new ParameterList);
1167 FCparams->set(
"compute global constants",FCparams->get(
"compute global constants",
false));
1168 std::string levelIDs =
toString(levelID);
1169 FCparams->set(
"Timer Label",std::string(
"MueLu::TentativeP-")+levelIDs);
1170 RCP<const Export> dummy_e;
1171 RCP<const Import> dummy_i;
1173 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(),dummy_i,dummy_e,FCparams);