119 if (
Get<bool>(currentLevel,
"Filtering") ==
false) {
120 GetOStream(
Runtime0) <<
"Filtered matrix is not being constructed as no filtering is being done" << std::endl;
121 Set(currentLevel,
"A", A);
126 bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
130 bool use_spread_lumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
131 if (use_spread_lumping && (!lumping) )
132 throw std::runtime_error(
"Must also request 'filtered matrix: use lumping' in order to use spread lumping");
134 if (use_spread_lumping) {
138 double DdomAllowGrowthRate = 1.1;
139 double DdomCap = 2.0;
140 if (use_spread_lumping) {
141 DdomAllowGrowthRate = pL.get<
double>(
"filtered matrix: spread lumping diag dom growth factor");
142 DdomCap = pL.get<
double>(
"filtered matrix: spread lumping diag dom cap");
144 bool use_root_stencil = lumping && pL.get<
bool>(
"filtered matrix: use root stencil");
145 if (use_root_stencil)
147 double dirichlet_threshold = pL.get<
double>(
"filtered matrix: Dirichlet threshold");
148 if(dirichlet_threshold >= 0.0)
149 GetOStream(
Runtime0) <<
"Filtering Dirichlet threshold of "<<dirichlet_threshold << std::endl;
151 if(use_root_stencil || pL.get<
bool>(
"filtered matrix: reuse graph"))
160 FILE * f = fopen(
"graph.dat",
"w");
161 size_t numGRows = G->GetNodeNumVertices();
162 for (
size_t i = 0; i < numGRows; i++) {
164 ArrayView<const LO> indsG = G->getNeighborVertices(i);
165 for(
size_t j=0; j<(size_t)indsG.size(); j++) {
166 fprintf(f,
"%d %d 1.0\n",(
int)i,(
int)indsG[j]);
172 RCP<ParameterList> fillCompleteParams(
new ParameterList);
173 fillCompleteParams->set(
"No Nonlocal Changes",
true);
175 RCP<Matrix> filteredA;
176 if(use_root_stencil) {
177 filteredA = MatrixFactory::Build(A->getCrsGraph());
178 filteredA->fillComplete(fillCompleteParams);
179 filteredA->resumeFill();
180 BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel,*filteredA, use_spread_lumping,DdomAllowGrowthRate, DdomCap);
181 filteredA->fillComplete(fillCompleteParams);
184 else if (pL.get<
bool>(
"filtered matrix: reuse graph")) {
185 filteredA = MatrixFactory::Build(A->getCrsGraph());
186 filteredA->resumeFill();
187 BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA);
192 filteredA->fillComplete(fillCompleteParams);
196 filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
197 BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA);
201 filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
208 Xpetra::IO<SC,LO,GO,NO>::Write(
"filteredA.dat", *filteredA);
211 Xpetra::IO<SC,LO,GO,NO>::Write(
"A.dat", *A);
212 RCP<Matrix> origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
213 BuildNew(*A, *G, lumping, dirichlet_threshold,*origFilteredA);
214 if (use_spread_lumping)
ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap);
215 origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
216 Xpetra::IO<SC,LO,GO,NO>::Write(
"origFilteredA.dat", *origFilteredA);
220 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
222 if (pL.get<
bool>(
"filtered matrix: reuse eigenvalue")) {
227 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
230 Set(currentLevel,
"A", filteredA);
250 BuildReuse(
const Matrix& A,
const GraphBase& G,
const bool lumping,
double dirichletThresh, Matrix& filteredA)
const {
251 using TST =
typename Teuchos::ScalarTraits<SC>;
252 SC zero = TST::zero();
255 size_t blkSize = A.GetFixedBlockSize();
257 ArrayView<const LO> inds;
258 ArrayView<const SC> valsA;
259#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
265 Array<char> filter( std::max(blkSize*G.
GetImportMap()->getLocalNumElements(),
266 A.getColMap()->getLocalNumElements()),
270 for (
size_t i = 0; i < numGRows; i++) {
273 for (
size_t j = 0; j < as<size_t>(indsG.size()); j++)
274 for (
size_t k = 0; k < blkSize; k++)
275 filter[indsG[j]*blkSize+k] = 1;
277 for (
size_t k = 0; k < blkSize; k++) {
278 LO row = i*blkSize + k;
280 A.getLocalRowView(row, inds, valsA);
282 size_t nnz = inds.size();
286#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
288 ArrayView<const SC> vals1;
289 filteredA.getLocalRowView(row, inds, vals1);
290 vals = ArrayView<SC>(
const_cast<SC*
>(vals1.getRawPtr()), nnz);
292 memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*
sizeof(SC));
294 vals = Array<SC>(valsA);
297 SC ZERO = Teuchos::ScalarTraits<SC>::zero();
299 SC A_rowsum = ZERO, F_rowsum = ZERO;
300 for(LO l = 0; l < (LO)inds.size(); l++)
301 A_rowsum += valsA[l];
303 if (lumping ==
false) {
304 for (
size_t j = 0; j < nnz; j++)
305 if (!filter[inds[j]])
312 for (
size_t j = 0; j < nnz; j++) {
313 if (filter[inds[j]]) {
314 if (inds[j] == row) {
321 diagExtra += vals[j];
331 if (diagIndex != -1) {
333 vals[diagIndex] += diagExtra;
334 if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
337 for(LO l = 0; l < (LO)nnz; l++)
340 vals[diagIndex] = TST::one();
346#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
349 filteredA.replaceLocalValues(row, inds, vals);
354 for (
size_t j = 0; j < as<size_t> (indsG.size()); j++)
355 for (
size_t k = 0; k < blkSize; k++)
356 filter[indsG[j]*blkSize+k] = 0;
460 BuildNewUsingRootStencil(
const Matrix& A,
const GraphBase& G,
double dirichletThresh,
Level& currentLevel, Matrix& filteredA,
bool use_spread_lumping,
double DdomAllowGrowthRate,
double DdomCap)
const {
461 using TST =
typename Teuchos::ScalarTraits<SC>;
462 using Teuchos::arcp_const_cast;
463 SC ZERO = Teuchos::ScalarTraits<SC>::zero();
464 SC ONE = Teuchos::ScalarTraits<SC>::one();
465 LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
468 size_t blkSize = A.GetFixedBlockSize();
469 size_t numRows = A.getMap()->getLocalNumElements();
470 ArrayView<const LO> indsA;
471 ArrayView<const SC> valsA;
472 ArrayRCP<const size_t> rowptr;
473 ArrayRCP<const LO> inds;
474 ArrayRCP<const SC> vals_const;
480 RCP<CrsMatrix> filteredAcrs =
dynamic_cast<const CrsMatrixWrap*
>(&filteredA)->getCrsMatrix();
481 filteredAcrs->getAllValues(rowptr,inds,vals_const);
482 vals = arcp_const_cast<SC>(vals_const);
483 Array<bool> vals_dropped_indicator(vals.size(),
false);
488 LO numAggs = aggregates->GetNumAggregates();
491 RCP<const Map> rowMap = A.getRowMap();
492 RCP<const Map> colMap = A.getColMap();
497 Array<LO> diagIndex(numRows,INVALID);
498 Array<SC> diagExtra(numRows,ZERO);
504 typename Aggregates::LO_view ptr, nodes, unaggregated;
505 typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h;
507 aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated);
508 nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr);
509 nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes);
510 nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated);
511 Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr);
512 Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes);
513 Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated);
514 Teuchos::ArrayRCP<const LO> vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
516 LO graphNumCols = G.
GetImportMap()->getLocalNumElements();
517 Array<bool> filter(graphNumCols,
false);
520 for(LO i=0; i< (LO)nodesInAgg.unaggregated_h.extent(0); i++) {
521 for (LO m = 0; m < (LO)blkSize; m++) {
522 LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i),m);
523 if (row >= (LO)numRows)
continue;
524 size_t index_start = rowptr[row];
525 A.getLocalRowView(row, indsA, valsA);
526 for(LO k=0; k<(LO)indsA.size(); k++) {
527 if(row == indsA[k]) {
528 vals[index_start+k] = ONE;
532 vals[index_start+k] = ZERO;
538 std::vector<LO> badCount(numAggs,0);
542 for(LO i=0; i<numAggs; i++)
543 maxAggSize = std::max(maxAggSize,nodesInAgg.ptr_h(i+1) - nodesInAgg.ptr_h(i));
550 size_t numNewDrops=0;
551 size_t numOldDrops=0;
552 size_t numFixedDiags=0;
553 size_t numSymDrops = 0;
555 for(LO i=0; i<numAggs; i++) {
556 LO numNodesInAggregate = nodesInAgg.ptr_h(i+1) - nodesInAgg.ptr_h(i);
557 if(numNodesInAggregate == 0)
continue;
560 LO root_node = INVALID;
561 for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) {
562 if(aggregates->IsRoot(nodesInAgg.nodes_h(k))) {
563 root_node = nodesInAgg.nodes_h(k);
break;
567 TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID,
574 goodAggNeighbors.resize(0);
575 for(LO k=0; k<(LO) goodNodeNeighbors.size(); k++) {
576 goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]);
583 badAggNeighbors.resize(0);
584 for(LO j = 0; j < (LO)blkSize; j++) {
585 LO row = amalgInfo->ComputeLocalDOF(root_node,j);
586 if (row >= (LO)numRows)
continue;
587 A.getLocalRowView(row, indsA, valsA);
588 for(LO k=0; k<(LO)indsA.size(); k++) {
589 if ( (indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) {
590 LO node = amalgInfo->ComputeLocalNode(indsA[k]);
591 LO agg = vertex2AggId[node];
592 if(!std::binary_search(goodAggNeighbors.begin(),goodAggNeighbors.end(),agg))
593 badAggNeighbors.push_back(agg);
602 for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) {
604 for (LO kk=0; kk < nodeNeighbors.size(); kk++) {
605 if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs))
606 (badCount[vertex2AggId[nodeNeighbors[kk]]])++;
610 reallyBadAggNeighbors.resize(0);
611 for (LO k=0; k < (LO) badAggNeighbors.size(); k++) {
612 if (badCount[badAggNeighbors[k]] <= 1 ) reallyBadAggNeighbors.push_back(badAggNeighbors[k]);
614 for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) {
616 for (LO kk=0; kk < nodeNeighbors.size(); kk++) {
617 if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs))
618 badCount[vertex2AggId[nodeNeighbors[kk]]] = 0;
624 for(LO b=0; b<(LO)reallyBadAggNeighbors.size(); b++) {
625 LO bad_agg = reallyBadAggNeighbors[b];
626 for (LO k=nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg+1); k++) {
627 LO bad_node = nodesInAgg.nodes_h(k);
628 for(LO j = 0; j < (LO)blkSize; j++) {
629 LO bad_row = amalgInfo->ComputeLocalDOF(bad_node,j);
630 if (bad_row >= (LO)numRows)
continue;
631 size_t index_start = rowptr[bad_row];
632 A.getLocalRowView(bad_row, indsA, valsA);
633 for(LO l = 0; l < (LO)indsA.size(); l++) {
634 if(indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start+l] ==
false) {
635 vals_dropped_indicator[index_start + l] =
true;
636 vals[index_start + l] = ZERO;
637 diagExtra[bad_row] += valsA[l];
648 for(LO k=nodesInAgg.ptr_h(i); k<nodesInAgg.ptr_h(i+1); k++) {
649 LO row_node = nodesInAgg.nodes_h(k);
653 for (
size_t j = 0; j < as<size_t>(indsG.size()); j++)
654 filter[indsG[j]]=
true;
656 for (LO m = 0; m < (LO)blkSize; m++) {
657 LO row = amalgInfo->ComputeLocalDOF(row_node,m);
658 if (row >= (LO)numRows)
continue;
659 size_t index_start = rowptr[row];
660 A.getLocalRowView(row, indsA, valsA);
662 for(LO l = 0; l < (LO)indsA.size(); l++) {
663 int col_node = amalgInfo->ComputeLocalNode(indsA[l]);
664 bool is_good = filter[col_node];
665 if (indsA[l] == row) {
667 vals[index_start + l] = valsA[l];
672 if(vals_dropped_indicator[index_start +l] ==
true) {
673 if(is_good) numOldDrops++;
681 if(is_good && indsA[l] < (LO)numRows) {
682 int agg = vertex2AggId[col_node];
683 if(std::binary_search(reallyBadAggNeighbors.begin(),reallyBadAggNeighbors.end(),agg))
688 vals[index_start+l] = valsA[l];
691 if(!filter[col_node]) numOldDrops++;
693 diagExtra[row] += valsA[l];
694 vals[index_start+l]=ZERO;
695 vals_dropped_indicator[index_start+l]=
true;
702 for (
size_t j = 0; j < as<size_t>(indsG.size()); j++)
703 filter[indsG[j]]=
false;
708 if (!use_spread_lumping) {
710 for(LO row=0; row <(LO)numRows; row++) {
711 if (diagIndex[row] != INVALID) {
712 size_t index_start = rowptr[row];
713 size_t diagIndexInMatrix = index_start + diagIndex[row];
715 vals[diagIndexInMatrix] += diagExtra[row];
716 SC A_rowsum=ZERO, A_absrowsum = ZERO, F_rowsum = ZERO;
719 if( (dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) {
722 A.getLocalRowView(row, indsA, valsA);
726 for(LO l = 0; l < (LO)indsA.size(); l++) {
727 A_rowsum += valsA[l];
728 A_absrowsum+=std::abs(valsA[l]);
730 for(LO l = 0; l < (LO)indsA.size(); l++)
731 F_rowsum += vals[index_start+l];
745 for(
size_t l=rowptr[row]; l<rowptr[row+1]; l++) {
748 vals[diagIndexInMatrix] = TST::one();
759 for(LO row=0; row<(LO)numRows; row++) {
760 filteredA.replaceLocalValues(row, inds(rowptr[row],rowptr[row+1]-rowptr[row]), vals(rowptr[row],rowptr[row+1]-rowptr[row]));
764 size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags=0;
766 MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops);
767 MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops);
768 MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags);
769 GetOStream(
Runtime0)<<
"Filtering out "<<g_newDrops<<
" edges, in addition to the "<<g_oldDrops<<
" edges dropped earlier"<<std::endl;
787 using TST =
typename Teuchos::ScalarTraits<SC>;
788 SC zero = TST::zero();
791 ArrayView<const LO> inds;
792 ArrayView<const SC> vals;
793 ArrayView<const LO> finds;
796 SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum;
797 SC diag, gamma, alpha;
798 LO NumPosKept, NumNegKept;
802 SC PosFilteredSum, NegFilteredSum;
805 SC rho = as<Scalar>(irho);
806 SC rho2 = as<Scalar>(irho2);
808 for (LO row = 0; row < (LO) A.getRowMap()->getLocalNumElements(); row++) {
809 noLumpDdom = as<Scalar>(10000.0);
822 ArrayView<const SC> tvals;
823 A.getLocalRowView(row, inds, vals);
824 size_t nnz = inds.size();
825 if (nnz == 0)
continue;
826 filteredA.getLocalRowView(row, finds, tvals);
828 fvals = ArrayView<SC>(
const_cast<SC*
>(tvals.getRawPtr()), nnz);
830 LO diagIndex = -1, fdiagIndex = -1;
832 PosOffSum=zero; NegOffSum=zero; PosOffDropSum=zero; NegOffDropSum=zero;
833 diag=zero; NumPosKept=0; NumNegKept=0;
836 for (
size_t j = 0; j < nnz; j++) {
837 if (inds[j] == row) {
842 if (TST::real(vals[j]) > TST::real(zero) ) PosOffSum += vals[j];
843 else NegOffSum += vals[j];
846 PosOffDropSum = PosOffSum;
847 NegOffDropSum = NegOffSum;
851 for (
size_t jj = 0; jj < (size_t) finds.size(); jj++) {
852 while( inds[j] != finds[jj] ) j++;
854 if (finds[jj] == row) fdiagIndex = jj;
856 if (TST::real(vals[j]) > TST::real(zero) ) {
857 PosOffDropSum -= fvals[jj];
858 if (TST::real(fvals[jj]) != TST::real(zero) ) NumPosKept++;
861 NegOffDropSum -= fvals[jj];
862 if (TST::real(fvals[jj]) != TST::real(zero) ) NumNegKept++;
868 if (TST::magnitude(diag) != TST::magnitude(zero) )
869 noLumpDdom = (PosOffSum - NegOffSum)/diag;
874 Target = rho*noLumpDdom;
875 if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2;
877 PosFilteredSum = PosOffSum - PosOffDropSum;
878 NegFilteredSum = NegOffSum - NegOffDropSum;
888 diag += PosOffDropSum;
891 gamma = -NegOffDropSum - PosFilteredSum;
893 if (TST::real(gamma) < TST::real(zero) ) {
901 if (fdiagIndex != -1) fvals[fdiagIndex] = diag;
903 for(LO jj = 0; jj < (LO)finds.size(); jj++) {
904 while( inds[j] != finds[jj] ) j++;
906 if ((j != diagIndex)&&(TST::real(vals[j]) > TST::real(zero) ) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)))
907 fvals[jj] = -gamma*(vals[j]/PosFilteredSum);
921 bool flipPosOffDiagsToNeg =
false;
933 if (( TST::real(diag) > TST::real(gamma)) &&
934 ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) {
941 if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma;
943 else if (NumNegKept > 0) {
949 numer = -NegFilteredSum - Target*(diag-gamma);
950 denom = gamma*(Target - TST::one());
972 if ( TST::magnitude(denom) < TST::magnitude(numer) ) alpha = TST::one();
973 else alpha = numer/denom;
974 if ( TST::real(alpha) < TST::real(zero)) alpha = zero;
975 if ( TST::real(diag) < TST::real((one-alpha)*gamma) ) alpha = TST::one();
979 if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one-alpha)*gamma;
989 SC temp = (NegFilteredSum+alpha*gamma)/NegFilteredSum;
991 for(LO jj = 0; jj < (LO)finds.size(); jj++) {
992 while( inds[j] != finds[jj] ) j++;
994 if ( (jj != fdiagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) &&
995 ( TST::real(vals[j]) < TST::real(zero) ) )
996 fvals[jj] = temp*vals[j];
1002 if (NumPosKept > 0) {
1005 flipPosOffDiagsToNeg =
true;
1008 for(LO jj = 0; jj < (LO)finds.size(); jj++) {
1009 while( inds[j] != finds[jj] ) j++;
1011 if ( (j != diagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) &&
1012 (TST::real(vals[j]) > TST::real(zero) ))
1013 fvals[jj] = -gamma/( (SC) NumPosKept);
1019 if (!flipPosOffDiagsToNeg) {
1024 for(LO jj = 0; jj < (LO)finds.size(); jj++) {
1025 while( inds[j] != finds[jj] ) j++;
1027 if ((jj != fdiagIndex)&& (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero;