46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
49 #include <Xpetra_Map.hpp>
50 #include <Xpetra_Vector.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
61 #include "MueLu_Aggregates.hpp"
67 #include "MueLu_Utilities.hpp"
69 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
70 #include "MueLu_Utilities_kokkos.hpp"
75 namespace NotayUtils {
76 template <
class LocalOrdinal>
78 return min + as<LocalOrdinal>((max-min+1) * (
static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
81 template <
class LocalOrdinal>
84 LO n = Teuchos::as<LO>(list.size());
85 for(LO i = 0; i < n-1; i++)
90 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
92 RCP<ParameterList> validParamList = rcp(
new ParameterList());
94 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
96 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
102 #undef SET_VALID_ENTRY
105 validParamList->set< RCP<const FactoryBase> >(
"A",
null,
"Generating factory of the matrix");
106 validParamList->set< RCP<const FactoryBase> >(
"Graph",
null,
"Generating factory of the graph");
107 validParamList->set< RCP<const FactoryBase> >(
"DofsPerNode",
null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
108 validParamList->set< RCP<const FactoryBase> >(
"AggregateQualities",
null,
"Generating factory for variable \'AggregateQualities\'");
111 return validParamList;
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
116 const ParameterList& pL = GetParameterList();
118 Input(currentLevel,
"A");
119 Input(currentLevel,
"Graph");
120 Input(currentLevel,
"DofsPerNode");
121 if (pL.get<
bool>(
"aggregation: compute aggregate qualities")) {
122 Input(currentLevel,
"AggregateQualities");
129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
132 using STS = Teuchos::ScalarTraits<Scalar>;
133 using MT =
typename STS::magnitudeType;
135 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
137 RCP<Teuchos::FancyOStream> out;
138 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
139 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
140 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
142 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
145 const ParameterList& pL = GetParameterList();
147 const MT kappa =
static_cast<MT
>(pL.get<
double>(
"aggregation: Dirichlet threshold"));
148 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
150 "Pairwise requires kappa > 2"
151 " otherwise all rows are considered as Dirichlet rows.");
155 if (pL.isParameter(
"aggregation: pairwise: size"))
156 maxNumIter = pL.get<
int>(
"aggregation: pairwise: size");
157 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
159 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
160 " must be a strictly positive integer");
163 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel,
"Graph");
164 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel,
"A");
168 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
169 aggregates->setObjectLabel(
"PW");
171 const LO numRows = graph->GetNodeNumVertices();
174 std::vector<unsigned> aggStat(numRows,
READY);
177 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
179 "Pairwise only supports one dof per node");
186 std::string orderingStr = pL.get<std::string>(
"aggregation: ordering");
193 ordering = O_NATURAL;
194 if (orderingStr ==
"random" ) ordering = O_RANDOM;
195 else if(orderingStr ==
"natural") {}
196 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
197 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
207 Array<LO> orderingVector(numRows);
208 for (LO i = 0; i < numRows; i++)
209 orderingVector[i] = i;
210 if (ordering == O_RANDOM)
212 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
213 else if (ordering == O_CUTHILL_MCKEE) {
215 auto localVector = rcmVector->getData(0);
216 for (LO i = 0; i < numRows; i++)
217 orderingVector[i] = localVector[i];
222 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
223 BuildInitialAggregates(pL, A, orderingVector(), kappa,
224 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
226 "Initial pairwise aggregation failed to aggregate all nodes");
227 LO numLocalAggregates = aggregates->GetNumAggregates();
228 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
229 << A->getNodeNumRows() / numLocalAggregates << std::endl;
238 LO numLocalDirichletNodes = numDirichletNodes;
239 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
240 BuildOnRankLocalMatrix(A->getLocalMatrix(), coarseLocalA);
241 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
243 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
244 localVertex2AggId(), intermediateP);
247 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
252 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
253 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
254 for(LO i=0; i<(LO)numRows; i++) {
257 LO agg=vertex2AggId[i];
258 agg2vertex[agg].push_back(i);
261 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
262 for(LO i = 0; i < numRows; i++) {
266 int agg = vertex2AggId[i];
267 std::vector<LO> & myagg = agg2vertex[agg];
269 size_t nnz = A->getNumEntriesInLocalRow(i);
270 ArrayView<const LO> indices;
271 ArrayView<const SC> vals;
272 A->getLocalRowView(i, indices, vals);
274 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
275 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
277 if(indices[colidx] < numRows) {
278 for(LO j=0; j<(LO)myagg.size(); j++)
279 if (vertex2AggId[indices[colidx]] == agg)
283 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
284 mysum += vals[colidx];
287 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
291 rowSum_h[agg] = mysum;
297 Array<LO> localOrderingVector(numRows);
298 for (LO i = 0; i < numRows; i++)
299 localOrderingVector[i] = i;
300 if (ordering == O_RANDOM)
302 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
303 else if (ordering == O_CUTHILL_MCKEE) {
305 auto localVector = rcmVector->getData(0);
306 for (LO i = 0; i < numRows; i++)
307 localOrderingVector[i] = localVector[i];
312 numLocalAggregates = 0;
313 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
314 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
315 localVertex2AggId.resize(numNonAggregatedNodes, -1);
316 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
317 localAggStat, localVertex2AggId,
318 numLocalAggregates, numNonAggregatedNodes);
322 numLocalDirichletNodes = 0;
325 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
326 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
327 for(
size_t vertexIdx = 0; vertexIdx < A->getNodeNumRows(); ++vertexIdx) {
328 LO oldAggIdx = vertex2AggId[vertexIdx];
329 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
330 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
335 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
336 << A->getNodeNumRows() / numLocalAggregates << std::endl;
338 aggregates->SetNumAggregates(numLocalAggregates);
339 aggregates->AggregatesCrossProcessors(
false);
340 aggregates->ComputeAggregateSizes(
true);
343 Set(currentLevel,
"Aggregates", aggregates);
344 GetOStream(
Statistics0) << aggregates->description() << std::endl;
348 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
351 const RCP<const Matrix>& A,
352 const Teuchos::ArrayView<const LO> & orderingVector,
353 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
355 std::vector<unsigned>& aggStat,
356 LO& numNonAggregatedNodes,
357 LO& numDirichletNodes)
const {
359 Monitor m(*
this,
"BuildInitialAggregates");
360 using STS = Teuchos::ScalarTraits<Scalar>;
361 using MT =
typename STS::magnitudeType;
362 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
364 RCP<Teuchos::FancyOStream> out;
365 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
366 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
367 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
369 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
373 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
374 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
375 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
376 const MT MT_TWO = MT_ONE + MT_ONE;
377 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
378 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
380 const MT kappa_init = kappa / (kappa - MT_TWO);
381 const LO numRows = aggStat.size();
382 const int myRank = A->getMap()->getComm()->getRank();
386 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
387 double tie_less = 1.0 - tie_criterion;
388 double tie_more = 1.0 + tie_criterion;
399 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
400 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
401 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
404 ArrayRCP<LO> vertex2AggId_rcp = aggregates.
GetVertex2AggId()->getDataNonConst(0);
405 ArrayRCP<LO> procWinner_rcp = aggregates.
GetProcWinner() ->getDataNonConst(0);
406 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
407 ArrayView<LO> procWinner = procWinner_rcp();
413 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
414 MT aii = STS::magnitude(D[row]);
415 MT rowsum = AbsRs[row];
417 if(aii >= kappa_init * rowsum) {
418 *out <<
"Flagging index " << row <<
" as dirichlet "
419 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
421 --numNonAggregatedNodes;
428 LO aggIndex = LO_ZERO;
429 for(LO i = 0; i < numRows; i++) {
430 LO current_idx = orderingVector[i];
432 if(aggStat[current_idx] !=
READY)
435 MT best_mu = MT_ZERO;
436 LO best_idx = LO_INVALID;
438 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
439 ArrayView<const LO> indices;
440 ArrayView<const SC> vals;
441 A->getLocalRowView(current_idx, indices, vals);
443 MT aii = STS::real(D[current_idx]);
444 MT si = STS::real(S[current_idx]);
445 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
447 LO col = indices[colidx];
448 SC val = vals[colidx];
449 if(current_idx == col || aggStat[col] !=
READY || col > numRows || val == SC_ZERO)
452 MT aij = STS::real(val);
453 MT ajj = STS::real(D[col]);
454 MT sj = - STS::real(S[col]);
455 if(aii - si + ajj - sj >= MT_ZERO) {
457 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
458 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
459 MT mu = mu_top / mu_bottom;
462 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
463 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
466 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
467 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
468 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
471 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
472 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
473 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
477 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
478 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
479 <<
" = " << aii - si + ajj - sj << std::endl;
483 if(best_idx == LO_INVALID) {
484 *out <<
"No node buddy found for index " << current_idx
485 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
490 aggStat[current_idx] =
ONEPT;
491 vertex2AggId[current_idx] = aggIndex;
492 procWinner[current_idx] = myRank;
493 numNonAggregatedNodes--;
498 if(best_mu <= kappa) {
499 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
503 vertex2AggId[current_idx] = aggIndex;
504 vertex2AggId[best_idx] = aggIndex;
505 procWinner[current_idx] = myRank;
506 procWinner[best_idx] = myRank;
507 numNonAggregatedNodes-=2;
511 *out <<
"No buddy found for index " << current_idx <<
","
512 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
514 aggStat[current_idx] =
ONEPT;
515 vertex2AggId[current_idx] = aggIndex;
516 procWinner[current_idx] = myRank;
517 numNonAggregatedNodes--;
523 *out <<
"vertex2aggid :";
524 for(
int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
525 *out << i <<
"(" << vertex2AggId[i] <<
")";
533 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
536 const RCP<const Matrix>& A,
537 const Teuchos::ArrayView<const LO> & orderingVector,
538 const typename Matrix::local_matrix_type& coarseA,
539 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
540 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
542 typename Matrix::local_matrix_type::device_type>& rowSum,
543 std::vector<LocalOrdinal>& localAggStat,
544 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
545 LO& numLocalAggregates,
546 LO& numNonAggregatedNodes)
const {
547 Monitor m(*
this,
"BuildFurtherAggregates");
550 RCP<Teuchos::FancyOStream> out;
551 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
552 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
553 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
555 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
558 using value_type =
typename local_matrix_type::value_type;
559 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
560 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
561 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
563 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
567 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
568 double tie_less = 1.0 - tie_criterion;
569 double tie_more = 1.0 + tie_criterion;
571 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
577 const LO numRows =
static_cast<LO
>(coarseA.numRows());
578 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
579 typename local_matrix_type::row_map_type::HostMirror row_map_h
580 = Kokkos::create_mirror_view(coarseA.graph.row_map);
582 typename local_matrix_type::index_type::HostMirror entries_h
583 = Kokkos::create_mirror_view(coarseA.graph.entries);
585 typename local_matrix_type::values_type::HostMirror values_h
586 = Kokkos::create_mirror_view(coarseA.values);
588 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
589 for(LO entryIdx =
static_cast<LO
>(row_map_h(rowIdx));
590 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
592 if(rowIdx ==
static_cast<LO
>(entries_h(entryIdx))) {
593 diagA_h(rowIdx) = values_h(entryIdx);
598 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
599 if(localAggStat[currentIdx] !=
READY) {
603 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
604 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
605 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
606 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
607 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
608 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
609 if(currentIdx == colIdx || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero || colIdx > numRows) {
613 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
614 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
615 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx));
616 if(aii - si + ajj - sj >= MT_zero) {
617 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
618 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
622 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
623 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
626 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
627 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
628 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
631 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
632 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
633 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
636 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
637 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
638 <<
" = " << aii - si + ajj - sj << std::endl;
642 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
643 localAggStat[currentIdx] =
ONEPT;
644 localVertex2AggID[currentIdx] = numLocalAggregates;
645 --numNonAggregatedNodes;
646 ++numLocalAggregates;
648 if(best_mu <= kappa) {
649 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
652 localVertex2AggID[currentIdx] = numLocalAggregates;
653 --numNonAggregatedNodes;
656 localVertex2AggID[bestIdx] = numLocalAggregates;
657 --numNonAggregatedNodes;
659 ++numLocalAggregates;
661 *out <<
"No buddy found for index " << currentIdx <<
","
662 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
664 localAggStat[currentIdx] =
ONEPT;
665 localVertex2AggID[currentIdx] = numLocalAggregates;
666 --numNonAggregatedNodes;
667 ++numLocalAggregates;
674 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
677 typename Matrix::local_matrix_type& onrankA)
const {
678 Monitor m(*
this,
"BuildOnRankLocalMatrix");
681 RCP<Teuchos::FancyOStream> out;
682 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
683 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
684 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
686 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
689 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
690 using values_type =
typename local_matrix_type::values_type;
691 using size_type =
typename local_graph_type::size_type;
692 using col_index_type =
typename local_graph_type::data_type;
693 using array_layout =
typename local_graph_type::array_layout;
694 using memory_traits =
typename local_graph_type::memory_traits;
701 const int numRows =
static_cast<int>(localA.numRows());
702 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
703 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
704 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
705 = Kokkos::create_mirror_view(localA.graph.row_map);
706 typename local_graph_type::entries_type::HostMirror origColind_h
707 = Kokkos::create_mirror_view(localA.graph.entries);
708 typename values_type::HostMirror origValues_h
709 = Kokkos::create_mirror_view(localA.values);
716 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
717 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
718 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
720 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
724 const LO nnzOnrankA = rowPtr_h(numRows);
727 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
728 values_type values(
"onrankA values", rowPtr_h(numRows));
729 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
730 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
732 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
734 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
735 if(origColind_h(entryIdx) < numRows) {
736 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
737 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
746 nnzOnrankA, values, rowPtr, colInd);
749 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
754 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
755 typename Matrix::local_matrix_type& intermediateP)
const {
756 Monitor m(*
this,
"BuildIntermediateProlongator");
759 RCP<Teuchos::FancyOStream> out;
760 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
761 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
762 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
764 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
767 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
768 using values_type =
typename local_matrix_type::values_type;
769 using size_type =
typename local_graph_type::size_type;
770 using col_index_type =
typename local_graph_type::data_type;
771 using array_layout =
typename local_graph_type::array_layout;
772 using memory_traits =
typename local_graph_type::memory_traits;
776 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
778 const int intermediatePnnz = numRows - numDirichletNodes;
779 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
780 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
781 values_type values(
"intermediateP values", intermediatePnnz);
782 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
783 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
786 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
788 if(localVertex2AggID[rowIdx] == LO_INVALID) {
789 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
791 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
792 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
798 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
801 numRows, numLocalAggregates, intermediatePnnz,
802 values, rowPtr, colInd);
805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
808 typename Matrix::local_matrix_type& coarseA)
const {
809 Monitor m(*
this,
"BuildCoarseLocalMatrix");
811 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
812 using values_type =
typename local_matrix_type::values_type;
813 using size_type =
typename local_graph_type::size_type;
814 using col_index_type =
typename local_graph_type::data_type;
815 using array_layout =
typename local_graph_type::array_layout;
816 using memory_traits =
typename local_graph_type::memory_traits;
821 localSpGEMM(coarseA, intermediateP,
"AP", AP);
834 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
835 intermediateP.numCols() + 1);
836 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
837 intermediateP.nnz());
838 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
839 intermediateP.nnz());
841 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
843 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
844 rowPtrPt_h(intermediateP.graph.entries(entryIdx) + 1) += 1;
846 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
847 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
851 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
853 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
855 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
857 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
858 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
859 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
862 col_index_type colIdx = 0;
863 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
864 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
865 colIdx = intermediateP.graph.entries(entryIdxP);
866 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
867 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
868 colIndPt_h(entryIdxPt) = rowIdx;
869 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
880 intermediateP.numCols(),
881 intermediateP.numRows(),
883 valuesPt, rowPtrPt, colIndPt);
886 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
889 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
891 localSpGEMM(
const typename Matrix::local_matrix_type& A,
892 const typename Matrix::local_matrix_type& B,
893 const std::string matrixLabel,
894 typename Matrix::local_matrix_type& C)
const {
896 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
897 using values_type =
typename local_matrix_type::values_type;
898 using size_type =
typename local_graph_type::size_type;
899 using col_index_type =
typename local_graph_type::data_type;
900 using array_layout =
typename local_graph_type::array_layout;
901 using memory_space =
typename device_type::memory_space;
902 using memory_traits =
typename local_graph_type::memory_traits;
907 int team_work_size = 16;
908 std::string myalg(
"SPGEMM_KK_MEMORY");
909 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
910 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
911 typename col_indices_type::const_value_type,
912 typename values_type::const_value_type,
916 kh.create_spgemm_handle(alg_enum);
917 kh.set_team_work_size(team_work_size);
920 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
922 col_indices_type colIndC;
926 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
927 B.numRows(), B.numCols(),
928 A.graph.row_map, A.graph.entries,
false,
929 B.graph.row_map, B.graph.entries,
false,
933 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
935 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
936 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
940 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
941 B.numRows(), B.numCols(),
942 A.graph.row_map, A.graph.entries, A.values,
false,
943 B.graph.row_map, B.graph.entries, B.values,
false,
944 rowPtrC, colIndC, valuesC);
945 kh.destroy_spgemm_handle();
947 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Timer to be used in non-factories.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
typename device_type::execution_space execution_space
typename Matrix::local_matrix_type local_matrix_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level ¤tLevel) const
Input.
void Build(Level ¤tLevel) const
Build aggregates.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static RCP< Xpetra::Vector< Magnitude, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
void deep_copy(const View< DT, DP... > &dst, typename ViewTraits< DT, DP... >::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP... >::specialize, void >::value >::type *=nullptr)
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
Namespace for MueLu classes and methods.
@ Statistics0
Print statistics that do not involve significant additional computation.
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)