128 using STS = Teuchos::ScalarTraits<Scalar>;
129 using MT =
typename STS::magnitudeType;
131 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
133 RCP<Teuchos::FancyOStream> out;
134 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
135 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
136 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
138 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
143 const MT kappa =
static_cast<MT
>(pL.get<
double>(
"aggregation: Dirichlet threshold"));
144 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
146 "Pairwise requires kappa > 2"
147 " otherwise all rows are considered as Dirichlet rows.");
151 if (pL.isParameter(
"aggregation: pairwise: size"))
152 maxNumIter = pL.get<
int>(
"aggregation: pairwise: size");
153 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
155 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
156 " must be a strictly positive integer");
163 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
164 aggregates->setObjectLabel(
"PW");
166 const LO numRows = graph->GetNodeNumVertices();
169 std::vector<unsigned> aggStat(numRows,
READY);
172 const int DofsPerNode =
Get<int>(currentLevel,
"DofsPerNode");
174 "Pairwise only supports one dof per node");
181 std::string orderingStr = pL.get<std::string>(
"aggregation: ordering");
188 ordering = O_NATURAL;
189 if (orderingStr ==
"random" ) ordering = O_RANDOM;
190 else if(orderingStr ==
"natural") {}
191 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
200 Array<LO> orderingVector(numRows);
201 for (LO i = 0; i < numRows; i++)
202 orderingVector[i] = i;
203 if (ordering == O_RANDOM)
205 else if (ordering == O_CUTHILL_MCKEE) {
207 auto localVector = rcmVector->getData(0);
208 for (LO i = 0; i < numRows; i++)
209 orderingVector[i] = localVector[i];
213 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
215 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
217 "Initial pairwise aggregation failed to aggregate all nodes");
218 LO numLocalAggregates = aggregates->GetNumAggregates();
220 << A->getLocalNumRows() / numLocalAggregates << std::endl;
229 LO numLocalDirichletNodes = numDirichletNodes;
230 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
232 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
235 localVertex2AggId(), intermediateP);
243 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
244 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
245 for(LO i=0; i<(LO)numRows; i++) {
248 LO agg=vertex2AggId[i];
249 agg2vertex[agg].push_back(i);
252 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
253 for(LO i = 0; i < numRows; i++) {
257 int agg = vertex2AggId[i];
258 std::vector<LO> & myagg = agg2vertex[agg];
260 size_t nnz = A->getNumEntriesInLocalRow(i);
261 ArrayView<const LO> indices;
262 ArrayView<const SC> vals;
263 A->getLocalRowView(i, indices, vals);
265 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
266 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
268 if(indices[colidx] < numRows) {
269 for(LO j=0; j<(LO)myagg.size(); j++)
270 if (vertex2AggId[indices[colidx]] == agg)
274 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
275 mysum += vals[colidx];
278 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
282 rowSum_h[agg] = mysum;
284 Kokkos::deep_copy(rowSum, rowSum_h);
288 Array<LO> localOrderingVector(numRows);
289 for (LO i = 0; i < numRows; i++)
290 localOrderingVector[i] = i;
291 if (ordering == O_RANDOM)
293 else if (ordering == O_CUTHILL_MCKEE) {
295 auto localVector = rcmVector->getData(0);
296 for (LO i = 0; i < numRows; i++)
297 localOrderingVector[i] = localVector[i];
301 numLocalAggregates = 0;
302 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
303 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
304 localVertex2AggId.resize(numNonAggregatedNodes, -1);
306 localAggStat, localVertex2AggId,
307 numLocalAggregates, numNonAggregatedNodes);
311 numLocalDirichletNodes = 0;
314 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
315 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
316 for(
size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
317 LO oldAggIdx = vertex2AggId[vertexIdx];
318 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
319 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
325 << A->getLocalNumRows() / numLocalAggregates << std::endl;
327 aggregates->SetNumAggregates(numLocalAggregates);
328 aggregates->AggregatesCrossProcessors(
false);
329 aggregates->ComputeAggregateSizes(
true);
332 Set(currentLevel,
"Aggregates", aggregates);
340 const RCP<const Matrix>& A,
341 const Teuchos::ArrayView<const LO> & orderingVector,
342 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
344 std::vector<unsigned>& aggStat,
345 LO& numNonAggregatedNodes,
346 LO& numDirichletNodes)
const {
348 Monitor m(*
this,
"BuildInitialAggregates");
349 using STS = Teuchos::ScalarTraits<Scalar>;
350 using MT =
typename STS::magnitudeType;
351 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
353 RCP<Teuchos::FancyOStream> out;
354 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
355 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
356 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
358 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
362 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
363 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
364 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
365 const MT MT_TWO = MT_ONE + MT_ONE;
366 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
367 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
369 const MT kappa_init = kappa / (kappa - MT_TWO);
370 const LO numRows = aggStat.size();
371 const int myRank = A->getMap()->getComm()->getRank();
375 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
376 double tie_less = 1.0 - tie_criterion;
377 double tie_more = 1.0 + tie_criterion;
388 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
389 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
390 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
393 ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
394 ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
395 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
396 ArrayView<LO> procWinner = procWinner_rcp();
402 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
403 MT aii = STS::magnitude(D[row]);
404 MT rowsum = AbsRs[row];
406 if(aii >= kappa_init * rowsum) {
407 *out <<
"Flagging index " << row <<
" as dirichlet "
408 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
410 --numNonAggregatedNodes;
417 LO aggIndex = LO_ZERO;
418 for(LO i = 0; i < numRows; i++) {
419 LO current_idx = orderingVector[i];
421 if(aggStat[current_idx] !=
READY)
424 MT best_mu = MT_ZERO;
425 LO best_idx = LO_INVALID;
427 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
428 ArrayView<const LO> indices;
429 ArrayView<const SC> vals;
430 A->getLocalRowView(current_idx, indices, vals);
432 MT aii = STS::real(D[current_idx]);
433 MT si = STS::real(S[current_idx]);
434 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
436 LO col = indices[colidx];
437 SC val = vals[colidx];
438 if(current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
441 MT aij = STS::real(val);
442 MT ajj = STS::real(D[col]);
443 MT sj = - STS::real(S[col]);
444 if(aii - si + ajj - sj >= MT_ZERO) {
446 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
447 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
448 MT mu = mu_top / mu_bottom;
451 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
452 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
455 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
456 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
457 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
460 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
461 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
462 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
466 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
467 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
468 <<
" = " << aii - si + ajj - sj << std::endl;
472 if(best_idx == LO_INVALID) {
473 *out <<
"No node buddy found for index " << current_idx
474 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
479 aggStat[current_idx] =
ONEPT;
480 vertex2AggId[current_idx] = aggIndex;
481 procWinner[current_idx] = myRank;
482 numNonAggregatedNodes--;
487 if(best_mu <= kappa) {
488 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
492 vertex2AggId[current_idx] = aggIndex;
493 vertex2AggId[best_idx] = aggIndex;
494 procWinner[current_idx] = myRank;
495 procWinner[best_idx] = myRank;
496 numNonAggregatedNodes-=2;
500 *out <<
"No buddy found for index " << current_idx <<
","
501 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
503 aggStat[current_idx] =
ONEPT;
504 vertex2AggId[current_idx] = aggIndex;
505 procWinner[current_idx] = myRank;
506 numNonAggregatedNodes--;
512 *out <<
"vertex2aggid :";
513 for(
int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
514 *out << i <<
"(" << vertex2AggId[i] <<
")";
519 aggregates.SetNumAggregates(aggIndex);
525 const RCP<const Matrix>& A,
526 const Teuchos::ArrayView<const LO> & orderingVector,
527 const typename Matrix::local_matrix_type& coarseA,
528 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
529 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
531 typename Matrix::local_matrix_type::device_type>& rowSum,
532 std::vector<LocalOrdinal>& localAggStat,
533 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
534 LO& numLocalAggregates,
535 LO& numNonAggregatedNodes)
const {
536 Monitor m(*
this,
"BuildFurtherAggregates");
539 RCP<Teuchos::FancyOStream> out;
540 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
541 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
542 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
544 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
547 using value_type =
typename local_matrix_type::value_type;
548 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
549 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
550 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
552 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
556 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
557 double tie_less = 1.0 - tie_criterion;
558 double tie_more = 1.0 + tie_criterion;
560 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
561 Kokkos::deep_copy(rowSum_h, rowSum);
566 const LO numRows =
static_cast<LO
>(coarseA.numRows());
567 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
568 typename local_matrix_type::row_map_type::HostMirror row_map_h
569 = Kokkos::create_mirror_view(coarseA.graph.row_map);
570 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
571 typename local_matrix_type::index_type::HostMirror entries_h
572 = Kokkos::create_mirror_view(coarseA.graph.entries);
573 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
574 typename local_matrix_type::values_type::HostMirror values_h
575 = Kokkos::create_mirror_view(coarseA.values);
576 Kokkos::deep_copy(values_h, coarseA.values);
577 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
578 for(LO entryIdx =
static_cast<LO
>(row_map_h(rowIdx));
579 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
581 if(rowIdx ==
static_cast<LO
>(entries_h(entryIdx))) {
582 diagA_h(rowIdx) = values_h(entryIdx);
587 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
588 if(localAggStat[currentIdx] !=
READY) {
592 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
593 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
594 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
595 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
596 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
597 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
598 if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
602 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
603 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
604 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx));
605 if(aii - si + ajj - sj >= MT_zero) {
606 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
607 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
611 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
612 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
615 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
616 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
617 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
620 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
621 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
622 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
625 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
626 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
627 <<
" = " << aii - si + ajj - sj << std::endl;
631 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
632 localAggStat[currentIdx] =
ONEPT;
633 localVertex2AggID[currentIdx] = numLocalAggregates;
634 --numNonAggregatedNodes;
635 ++numLocalAggregates;
637 if(best_mu <= kappa) {
638 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
641 localVertex2AggID[currentIdx] = numLocalAggregates;
642 --numNonAggregatedNodes;
645 localVertex2AggID[bestIdx] = numLocalAggregates;
646 --numNonAggregatedNodes;
648 ++numLocalAggregates;
650 *out <<
"No buddy found for index " << currentIdx <<
","
651 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
653 localAggStat[currentIdx] =
ONEPT;
654 localVertex2AggID[currentIdx] = numLocalAggregates;
655 --numNonAggregatedNodes;
656 ++numLocalAggregates;
666 typename Matrix::local_matrix_type& onrankA)
const {
667 Monitor m(*
this,
"BuildOnRankLocalMatrix");
670 RCP<Teuchos::FancyOStream> out;
671 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
672 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
673 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
675 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
678 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
679 using values_type =
typename local_matrix_type::values_type;
680 using size_type =
typename local_graph_type::size_type;
681 using col_index_type =
typename local_graph_type::data_type;
682 using array_layout =
typename local_graph_type::array_layout;
683 using memory_traits =
typename local_graph_type::memory_traits;
684 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
685 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
690 const int numRows =
static_cast<int>(localA.numRows());
691 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
692 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
693 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
694 = Kokkos::create_mirror_view(localA.graph.row_map);
695 typename local_graph_type::entries_type::HostMirror origColind_h
696 = Kokkos::create_mirror_view(localA.graph.entries);
697 typename values_type::HostMirror origValues_h
698 = Kokkos::create_mirror_view(localA.values);
699 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
700 Kokkos::deep_copy(origColind_h, localA.graph.entries);
701 Kokkos::deep_copy(origValues_h, localA.values);
705 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
706 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
707 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
709 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
711 Kokkos::deep_copy(rowPtr, rowPtr_h);
713 const LO nnzOnrankA = rowPtr_h(numRows);
716 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
717 values_type values(
"onrankA values", rowPtr_h(numRows));
718 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
719 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
721 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
723 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
724 if(origColind_h(entryIdx) < numRows) {
725 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
726 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
731 Kokkos::deep_copy(colInd, colInd_h);
732 Kokkos::deep_copy(values, values_h);
735 nnzOnrankA, values, rowPtr, colInd);
743 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
744 typename Matrix::local_matrix_type& intermediateP)
const {
745 Monitor m(*
this,
"BuildIntermediateProlongator");
748 RCP<Teuchos::FancyOStream> out;
749 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
750 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
751 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
753 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
756 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
757 using values_type =
typename local_matrix_type::values_type;
758 using size_type =
typename local_graph_type::size_type;
759 using col_index_type =
typename local_graph_type::data_type;
760 using array_layout =
typename local_graph_type::array_layout;
761 using memory_traits =
typename local_graph_type::memory_traits;
762 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
763 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
765 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
767 const int intermediatePnnz = numRows - numDirichletNodes;
768 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
769 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
770 values_type values(
"intermediateP values", intermediatePnnz);
771 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
772 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
775 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
777 if(localVertex2AggID[rowIdx] == LO_INVALID) {
778 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
780 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
781 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
785 Kokkos::deep_copy(rowPtr, rowPtr_h);
786 Kokkos::deep_copy(colInd, colInd_h);
787 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
790 numRows, numLocalAggregates, intermediatePnnz,
791 values, rowPtr, colInd);
797 typename Matrix::local_matrix_type& coarseA)
const {
798 Monitor m(*
this,
"BuildCoarseLocalMatrix");
800 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
801 using values_type =
typename local_matrix_type::values_type;
802 using size_type =
typename local_graph_type::size_type;
803 using col_index_type =
typename local_graph_type::data_type;
804 using array_layout =
typename local_graph_type::array_layout;
805 using memory_traits =
typename local_graph_type::memory_traits;
806 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
807 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
823 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
824 intermediateP.numCols() + 1);
825 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
826 intermediateP.nnz());
827 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
828 intermediateP.nnz());
830 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
831 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
832 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
833 Kokkos::deep_copy(rowPtrPt_h, 0);
834 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
835 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
837 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
838 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
840 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
842 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
843 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
844 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
845 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
846 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
847 Kokkos::deep_copy(valuesP_h, intermediateP.values);
848 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
849 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
850 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
851 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
853 col_index_type colIdx = 0;
854 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
855 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
856 colIdx = entries_h(entryIdxP);
857 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
858 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
859 colIndPt_h(entryIdxPt) = rowIdx;
860 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
867 Kokkos::deep_copy(colIndPt, colIndPt_h);
868 Kokkos::deep_copy(valuesPt, valuesPt_h);
872 intermediateP.numCols(),
873 intermediateP.numRows(),
875 valuesPt, rowPtrPt, colIndPt);
878 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
883 localSpGEMM(
const typename Matrix::local_matrix_type& A,
884 const typename Matrix::local_matrix_type& B,
885 const std::string matrixLabel,
886 typename Matrix::local_matrix_type& C)
const {
888 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
889 using values_type =
typename local_matrix_type::values_type;
890 using size_type =
typename local_graph_type::size_type;
891 using col_index_type =
typename local_graph_type::data_type;
892 using array_layout =
typename local_graph_type::array_layout;
893 using memory_space =
typename device_type::memory_space;
894 using memory_traits =
typename local_graph_type::memory_traits;
895 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
896 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
899 int team_work_size = 16;
900 std::string myalg(
"SPGEMM_KK_MEMORY");
901 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
902 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
903 typename col_indices_type::const_value_type,
904 typename values_type::const_value_type,
908 kh.create_spgemm_handle(alg_enum);
909 kh.set_team_work_size(team_work_size);
912 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
914 col_indices_type colIndC;
918 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
919 B.numRows(), B.numCols(),
920 A.graph.row_map, A.graph.entries,
false,
921 B.graph.row_map, B.graph.entries,
false,
925 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
927 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
928 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
932 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
933 B.numRows(), B.numCols(),
934 A.graph.row_map, A.graph.entries, A.values,
false,
935 B.graph.row_map, B.graph.entries, B.values,
false,
936 rowPtrC, colIndC, valuesC);
937 kh.destroy_spgemm_handle();
939 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);