MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47#define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
48
49#include <Xpetra_Map.hpp>
50#include <Xpetra_Vector.hpp>
51#include <Xpetra_MultiVectorFactory.hpp>
52#include <Xpetra_MapFactory.hpp>
53#include <Xpetra_VectorFactory.hpp>
54
55#include "KokkosKernels_Handle.hpp"
56#include "KokkosSparse_spgemm.hpp"
57#include "KokkosSparse_spmv.hpp"
58
60
61#include "MueLu_Aggregates.hpp"
62#include "MueLu_GraphBase.hpp"
63#include "MueLu_Level.hpp"
64#include "MueLu_MasterList.hpp"
65#include "MueLu_Monitor.hpp"
66#include "MueLu_Types.hpp"
67#include "MueLu_Utilities.hpp"
68
69
70namespace MueLu {
71
72 namespace NotayUtils {
73 template <class LocalOrdinal>
75 return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
76 }
77
78 template <class LocalOrdinal>
79 void RandomReorder(Teuchos::Array<LocalOrdinal> & list) {
80 typedef LocalOrdinal LO;
81 LO n = Teuchos::as<LO>(list.size());
82 for(LO i = 0; i < n-1; i++)
83 std::swap(list[i], list[RandomOrdinal(i,n-1)]);
84 }
85 }
86
87 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89 RCP<ParameterList> validParamList = rcp(new ParameterList());
90
91
92#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
93 SET_VALID_ENTRY("aggregation: pairwise: size");
94 SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
95 SET_VALID_ENTRY("aggregation: compute aggregate qualities");
96 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
97 SET_VALID_ENTRY("aggregation: ordering");
98#undef SET_VALID_ENTRY
99
100 // general variables needed in AggregationFactory
101 validParamList->set< RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
102 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
103 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
104 validParamList->set< RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
105
106
107 return validParamList;
108 }
109
110 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
112 const ParameterList& pL = GetParameterList();
113
114 Input(currentLevel, "A");
115 Input(currentLevel, "Graph");
116 Input(currentLevel, "DofsPerNode");
117 if (pL.get<bool>("aggregation: compute aggregate qualities")) {
118 Input(currentLevel, "AggregateQualities");
119 }
120
121
122 }
123
124
125 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
127 FactoryMonitor m(*this, "Build", currentLevel);
128 using STS = Teuchos::ScalarTraits<Scalar>;
129 using MT = typename STS::magnitudeType;
130
131 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
132
133 RCP<Teuchos::FancyOStream> out;
134 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
135 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
136 out->setShowAllFrontMatter(false).setShowProcRank(true);
137 } else {
138 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
139 }
140
141 const ParameterList& pL = GetParameterList();
142
143 const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
144 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
146 "Pairwise requires kappa > 2"
147 " otherwise all rows are considered as Dirichlet rows.");
148
149 // Parameters
150 int maxNumIter = 3;
151 if (pL.isParameter("aggregation: pairwise: size"))
152 maxNumIter = pL.get<int>("aggregation: pairwise: size");
153 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
155 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
156 " must be a strictly positive integer");
157
158
159 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");
160 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
161
162 // Setup aggregates & aggStat objects
163 RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
164 aggregates->setObjectLabel("PW");
165
166 const LO numRows = graph->GetNodeNumVertices();
167
168 // construct aggStat information
169 std::vector<unsigned> aggStat(numRows, READY);
170
171
172 const int DofsPerNode = Get<int>(currentLevel,"DofsPerNode");
173 TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError,
174 "Pairwise only supports one dof per node");
175
176 // This follows the paper:
177 // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
178 // SISC 34(3), pp. A2288-2316.
179
180 // Handle Ordering
181 std::string orderingStr = pL.get<std::string>("aggregation: ordering");
182 enum {
183 O_NATURAL,
184 O_RANDOM,
185 O_CUTHILL_MCKEE,
186 } ordering;
187
188 ordering = O_NATURAL;
189 if (orderingStr == "random" ) ordering = O_RANDOM;
190 else if(orderingStr == "natural") {}
191 else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE;
192 else {
193 TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type");
194 }
195
196 // Get an ordering vector
197 // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
198 // will get ignored in the aggregation phases, so we don't need to worry about
199 // running off the end.
200 Array<LO> orderingVector(numRows);
201 for (LO i = 0; i < numRows; i++)
202 orderingVector[i] = i;
203 if (ordering == O_RANDOM)
204 MueLu::NotayUtils::RandomReorder(orderingVector);
205 else if (ordering == O_CUTHILL_MCKEE) {
206 RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities<SC,LO,GO,NO>::CuthillMcKee(*A);
207 auto localVector = rcmVector->getData(0);
208 for (LO i = 0; i < numRows; i++)
209 orderingVector[i] = localVector[i];
210 }
211
212 // Get the party stated
213 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
214 BuildInitialAggregates(pL, A, orderingVector(), kappa,
215 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
216 TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
217 "Initial pairwise aggregation failed to aggregate all nodes");
218 LO numLocalAggregates = aggregates->GetNumAggregates();
219 GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
220 << A->getLocalNumRows() / numLocalAggregates << std::endl;
221
222 // Temporary data storage for further aggregation steps
223 local_matrix_type intermediateP;
224 local_matrix_type coarseLocalA;
225
226 // Compute the on rank part of the local matrix
227 // that the square submatrix that only contains
228 // columns corresponding to local rows.
229 LO numLocalDirichletNodes = numDirichletNodes;
230 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
231 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
232 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
233 // Compute the intermediate prolongator
234 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
235 localVertex2AggId(), intermediateP);
236
237 // Compute the coarse local matrix and coarse row sum
238 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
239
240 // Directly compute rowsum from A, rather than coarseA
241 row_sum_type rowSum("rowSum", numLocalAggregates);
242 {
243 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
244 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
245 for(LO i=0; i<(LO)numRows; i++) {
246 if(aggStat[i] != AGGREGATED)
247 continue;
248 LO agg=vertex2AggId[i];
249 agg2vertex[agg].push_back(i);
250 }
251
252 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
253 for(LO i = 0; i < numRows; i++) {
254 // If not aggregated already, skip this guy
255 if(aggStat[i] != AGGREGATED)
256 continue;
257 int agg = vertex2AggId[i];
258 std::vector<LO> & myagg = agg2vertex[agg];
259
260 size_t nnz = A->getNumEntriesInLocalRow(i);
261 ArrayView<const LO> indices;
262 ArrayView<const SC> vals;
263 A->getLocalRowView(i, indices, vals);
264
265 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
266 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
267 bool found = false;
268 if(indices[colidx] < numRows) {
269 for(LO j=0; j<(LO)myagg.size(); j++)
270 if (vertex2AggId[indices[colidx]] == agg)
271 found=true;
272 }
273 if(!found) {
274 *out << "- ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
275 mysum += vals[colidx];
276 }
277 else {
278 *out << "- NOT ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
279 }
280 }
281
282 rowSum_h[agg] = mysum;
283 }
284 Kokkos::deep_copy(rowSum, rowSum_h);
285 }
286
287 // Get local orderingVector
288 Array<LO> localOrderingVector(numRows);
289 for (LO i = 0; i < numRows; i++)
290 localOrderingVector[i] = i;
291 if (ordering == O_RANDOM)
292 MueLu::NotayUtils::RandomReorder(localOrderingVector);
293 else if (ordering == O_CUTHILL_MCKEE) {
294 RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities<SC,LO,GO,NO>::CuthillMcKee(*A);
295 auto localVector = rcmVector->getData(0);
296 for (LO i = 0; i < numRows; i++)
297 localOrderingVector[i] = localVector[i];
298 }
299
300 // Compute new aggregates
301 numLocalAggregates = 0;
302 numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
303 std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
304 localVertex2AggId.resize(numNonAggregatedNodes, -1);
305 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
306 localAggStat, localVertex2AggId,
307 numLocalAggregates, numNonAggregatedNodes);
308
309 // After the first initial pairwise aggregation
310 // the Dirichlet nodes have been removed.
311 numLocalDirichletNodes = 0;
312
313 // Update the aggregates
314 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
315 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
316 for(size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
317 LO oldAggIdx = vertex2AggId[vertexIdx];
318 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
319 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
320 }
321 }
322
323 // We could probably print some better statistics at some point
324 GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
325 << A->getLocalNumRows() / numLocalAggregates << std::endl;
326 }
327 aggregates->SetNumAggregates(numLocalAggregates);
328 aggregates->AggregatesCrossProcessors(false);
329 aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
330
331 // DO stuff
332 Set(currentLevel, "Aggregates", aggregates);
333 GetOStream(Statistics0) << aggregates->description() << std::endl;
334 }
335
336
337 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
339 BuildInitialAggregates(const Teuchos::ParameterList& params,
340 const RCP<const Matrix>& A,
341 const Teuchos::ArrayView<const LO> & orderingVector,
342 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
343 Aggregates& aggregates,
344 std::vector<unsigned>& aggStat,
345 LO& numNonAggregatedNodes,
346 LO& numDirichletNodes) const {
347
348 Monitor m(*this, "BuildInitialAggregates");
349 using STS = Teuchos::ScalarTraits<Scalar>;
350 using MT = typename STS::magnitudeType;
351 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
352
353 RCP<Teuchos::FancyOStream> out;
354 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
355 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
356 out->setShowAllFrontMatter(false).setShowProcRank(true);
357 } else {
358 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
359 }
360
361
362 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
363 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
364 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
365 const MT MT_TWO = MT_ONE + MT_ONE;
366 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
367 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
368
369 const MT kappa_init = kappa / (kappa - MT_TWO);
370 const LO numRows = aggStat.size();
371 const int myRank = A->getMap()->getComm()->getRank();
372
373 // For finding "ties" where we fall back to the ordering. Making this larger than
374 // hard zero substantially increases code robustness.
375 double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
376 double tie_less = 1.0 - tie_criterion;
377 double tie_more = 1.0 + tie_criterion;
378
379 // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
380 // and so we're not doing again here.
381 // This should probably be fixed at some point.
382
383 // Extract diagonal, rowsums, etc
384 // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
387 RCP<RealValuedVector> ghostedAbsRowSum = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixOverlappedAbsDeletedRowsum(*A);
388 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
389 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
390 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
391
392 // Aggregates stuff
393 ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
394 ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
395 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
396 ArrayView<LO> procWinner = procWinner_rcp();
397
398 // Algorithm 4.2
399
400 // 0,1 : Initialize: Flag boundary conditions
401 // Modification: We assume symmetry here aij = aji
402 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
403 MT aii = STS::magnitude(D[row]);
404 MT rowsum = AbsRs[row];
405
406 if(aii >= kappa_init * rowsum) {
407 *out << "Flagging index " << row << " as dirichlet "
408 "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl;
409 aggStat[row] = IGNORED;
410 --numNonAggregatedNodes;
411 ++numDirichletNodes;
412 }
413 }
414
415
416 // 2 : Iteration
417 LO aggIndex = LO_ZERO;
418 for(LO i = 0; i < numRows; i++) {
419 LO current_idx = orderingVector[i];
420 // If we're aggregated already, skip this guy
421 if(aggStat[current_idx] != READY)
422 continue;
423
424 MT best_mu = MT_ZERO;
425 LO best_idx = LO_INVALID;
426
427 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
428 ArrayView<const LO> indices;
429 ArrayView<const SC> vals;
430 A->getLocalRowView(current_idx, indices, vals);
431
432 MT aii = STS::real(D[current_idx]);
433 MT si = STS::real(S[current_idx]);
434 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
435 // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
436 LO col = indices[colidx];
437 SC val = vals[colidx];
438 if(current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO)
439 continue;
440
441 MT aij = STS::real(val);
442 MT ajj = STS::real(D[col]);
443 MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
444 if(aii - si + ajj - sj >= MT_ZERO) {
445 // Modification: We assume symmetry here aij = aji
446 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
447 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
448 MT mu = mu_top / mu_bottom;
449
450 // Modification: Explicitly check the tie criterion here
451 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
452 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
453 best_mu = mu;
454 best_idx = col;
455 *out << "[" << current_idx << "] Column UPDATED " << col << ": "
456 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
457 << " = " << aii - si + ajj - sj<< ", aij = "<<aij << ", mu = " << mu << std::endl;
458 }
459 else {
460 *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
461 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
462 << " = " << aii - si + ajj - sj << ", aij = "<<aij<< ", mu = " << mu << std::endl;
463 }
464 }
465 else {
466 *out << "[" << current_idx << "] Column FAILED " << col << ": "
467 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
468 << " = " << aii - si + ajj - sj << std::endl;
469 }
470 }// end column for loop
471
472 if(best_idx == LO_INVALID) {
473 *out << "No node buddy found for index " << current_idx
474 << " [agg " << aggIndex << "]\n" << std::endl;
475 // We found no potential node-buddy, so let's just make this a singleton
476 // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
477 // the paper
478
479 aggStat[current_idx] = ONEPT;
480 vertex2AggId[current_idx] = aggIndex;
481 procWinner[current_idx] = myRank;
482 numNonAggregatedNodes--;
483 aggIndex++;
484
485 } else {
486 // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
487 if(best_mu <= kappa) {
488 *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
489
490 aggStat[current_idx] = AGGREGATED;
491 aggStat[best_idx] = AGGREGATED;
492 vertex2AggId[current_idx] = aggIndex;
493 vertex2AggId[best_idx] = aggIndex;
494 procWinner[current_idx] = myRank;
495 procWinner[best_idx] = myRank;
496 numNonAggregatedNodes-=2;
497 aggIndex++;
498
499 } else {
500 *out << "No buddy found for index " << current_idx << ","
501 " but aggregating as singleton [agg " << aggIndex << "]" << std::endl;
502
503 aggStat[current_idx] = ONEPT;
504 vertex2AggId[current_idx] = aggIndex;
505 procWinner[current_idx] = myRank;
506 numNonAggregatedNodes--;
507 aggIndex++;
508 } // best_mu
509 } // best_idx
510 }// end Algorithm 4.2
511
512 *out << "vertex2aggid :";
513 for(int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
514 *out << i << "(" << vertex2AggId[i] << ")";
515 }
516 *out << std::endl;
517
518 // update aggregate object
519 aggregates.SetNumAggregates(aggIndex);
520 } // BuildInitialAggregates
521
522 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
524 BuildFurtherAggregates(const Teuchos::ParameterList& params,
525 const RCP<const Matrix>& A,
526 const Teuchos::ArrayView<const LO> & orderingVector,
527 const typename Matrix::local_matrix_type& coarseA,
528 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
529 const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
530 Kokkos::LayoutLeft,
531 typename Matrix::local_matrix_type::device_type>& rowSum,
532 std::vector<LocalOrdinal>& localAggStat,
533 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
534 LO& numLocalAggregates,
535 LO& numNonAggregatedNodes) const {
536 Monitor m(*this, "BuildFurtherAggregates");
537
538 // Set debug outputs based on environment variable
539 RCP<Teuchos::FancyOStream> out;
540 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
541 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
542 out->setShowAllFrontMatter(false).setShowProcRank(true);
543 } else {
544 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
545 }
546
547 using value_type = typename local_matrix_type::value_type;
548 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
549 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
550 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
551 const magnitude_type MT_two = MT_one + MT_one;
552 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
553
554 // For finding "ties" where we fall back to the ordering. Making this larger than
555 // hard zero substantially increases code robustness.
556 double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
557 double tie_less = 1.0 - tie_criterion;
558 double tie_more = 1.0 + tie_criterion;
559
560 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
561 Kokkos::deep_copy(rowSum_h, rowSum);
562
563 // Extracting the diagonal of a KokkosSparse::CrsMatrix
564 // is not currently provided in kokkos-kernels so here
565 // is an ugly way to get that done...
566 const LO numRows = static_cast<LO>(coarseA.numRows());
567 typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows);
568 typename local_matrix_type::row_map_type::HostMirror row_map_h
569 = Kokkos::create_mirror_view(coarseA.graph.row_map);
570 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
571 typename local_matrix_type::index_type::HostMirror entries_h
572 = Kokkos::create_mirror_view(coarseA.graph.entries);
573 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
574 typename local_matrix_type::values_type::HostMirror values_h
575 = Kokkos::create_mirror_view(coarseA.values);
576 Kokkos::deep_copy(values_h, coarseA.values);
577 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
578 for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
579 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
580 ++entryIdx) {
581 if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
582 diagA_h(rowIdx) = values_h(entryIdx);
583 }
584 }
585 }
586
587 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
588 if(localAggStat[currentIdx] != READY) {
589 continue;
590 }
591
592 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
593 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
594 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
595 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
596 for(auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
597 const LO colIdx = static_cast<LO>(entries_h(entryIdx));
598 if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) {
599 continue;
600 }
601
602 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
603 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
604 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
605 if(aii - si + ajj - sj >= MT_zero) {
606 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
607 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
608 const magnitude_type mu = mu_top / mu_bottom;
609
610 // Modification: Explicitly check the tie criterion here
611 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
612 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
613 best_mu = mu;
614 bestIdx = colIdx;
615 *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
616 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
617 << " = " << aii - si + ajj - sj << ", aij = "<<aij<<" mu = " << mu << std::endl;
618 }
619 else {
620 *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
621 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
622 << " = " << aii - si + ajj - sj << ", aij = "<<aij<<", mu = " << mu << std::endl;
623 }
624 } else {
625 *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
626 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
627 << " = " << aii - si + ajj - sj << std::endl;
628 }
629 } // end loop over row entries
630
631 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
632 localAggStat[currentIdx] = ONEPT;
633 localVertex2AggID[currentIdx] = numLocalAggregates;
634 --numNonAggregatedNodes;
635 ++numLocalAggregates;
636 } else {
637 if(best_mu <= kappa) {
638 *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
639
640 localAggStat[currentIdx] = AGGREGATED;
641 localVertex2AggID[currentIdx] = numLocalAggregates;
642 --numNonAggregatedNodes;
643
644 localAggStat[bestIdx] = AGGREGATED;
645 localVertex2AggID[bestIdx] = numLocalAggregates;
646 --numNonAggregatedNodes;
647
648 ++numLocalAggregates;
649 } else {
650 *out << "No buddy found for index " << currentIdx << ","
651 " but aggregating as singleton [agg " << numLocalAggregates << "]" << std::endl;
652
653 localAggStat[currentIdx] = ONEPT;
654 localVertex2AggID[currentIdx] = numLocalAggregates;
655 --numNonAggregatedNodes;
656 ++numLocalAggregates;
657 }
658 }
659 } // end loop over matrix rows
660
661 } // BuildFurtherAggregates
662
663 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
665 BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA,
666 typename Matrix::local_matrix_type& onrankA) const {
667 Monitor m(*this, "BuildOnRankLocalMatrix");
668
669 // Set debug outputs based on environment variable
670 RCP<Teuchos::FancyOStream> out;
671 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
672 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
673 out->setShowAllFrontMatter(false).setShowProcRank(true);
674 } else {
675 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
676 }
677
678 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
679 using values_type = typename local_matrix_type::values_type;
680 using size_type = typename local_graph_type::size_type;
681 using col_index_type = typename local_graph_type::data_type;
682 using array_layout = typename local_graph_type::array_layout;
683 using memory_traits = typename local_graph_type::memory_traits;
684 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
685 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
686 // Extract on rank part of A
687 // Simply check that the column index is less than the number of local rows
688 // otherwise remove it.
689
690 const int numRows = static_cast<int>(localA.numRows());
691 row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
692 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
693 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
694 = Kokkos::create_mirror_view(localA.graph.row_map);
695 typename local_graph_type::entries_type::HostMirror origColind_h
696 = Kokkos::create_mirror_view(localA.graph.entries);
697 typename values_type::HostMirror origValues_h
698 = Kokkos::create_mirror_view(localA.values);
699 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
700 Kokkos::deep_copy(origColind_h, localA.graph.entries);
701 Kokkos::deep_copy(origValues_h, localA.values);
702
703 // Compute the number of nnz entries per row
704 rowPtr_h(0) = 0;
705 for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
706 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
707 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
708 }
709 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
710 }
711 Kokkos::deep_copy(rowPtr, rowPtr_h);
712
713 const LO nnzOnrankA = rowPtr_h(numRows);
714
715 // Now use nnz per row to allocate matrix views and store column indices and values
716 col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
717 values_type values("onrankA values", rowPtr_h(numRows));
718 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
719 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
720 int entriesInRow;
721 for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
722 entriesInRow = 0;
723 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
724 if(origColind_h(entryIdx) < numRows) {
725 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
726 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
727 ++entriesInRow;
728 }
729 }
730 }
731 Kokkos::deep_copy(colInd, colInd_h);
732 Kokkos::deep_copy(values, values_h);
733
734 onrankA = local_matrix_type("onrankA", numRows, numRows,
735 nnzOnrankA, values, rowPtr, colInd);
736 }
737
738 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
741 const LocalOrdinal numDirichletNodes,
742 const LocalOrdinal numLocalAggregates,
743 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
744 typename Matrix::local_matrix_type& intermediateP) const {
745 Monitor m(*this, "BuildIntermediateProlongator");
746
747 // Set debug outputs based on environment variable
748 RCP<Teuchos::FancyOStream> out;
749 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
750 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
751 out->setShowAllFrontMatter(false).setShowProcRank(true);
752 } else {
753 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
754 }
755
756 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
757 using values_type = typename local_matrix_type::values_type;
758 using size_type = typename local_graph_type::size_type;
759 using col_index_type = typename local_graph_type::data_type;
760 using array_layout = typename local_graph_type::array_layout;
761 using memory_traits = typename local_graph_type::memory_traits;
762 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
763 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
764
765 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
766
767 const int intermediatePnnz = numRows - numDirichletNodes;
768 row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
769 col_indices_type colInd("intermediateP column indices", intermediatePnnz);
770 values_type values("intermediateP values", intermediatePnnz);
771 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
772 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
773
774 rowPtr_h(0) = 0;
775 for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
776 // Skip Dirichlet nodes
777 if(localVertex2AggID[rowIdx] == LO_INVALID) {
778 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
779 } else {
780 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
781 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
782 }
783 }
784
785 Kokkos::deep_copy(rowPtr, rowPtr_h);
786 Kokkos::deep_copy(colInd, colInd_h);
787 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
788
789 intermediateP = local_matrix_type("intermediateP",
790 numRows, numLocalAggregates, intermediatePnnz,
791 values, rowPtr, colInd);
792 } // BuildIntermediateProlongator
793
794 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
796 BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP,
797 typename Matrix::local_matrix_type& coarseA) const {
798 Monitor m(*this, "BuildCoarseLocalMatrix");
799
800 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
801 using values_type = typename local_matrix_type::values_type;
802 using size_type = typename local_graph_type::size_type;
803 using col_index_type = typename local_graph_type::data_type;
804 using array_layout = typename local_graph_type::array_layout;
805 using memory_traits = typename local_graph_type::memory_traits;
806 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
807 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
808
810 localSpGEMM(coarseA, intermediateP, "AP", AP);
811
812 // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
813 // I am not sure but doing it for safety in case it stashes data from the previous
814 // spgemm computation...
815
816 // Compute Ac = Pt * AP
817 // Two steps needed:
818 // 1. compute Pt
819 // 2. perform multiplication
820
821 // Step 1 compute Pt
822 // Obviously this requires the same amount of storage as P except for the rowPtr
823 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
824 intermediateP.numCols() + 1);
825 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
826 intermediateP.nnz());
827 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
828 intermediateP.nnz());
829
830 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
831 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
832 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
833 Kokkos::deep_copy(rowPtrPt_h, 0);
834 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
835 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
836 }
837 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
838 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
839 }
840 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
841
842 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
843 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
844 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
845 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
846 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
847 Kokkos::deep_copy(valuesP_h, intermediateP.values);
848 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
849 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
850 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
851 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
852
853 col_index_type colIdx = 0;
854 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
855 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
856 colIdx = entries_h(entryIdxP);
857 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
858 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
859 colIndPt_h(entryIdxPt) = rowIdx;
860 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
861 break;
862 }
863 } // Loop over entries in row of Pt
864 } // Loop over entries in row of P
865 } // Loop over rows of P
866
867 Kokkos::deep_copy(colIndPt, colIndPt_h);
868 Kokkos::deep_copy(valuesPt, valuesPt_h);
869
870
871 local_matrix_type intermediatePt("intermediatePt",
872 intermediateP.numCols(),
873 intermediateP.numRows(),
874 intermediateP.nnz(),
875 valuesPt, rowPtrPt, colIndPt);
876
877 // Create views for coarseA matrix
878 localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
879 } // BuildCoarseLocalMatrix
880
881 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
883 localSpGEMM(const typename Matrix::local_matrix_type& A,
884 const typename Matrix::local_matrix_type& B,
885 const std::string matrixLabel,
886 typename Matrix::local_matrix_type& C) const {
887
888 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
889 using values_type = typename local_matrix_type::values_type;
890 using size_type = typename local_graph_type::size_type;
891 using col_index_type = typename local_graph_type::data_type;
892 using array_layout = typename local_graph_type::array_layout;
893 using memory_space = typename device_type::memory_space;
894 using memory_traits = typename local_graph_type::memory_traits;
895 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
896 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
897
898 // Options
899 int team_work_size = 16;
900 std::string myalg("SPGEMM_KK_MEMORY");
901 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
902 KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
903 typename col_indices_type::const_value_type,
904 typename values_type::const_value_type,
906 memory_space,
907 memory_space> kh;
908 kh.create_spgemm_handle(alg_enum);
909 kh.set_team_work_size(team_work_size);
910
911 // Create views for AP matrix
912 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
913 A.numRows() + 1);
914 col_indices_type colIndC;
915 values_type valuesC;
916
917 // Symbolic multiplication
918 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
919 B.numRows(), B.numCols(),
920 A.graph.row_map, A.graph.entries, false,
921 B.graph.row_map, B.graph.entries, false,
922 rowPtrC);
923
924 // allocate column indices and values of AP
925 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
926 if (nnzC) {
927 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
928 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
929 }
930
931 // Numeric multiplication
932 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
933 B.numRows(), B.numCols(),
934 A.graph.row_map, A.graph.entries, A.values, false,
935 B.graph.row_map, B.graph.entries, B.values, false,
936 rowPtrC, colIndC, valuesC);
937 kh.destroy_spgemm_handle();
938
939 C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
940
941 } // localSpGEMM
942
943} //namespace MueLu
944
945#endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
Timer to be used in non-factories.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
typename device_type::execution_space execution_space
typename Matrix::local_matrix_type local_matrix_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &currentLevel) const
Input.
void Build(Level &currentLevel) const
Build aggregates.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
virtual const Teuchos::ParameterList & GetParameterList() const
static RCP< Xpetra::Vector< DefaultLocalOrdinal, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > > CuthillMcKee(const Matrix &Op)
static RCP< Xpetra::Vector< Magnitude, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
Namespace for MueLu classes and methods.
@ Statistics0
Print statistics that do not involve significant additional computation.