MueLu  Version of the Day
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
48 
49 #include <Xpetra_Map.hpp>
50 #include <Xpetra_Vector.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
54 
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
58 
60 
61 #include "MueLu_Aggregates.hpp"
62 #include "MueLu_GraphBase.hpp"
63 #include "MueLu_Level.hpp"
64 #include "MueLu_MasterList.hpp"
65 #include "MueLu_Monitor.hpp"
66 #include "MueLu_Types.hpp"
67 #include "MueLu_Utilities.hpp"
68 
69 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
70 #include "MueLu_Utilities_kokkos.hpp"
71 #endif
72 
73 namespace MueLu {
74 
75  namespace NotayUtils {
76  template <class LocalOrdinal>
78  return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
79  }
80 
81  template <class LocalOrdinal>
82  void RandomReorder(Teuchos::Array<LocalOrdinal> & list) {
83  typedef LocalOrdinal LO;
84  LO n = Teuchos::as<LO>(list.size());
85  for(LO i = 0; i < n-1; i++)
86  std::swap(list[i], list[RandomOrdinal(i,n-1)]);
87  }
88  }
89 
90  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
92  RCP<ParameterList> validParamList = rcp(new ParameterList());
93 
94  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
95 
96 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
97  SET_VALID_ENTRY("aggregation: pairwise: size");
98  SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
99  SET_VALID_ENTRY("aggregation: compute aggregate qualities");
100  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
101  SET_VALID_ENTRY("aggregation: ordering");
102 #undef SET_VALID_ENTRY
103 
104  // general variables needed in AggregationFactory
105  validParamList->set< RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
106  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
107  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
108  validParamList->set< RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
109 
110 
111  return validParamList;
112  }
113 
114  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
116  const ParameterList& pL = GetParameterList();
117 
118  Input(currentLevel, "A");
119  Input(currentLevel, "Graph");
120  Input(currentLevel, "DofsPerNode");
121  if (pL.get<bool>("aggregation: compute aggregate qualities")) {
122  Input(currentLevel, "AggregateQualities");
123  }
124 
125 
126  }
127 
128 
129  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
131  FactoryMonitor m(*this, "Build", currentLevel);
132  using STS = Teuchos::ScalarTraits<Scalar>;
133  using MT = typename STS::magnitudeType;
134 
135  const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
136 
137  RCP<Teuchos::FancyOStream> out;
138  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
139  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
140  out->setShowAllFrontMatter(false).setShowProcRank(true);
141  } else {
142  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
143  }
144 
145  const ParameterList& pL = GetParameterList();
146 
147  const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
148  TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
150  "Pairwise requires kappa > 2"
151  " otherwise all rows are considered as Dirichlet rows.");
152 
153  // Parameters
154  int maxNumIter = 3;
155  if (pL.isParameter("aggregation: pairwise: size"))
156  maxNumIter = pL.get<int>("aggregation: pairwise: size");
157  TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
159  "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
160  " must be a strictly positive integer");
161 
162 
163  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");
164  RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
165  local_matrix_type localA = A->getLocalMatrix();
166 
167  // Setup aggregates & aggStat objects
168  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
169  aggregates->setObjectLabel("PW");
170 
171  const LO numRows = graph->GetNodeNumVertices();
172 
173  // construct aggStat information
174  std::vector<unsigned> aggStat(numRows, READY);
175 
176 
177  const int DofsPerNode = Get<int>(currentLevel,"DofsPerNode");
178  TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError,
179  "Pairwise only supports one dof per node");
180 
181  // This follows the paper:
182  // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
183  // SISC 34(3), pp. A2288-2316.
184 
185  // Handle Ordering
186  std::string orderingStr = pL.get<std::string>("aggregation: ordering");
187  enum {
188  O_NATURAL,
189  O_RANDOM,
190  O_CUTHILL_MCKEE,
191  } ordering;
192 
193  ordering = O_NATURAL;
194  if (orderingStr == "random" ) ordering = O_RANDOM;
195  else if(orderingStr == "natural") {}
196 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
197  else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE;
198 #endif
199  else {
200  TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type");
201  }
202 
203  // Get an ordering vector
204  // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
205  // will get ignored in the aggregation phases, so we don't need to worry about
206  // running off the end.
207  Array<LO> orderingVector(numRows);
208  for (LO i = 0; i < numRows; i++)
209  orderingVector[i] = i;
210  if (ordering == O_RANDOM)
211  MueLu::NotayUtils::RandomReorder(orderingVector);
212 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
213  else if (ordering == O_CUTHILL_MCKEE) {
214  RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities_kokkos<SC,LO,GO,NO>::CuthillMcKee(*A);
215  auto localVector = rcmVector->getData(0);
216  for (LO i = 0; i < numRows; i++)
217  orderingVector[i] = localVector[i];
218  }
219 #endif
220 
221  // Get the party stated
222  LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
223  BuildInitialAggregates(pL, A, orderingVector(), kappa,
224  *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
225  TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
226  "Initial pairwise aggregation failed to aggregate all nodes");
227  LO numLocalAggregates = aggregates->GetNumAggregates();
228  GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
229  << A->getNodeNumRows() / numLocalAggregates << std::endl;
230 
231  // Temporary data storage for further aggregation steps
232  local_matrix_type intermediateP;
233  local_matrix_type coarseLocalA;
234 
235  // Compute the on rank part of the local matrix
236  // that the square submatrix that only contains
237  // columns corresponding to local rows.
238  LO numLocalDirichletNodes = numDirichletNodes;
239  Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
240  BuildOnRankLocalMatrix(A->getLocalMatrix(), coarseLocalA);
241  for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
242  // Compute the intermediate prolongator
243  BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
244  localVertex2AggId(), intermediateP);
245 
246  // Compute the coarse local matrix and coarse row sum
247  BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
248 
249  // Directly compute rowsum from A, rather than coarseA
250  row_sum_type rowSum("rowSum", numLocalAggregates);
251  {
252  std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
253  auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
254  for(LO i=0; i<(LO)numRows; i++) {
255  if(aggStat[i] != AGGREGATED)
256  continue;
257  LO agg=vertex2AggId[i];
258  agg2vertex[agg].push_back(i);
259  }
260 
261  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
262  for(LO i = 0; i < numRows; i++) {
263  // If not aggregated already, skip this guy
264  if(aggStat[i] != AGGREGATED)
265  continue;
266  int agg = vertex2AggId[i];
267  std::vector<LO> & myagg = agg2vertex[agg];
268 
269  size_t nnz = A->getNumEntriesInLocalRow(i);
270  ArrayView<const LO> indices;
271  ArrayView<const SC> vals;
272  A->getLocalRowView(i, indices, vals);
273 
274  SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
275  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
276  bool found = false;
277  if(indices[colidx] < numRows) {
278  for(LO j=0; j<(LO)myagg.size(); j++)
279  if (vertex2AggId[indices[colidx]] == agg)
280  found=true;
281  }
282  if(!found) {
283  *out << "- ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
284  mysum += vals[colidx];
285  }
286  else {
287  *out << "- NOT ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
288  }
289  }
290 
291  rowSum_h[agg] = mysum;
292  }
293  Kokkos::deep_copy(rowSum, rowSum_h);
294  }
295 
296  // Get local orderingVector
297  Array<LO> localOrderingVector(numRows);
298  for (LO i = 0; i < numRows; i++)
299  localOrderingVector[i] = i;
300  if (ordering == O_RANDOM)
301  MueLu::NotayUtils::RandomReorder(localOrderingVector);
302 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
303  else if (ordering == O_CUTHILL_MCKEE) {
304  RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities_kokkos<SC,LO,GO,NO>::CuthillMcKee(*A);
305  auto localVector = rcmVector->getData(0);
306  for (LO i = 0; i < numRows; i++)
307  localOrderingVector[i] = localVector[i];
308  }
309 #endif
310 
311  // Compute new aggregates
312  numLocalAggregates = 0;
313  numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
314  std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
315  localVertex2AggId.resize(numNonAggregatedNodes, -1);
316  BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
317  localAggStat, localVertex2AggId,
318  numLocalAggregates, numNonAggregatedNodes);
319 
320  // After the first initial pairwise aggregation
321  // the Dirichlet nodes have been removed.
322  numLocalDirichletNodes = 0;
323 
324  // Update the aggregates
325  RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
326  ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
327  for(size_t vertexIdx = 0; vertexIdx < A->getNodeNumRows(); ++vertexIdx) {
328  LO oldAggIdx = vertex2AggId[vertexIdx];
329  if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
330  vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
331  }
332  }
333 
334  // We could probably print some better statistics at some point
335  GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
336  << A->getNodeNumRows() / numLocalAggregates << std::endl;
337  }
338  aggregates->SetNumAggregates(numLocalAggregates);
339  aggregates->AggregatesCrossProcessors(false);
340  aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
341 
342  // DO stuff
343  Set(currentLevel, "Aggregates", aggregates);
344  GetOStream(Statistics0) << aggregates->description() << std::endl;
345  }
346 
347 
348  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
350  BuildInitialAggregates(const Teuchos::ParameterList& params,
351  const RCP<const Matrix>& A,
352  const Teuchos::ArrayView<const LO> & orderingVector,
353  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
354  Aggregates& aggregates,
355  std::vector<unsigned>& aggStat,
356  LO& numNonAggregatedNodes,
357  LO& numDirichletNodes) const {
358 
359  Monitor m(*this, "BuildInitialAggregates");
360  using STS = Teuchos::ScalarTraits<Scalar>;
361  using MT = typename STS::magnitudeType;
362  using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
363 
364  RCP<Teuchos::FancyOStream> out;
365  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
366  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
367  out->setShowAllFrontMatter(false).setShowProcRank(true);
368  } else {
369  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
370  }
371 
372 
373  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
374  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
375  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
376  const MT MT_TWO = MT_ONE + MT_ONE;
377  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
378  const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
379 
380  const MT kappa_init = kappa / (kappa - MT_TWO);
381  const LO numRows = aggStat.size();
382  const int myRank = A->getMap()->getComm()->getRank();
383 
384  // For finding "ties" where we fall back to the ordering. Making this larger than
385  // hard zero substantially increases code robustness.
386  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
387  double tie_less = 1.0 - tie_criterion;
388  double tie_more = 1.0 + tie_criterion;
389 
390  // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
391  // and so we're not doing again here.
392  // This should probably be fixed at some point.
393 
394  // Extract diagonal, rowsums, etc
395  // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
398  RCP<RealValuedVector> ghostedAbsRowSum = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixOverlappedAbsDeletedRowsum(*A);
399  const ArrayRCP<const SC> D = ghostedDiag->getData(0);
400  const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
401  const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
402 
403  // Aggregates stuff
404  ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
405  ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
406  ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
407  ArrayView<LO> procWinner = procWinner_rcp();
408 
409  // Algorithm 4.2
410 
411  // 0,1 : Initialize: Flag boundary conditions
412  // Modification: We assume symmetry here aij = aji
413  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
414  MT aii = STS::magnitude(D[row]);
415  MT rowsum = AbsRs[row];
416 
417  if(aii >= kappa_init * rowsum) {
418  *out << "Flagging index " << row << " as dirichlet "
419  "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl;
420  aggStat[row] = IGNORED;
421  --numNonAggregatedNodes;
422  ++numDirichletNodes;
423  }
424  }
425 
426 
427  // 2 : Iteration
428  LO aggIndex = LO_ZERO;
429  for(LO i = 0; i < numRows; i++) {
430  LO current_idx = orderingVector[i];
431  // If we're aggregated already, skip this guy
432  if(aggStat[current_idx] != READY)
433  continue;
434 
435  MT best_mu = MT_ZERO;
436  LO best_idx = LO_INVALID;
437 
438  size_t nnz = A->getNumEntriesInLocalRow(current_idx);
439  ArrayView<const LO> indices;
440  ArrayView<const SC> vals;
441  A->getLocalRowView(current_idx, indices, vals);
442 
443  MT aii = STS::real(D[current_idx]);
444  MT si = STS::real(S[current_idx]);
445  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
446  // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
447  LO col = indices[colidx];
448  SC val = vals[colidx];
449  if(current_idx == col || aggStat[col] != READY || col > numRows || val == SC_ZERO)
450  continue;
451 
452  MT aij = STS::real(val);
453  MT ajj = STS::real(D[col]);
454  MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
455  if(aii - si + ajj - sj >= MT_ZERO) {
456  // Modification: We assume symmetry here aij = aji
457  MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
458  MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
459  MT mu = mu_top / mu_bottom;
460 
461  // Modification: Explicitly check the tie criterion here
462  if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
463  (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
464  best_mu = mu;
465  best_idx = col;
466  *out << "[" << current_idx << "] Column UPDATED " << col << ": "
467  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
468  << " = " << aii - si + ajj - sj<< ", aij = "<<aij << ", mu = " << mu << std::endl;
469  }
470  else {
471  *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
472  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
473  << " = " << aii - si + ajj - sj << ", aij = "<<aij<< ", mu = " << mu << std::endl;
474  }
475  }
476  else {
477  *out << "[" << current_idx << "] Column FAILED " << col << ": "
478  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
479  << " = " << aii - si + ajj - sj << std::endl;
480  }
481  }// end column for loop
482 
483  if(best_idx == LO_INVALID) {
484  *out << "No node buddy found for index " << current_idx
485  << " [agg " << aggIndex << "]\n" << std::endl;
486  // We found no potential node-buddy, so let's just make this a singleton
487  // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
488  // the paper
489 
490  aggStat[current_idx] = ONEPT;
491  vertex2AggId[current_idx] = aggIndex;
492  procWinner[current_idx] = myRank;
493  numNonAggregatedNodes--;
494  aggIndex++;
495 
496  } else {
497  // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
498  if(best_mu <= kappa) {
499  *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
500 
501  aggStat[current_idx] = AGGREGATED;
502  aggStat[best_idx] = AGGREGATED;
503  vertex2AggId[current_idx] = aggIndex;
504  vertex2AggId[best_idx] = aggIndex;
505  procWinner[current_idx] = myRank;
506  procWinner[best_idx] = myRank;
507  numNonAggregatedNodes-=2;
508  aggIndex++;
509 
510  } else {
511  *out << "No buddy found for index " << current_idx << ","
512  " but aggregating as singleton [agg " << aggIndex << "]" << std::endl;
513 
514  aggStat[current_idx] = ONEPT;
515  vertex2AggId[current_idx] = aggIndex;
516  procWinner[current_idx] = myRank;
517  numNonAggregatedNodes--;
518  aggIndex++;
519  } // best_mu
520  } // best_idx
521  }// end Algorithm 4.2
522 
523  *out << "vertex2aggid :";
524  for(int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
525  *out << i << "(" << vertex2AggId[i] << ")";
526  }
527  *out << std::endl;
528 
529  // update aggregate object
530  aggregates.SetNumAggregates(aggIndex);
531  } // BuildInitialAggregates
532 
533  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
535  BuildFurtherAggregates(const Teuchos::ParameterList& params,
536  const RCP<const Matrix>& A,
537  const Teuchos::ArrayView<const LO> & orderingVector,
538  const typename Matrix::local_matrix_type& coarseA,
539  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
540  const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
542  typename Matrix::local_matrix_type::device_type>& rowSum,
543  std::vector<LocalOrdinal>& localAggStat,
544  Teuchos::Array<LocalOrdinal>& localVertex2AggID,
545  LO& numLocalAggregates,
546  LO& numNonAggregatedNodes) const {
547  Monitor m(*this, "BuildFurtherAggregates");
548 
549  // Set debug outputs based on environment variable
550  RCP<Teuchos::FancyOStream> out;
551  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
552  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
553  out->setShowAllFrontMatter(false).setShowProcRank(true);
554  } else {
555  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
556  }
557 
558  using value_type = typename local_matrix_type::value_type;
559  const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
560  const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
561  const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
562  const magnitude_type MT_two = MT_one + MT_one;
563  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
564 
565  // For finding "ties" where we fall back to the ordering. Making this larger than
566  // hard zero substantially increases code robustness.
567  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
568  double tie_less = 1.0 - tie_criterion;
569  double tie_more = 1.0 + tie_criterion;
570 
571  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
572  Kokkos::deep_copy(rowSum_h, rowSum);
573 
574  // Extracting the diagonal of a KokkosSparse::CrsMatrix
575  // is not currently provided in kokkos-kernels so here
576  // is an ugly way to get that done...
577  const LO numRows = static_cast<LO>(coarseA.numRows());
578  typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows);
579  typename local_matrix_type::row_map_type::HostMirror row_map_h
580  = Kokkos::create_mirror_view(coarseA.graph.row_map);
581  Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
582  typename local_matrix_type::index_type::HostMirror entries_h
583  = Kokkos::create_mirror_view(coarseA.graph.entries);
584  Kokkos::deep_copy(entries_h, coarseA.graph.entries);
585  typename local_matrix_type::values_type::HostMirror values_h
586  = Kokkos::create_mirror_view(coarseA.values);
587  Kokkos::deep_copy(values_h, coarseA.values);
588  for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
589  for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
590  entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
591  ++entryIdx) {
592  if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
593  diagA_h(rowIdx) = values_h(entryIdx);
594  }
595  }
596  }
597 
598  for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
599  if(localAggStat[currentIdx] != READY) {
600  continue;
601  }
602 
603  LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
604  magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
605  const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
606  const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
607  for(auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
608  const LO colIdx = static_cast<LO>(entries_h(entryIdx));
609  if(currentIdx == colIdx || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero || colIdx > numRows) {
610  continue;
611  }
612 
613  const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
614  const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
615  const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
616  if(aii - si + ajj - sj >= MT_zero) {
617  const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
618  const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
619  const magnitude_type mu = mu_top / mu_bottom;
620 
621  // Modification: Explicitly check the tie criterion here
622  if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
623  (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
624  best_mu = mu;
625  bestIdx = colIdx;
626  *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
627  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
628  << " = " << aii - si + ajj - sj << ", aij = "<<aij<<" mu = " << mu << std::endl;
629  }
630  else {
631  *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
632  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
633  << " = " << aii - si + ajj - sj << ", aij = "<<aij<<", mu = " << mu << std::endl;
634  }
635  } else {
636  *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
637  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
638  << " = " << aii - si + ajj - sj << std::endl;
639  }
640  } // end loop over row entries
641 
642  if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
643  localAggStat[currentIdx] = ONEPT;
644  localVertex2AggID[currentIdx] = numLocalAggregates;
645  --numNonAggregatedNodes;
646  ++numLocalAggregates;
647  } else {
648  if(best_mu <= kappa) {
649  *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
650 
651  localAggStat[currentIdx] = AGGREGATED;
652  localVertex2AggID[currentIdx] = numLocalAggregates;
653  --numNonAggregatedNodes;
654 
655  localAggStat[bestIdx] = AGGREGATED;
656  localVertex2AggID[bestIdx] = numLocalAggregates;
657  --numNonAggregatedNodes;
658 
659  ++numLocalAggregates;
660  } else {
661  *out << "No buddy found for index " << currentIdx << ","
662  " but aggregating as singleton [agg " << numLocalAggregates << "]" << std::endl;
663 
664  localAggStat[currentIdx] = ONEPT;
665  localVertex2AggID[currentIdx] = numLocalAggregates;
666  --numNonAggregatedNodes;
667  ++numLocalAggregates;
668  }
669  }
670  } // end loop over matrix rows
671 
672  } // BuildFurtherAggregates
673 
674  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
676  BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA,
677  typename Matrix::local_matrix_type& onrankA) const {
678  Monitor m(*this, "BuildOnRankLocalMatrix");
679 
680  // Set debug outputs based on environment variable
681  RCP<Teuchos::FancyOStream> out;
682  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
683  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
684  out->setShowAllFrontMatter(false).setShowProcRank(true);
685  } else {
686  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
687  }
688 
689  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
690  using values_type = typename local_matrix_type::values_type;
691  using size_type = typename local_graph_type::size_type;
692  using col_index_type = typename local_graph_type::data_type;
693  using array_layout = typename local_graph_type::array_layout;
694  using memory_traits = typename local_graph_type::memory_traits;
697  // Extract on rank part of A
698  // Simply check that the column index is less than the number of local rows
699  // otherwise remove it.
700 
701  const int numRows = static_cast<int>(localA.numRows());
702  row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
703  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
704  typename local_graph_type::row_map_type::HostMirror origRowPtr_h
705  = Kokkos::create_mirror_view(localA.graph.row_map);
706  typename local_graph_type::entries_type::HostMirror origColind_h
707  = Kokkos::create_mirror_view(localA.graph.entries);
708  typename values_type::HostMirror origValues_h
709  = Kokkos::create_mirror_view(localA.values);
710  Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
711  Kokkos::deep_copy(origColind_h, localA.graph.entries);
712  Kokkos::deep_copy(origValues_h, localA.values);
713 
714  // Compute the number of nnz entries per row
715  rowPtr_h(0) = 0;
716  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
717  for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
718  if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
719  }
720  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
721  }
722  Kokkos::deep_copy(rowPtr, rowPtr_h);
723 
724  const LO nnzOnrankA = rowPtr_h(numRows);
725 
726  // Now use nnz per row to allocate matrix views and store column indices and values
727  col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
728  values_type values("onrankA values", rowPtr_h(numRows));
729  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
730  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
731  int entriesInRow;
732  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
733  entriesInRow = 0;
734  for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
735  if(origColind_h(entryIdx) < numRows) {
736  colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
737  values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
738  ++entriesInRow;
739  }
740  }
741  }
742  Kokkos::deep_copy(colInd, colInd_h);
743  Kokkos::deep_copy(values, values_h);
744 
745  onrankA = local_matrix_type("onrankA", numRows, numRows,
746  nnzOnrankA, values, rowPtr, colInd);
747  }
748 
749  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
752  const LocalOrdinal numDirichletNodes,
753  const LocalOrdinal numLocalAggregates,
754  const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
755  typename Matrix::local_matrix_type& intermediateP) const {
756  Monitor m(*this, "BuildIntermediateProlongator");
757 
758  // Set debug outputs based on environment variable
759  RCP<Teuchos::FancyOStream> out;
760  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
761  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
762  out->setShowAllFrontMatter(false).setShowProcRank(true);
763  } else {
764  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
765  }
766 
767  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
768  using values_type = typename local_matrix_type::values_type;
769  using size_type = typename local_graph_type::size_type;
770  using col_index_type = typename local_graph_type::data_type;
771  using array_layout = typename local_graph_type::array_layout;
772  using memory_traits = typename local_graph_type::memory_traits;
775 
776  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
777 
778  const int intermediatePnnz = numRows - numDirichletNodes;
779  row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
780  col_indices_type colInd("intermediateP column indices", intermediatePnnz);
781  values_type values("intermediateP values", intermediatePnnz);
782  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
783  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
784 
785  rowPtr_h(0) = 0;
786  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
787  // Skip Dirichlet nodes
788  if(localVertex2AggID[rowIdx] == LO_INVALID) {
789  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
790  } else {
791  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
792  colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
793  }
794  }
795 
796  Kokkos::deep_copy(rowPtr, rowPtr_h);
797  Kokkos::deep_copy(colInd, colInd_h);
798  Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
799 
800  intermediateP = local_matrix_type("intermediateP",
801  numRows, numLocalAggregates, intermediatePnnz,
802  values, rowPtr, colInd);
803  } // BuildIntermediateProlongator
804 
805  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
807  BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP,
808  typename Matrix::local_matrix_type& coarseA) const {
809  Monitor m(*this, "BuildCoarseLocalMatrix");
810 
811  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
812  using values_type = typename local_matrix_type::values_type;
813  using size_type = typename local_graph_type::size_type;
814  using col_index_type = typename local_graph_type::data_type;
815  using array_layout = typename local_graph_type::array_layout;
816  using memory_traits = typename local_graph_type::memory_traits;
819 
821  localSpGEMM(coarseA, intermediateP, "AP", AP);
822 
823  // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
824  // I am not sure but doing it for safety in case it stashes data from the previous
825  // spgemm computation...
826 
827  // Compute Ac = Pt * AP
828  // Two steps needed:
829  // 1. compute Pt
830  // 2. perform multiplication
831 
832  // Step 1 compute Pt
833  // Obviously this requires the same amount of storage as P except for the rowPtr
834  row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
835  intermediateP.numCols() + 1);
836  col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
837  intermediateP.nnz());
838  values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
839  intermediateP.nnz());
840 
841  typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
842  Kokkos::deep_copy(rowPtrPt_h, 0);
843  for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
844  rowPtrPt_h(intermediateP.graph.entries(entryIdx) + 1) += 1;
845  }
846  for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
847  rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
848  }
849  Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
850 
851  typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
852  Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
853  typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
854  Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
855  typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
856  Kokkos::deep_copy(valuesP_h, intermediateP.values);
857  typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
858  typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
859  const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
860  Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
861 
862  col_index_type colIdx = 0;
863  for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
864  for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
865  colIdx = intermediateP.graph.entries(entryIdxP);
866  for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
867  if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
868  colIndPt_h(entryIdxPt) = rowIdx;
869  valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
870  break;
871  }
872  } // Loop over entries in row of Pt
873  } // Loop over entries in row of P
874  } // Loop over rows of P
875 
876  Kokkos::deep_copy(colIndPt, colIndPt_h);
877  Kokkos::deep_copy(valuesPt, valuesPt_h);
878 
879  local_matrix_type intermediatePt("intermediatePt",
880  intermediateP.numCols(),
881  intermediateP.numRows(),
882  intermediateP.nnz(),
883  valuesPt, rowPtrPt, colIndPt);
884 
885  // Create views for coarseA matrix
886  localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
887  } // BuildCoarseLocalMatrix
888 
889  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
891  localSpGEMM(const typename Matrix::local_matrix_type& A,
892  const typename Matrix::local_matrix_type& B,
893  const std::string matrixLabel,
894  typename Matrix::local_matrix_type& C) const {
895 
896  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
897  using values_type = typename local_matrix_type::values_type;
898  using size_type = typename local_graph_type::size_type;
899  using col_index_type = typename local_graph_type::data_type;
900  using array_layout = typename local_graph_type::array_layout;
901  using memory_space = typename device_type::memory_space;
902  using memory_traits = typename local_graph_type::memory_traits;
905 
906  // Options
907  int team_work_size = 16;
908  std::string myalg("SPGEMM_KK_MEMORY");
909  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
910  KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
911  typename col_indices_type::const_value_type,
912  typename values_type::const_value_type,
914  memory_space,
915  memory_space> kh;
916  kh.create_spgemm_handle(alg_enum);
917  kh.set_team_work_size(team_work_size);
918 
919  // Create views for AP matrix
920  row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
921  A.numRows() + 1);
922  col_indices_type colIndC;
923  values_type valuesC;
924 
925  // Symbolic multiplication
926  KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
927  B.numRows(), B.numCols(),
928  A.graph.row_map, A.graph.entries, false,
929  B.graph.row_map, B.graph.entries, false,
930  rowPtrC);
931 
932  // allocate column indices and values of AP
933  size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
934  if (nnzC) {
935  colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
936  valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
937  }
938 
939  // Numeric multiplication
940  KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
941  B.numRows(), B.numCols(),
942  A.graph.row_map, A.graph.entries, A.values, false,
943  B.graph.row_map, B.graph.entries, B.values, false,
944  rowPtrC, colIndC, valuesC);
945  kh.destroy_spgemm_handle();
946 
947  C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
948 
949  } // localSpGEMM
950 
951 } //namespace MueLu
952 
953 #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in non-factories.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
typename device_type::execution_space execution_space
typename Matrix::local_matrix_type local_matrix_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &currentLevel) const
Input.
void Build(Level &currentLevel) const
Build aggregates.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static RCP< Xpetra::Vector< Magnitude, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
void deep_copy(const View< DT, DP... > &dst, typename ViewTraits< DT, DP... >::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP... >::specialize, void >::value >::type *=nullptr)
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
Namespace for MueLu classes and methods.
@ Statistics0
Print statistics that do not involve significant additional computation.
@ AGGREGATED
Definition: MueLu_Types.hpp:73
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)