MueLu  Version of the Day
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
49 #include <Xpetra_CrsGraphFactory.hpp>
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Matrix.hpp>
55 #include <Xpetra_MultiVectorFactory.hpp>
56 #include <Xpetra_MultiVector.hpp>
57 #include <Xpetra_StridedMap.hpp>
58 #include <Xpetra_VectorFactory.hpp>
59 #include <Xpetra_Vector.hpp>
60 
62 
63 #include "MueLu_AmalgamationFactory.hpp"
64 #include "MueLu_AmalgamationInfo.hpp"
65 #include "MueLu_Exceptions.hpp"
66 #include "MueLu_GraphBase.hpp"
67 #include "MueLu_Graph.hpp"
68 #include "MueLu_Level.hpp"
69 #include "MueLu_LWGraph.hpp"
70 #include "MueLu_MasterList.hpp"
71 #include "MueLu_Monitor.hpp"
72 #include "MueLu_PreDropFunctionBaseClass.hpp"
73 #include "MueLu_PreDropFunctionConstVal.hpp"
74 #include "MueLu_Utilities.hpp"
75 
76 #include <algorithm>
77 #include <cstdlib>
78 #include <string>
79 
80 // If defined, read environment variables.
81 // Should be removed once we are confident that this works.
82 //#define DJS_READ_ENV_VARIABLES
83 
84 namespace MueLu {
85 
86  namespace Details {
87  template<class real_type, class LO>
88  struct DropTol {
89 
90  DropTol() = default;
91  DropTol(DropTol const&) = default;
92  DropTol(DropTol &&) = default;
93 
94  DropTol& operator=(DropTol const&) = default;
95  DropTol& operator=(DropTol &&) = default;
96 
97  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
98  : val{val_}, diag{diag_}, col{col_}, drop{drop_}
99  {}
100 
101  real_type val {Teuchos::ScalarTraits<real_type>::zero()};
102  real_type diag {Teuchos::ScalarTraits<real_type>::zero()};
103  LO col {Teuchos::OrdinalTraits<LO>::invalid()};
104  bool drop {true};
105  };
106  }
107 
108 
109  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
111  RCP<ParameterList> validParamList = rcp(new ParameterList());
112 
113 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
114  SET_VALID_ENTRY("aggregation: drop tol");
115  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
116  SET_VALID_ENTRY("aggregation: drop scheme");
117  {
118  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
119  validParamList->getEntry("aggregation: drop scheme").setValidator(
120  rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian"), "aggregation: drop scheme")));
121  }
122  SET_VALID_ENTRY("aggregation: distance laplacian algo");
123  SET_VALID_ENTRY("aggregation: classical algo");
124 #undef SET_VALID_ENTRY
125  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
126 
127  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
128  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
129  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
130 
131  return validParamList;
132  }
133 
134  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
136 
137  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
139  Input(currentLevel, "A");
140  Input(currentLevel, "UnAmalgamationInfo");
141 
142  const ParameterList& pL = GetParameterList();
143  if (pL.get<bool>("lightweight wrap") == true) {
144  if (pL.get<std::string>("aggregation: drop scheme") == "distance laplacian")
145  Input(currentLevel, "Coordinates");
146 
147  }
148  }
149 
150  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
152 
153  FactoryMonitor m(*this, "Build", currentLevel);
154 
155  typedef Teuchos::ScalarTraits<SC> STS;
156  typedef typename STS::magnitudeType real_type;
157  typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
158 
159  if (predrop_ != Teuchos::null)
160  GetOStream(Parameters0) << predrop_->description();
161 
162  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
163  RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
164 
165  const ParameterList & pL = GetParameterList();
166  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
167 
168  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
169 
170  // decide wether to use the fast-track code path for standard maps or the somewhat slower
171  // code path for non-standard maps
172  /*bool bNonStandardMaps = false;
173  if (A->IsView("stridedMaps") == true) {
174  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
175  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
176  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
177  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
178  bNonStandardMaps = true;
179  }*/
180 
181  if (doExperimentalWrap) {
182  std::string algo = pL.get<std::string>("aggregation: drop scheme");
183 
184  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
185  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian)");
186 
187  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
188  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
189  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
190  real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
192  // Remove this bit once we are confident that cut-based dropping works.
193 #ifdef HAVE_MUELU_DEBUG
194  int distanceLaplacianCutVerbose = 0;
195 #endif
196 #ifdef DJS_READ_ENV_VARIABLES
197  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
198  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
199  }
200 
201  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
202  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
203  realThreshold = 1e-4*tmp;
204  }
205 
206 # ifdef HAVE_MUELU_DEBUG
207  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
208  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
209  }
210 # endif
211 #endif
213 
214  enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut};
215 
216  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
217  decisionAlgoType classicalAlgo = defaultAlgo;
218  if (algo == "distance laplacian") {
219  if (distanceLaplacianAlgoStr == "default")
220  distanceLaplacianAlgo = defaultAlgo;
221  else if (distanceLaplacianAlgoStr == "unscaled cut")
222  distanceLaplacianAlgo = unscaled_cut;
223  else if (distanceLaplacianAlgoStr == "scaled cut")
224  distanceLaplacianAlgo = scaled_cut;
225  else
226  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
227  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
228  } else if (algo == "classical") {
229  if (classicalAlgoStr == "default")
230  classicalAlgo = defaultAlgo;
231  else if (classicalAlgoStr == "unscaled cut")
232  classicalAlgo = unscaled_cut;
233  else if (classicalAlgoStr == "scaled cut")
234  classicalAlgo = scaled_cut;
235  else
236  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
237  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
238 
239  } else
240  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
241  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
242 
243  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
244 
245  GO numDropped = 0, numTotal = 0;
246  std::string graphType = "unamalgamated"; //for description purposes only
247  if (algo == "classical") {
248  if (predrop_ == null) {
249  // ap: this is a hack: had to declare predrop_ as mutable
250  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
251  }
252 
253  if (predrop_ != null) {
254  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
255  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
256  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
257  // If a user provided a predrop function, it overwrites the XML threshold parameter
258  SC newt = predropConstVal->GetThreshold();
259  if (newt != threshold) {
260  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
261  threshold = newt;
262  }
263  }
264  // At this points we either have
265  // (predrop_ != null)
266  // Therefore, it is sufficient to check only threshold
267  if (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && A->hasCrsGraph()) {
268  // Case 1: scalar problem, no dropping => just use matrix graph
269  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
270  // Detect and record rows that correspond to Dirichlet boundary conditions
271  ArrayRCP<const bool > boundaryNodes;
272  boundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
273  graph->SetBoundaryNodeMap(boundaryNodes);
274  numTotal = A->getNodeNumEntries();
275 
276  if (GetVerbLevel() & Statistics1) {
277  GO numLocalBoundaryNodes = 0;
278  GO numGlobalBoundaryNodes = 0;
279  for (LO i = 0; i < boundaryNodes.size(); ++i)
280  if (boundaryNodes[i])
281  numLocalBoundaryNodes++;
282  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
283  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
284  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
285  }
286 
287  Set(currentLevel, "DofsPerNode", 1);
288  Set(currentLevel, "Graph", graph);
289 
290  } else if ( (A->GetFixedBlockSize() == 1 && threshold != STS::zero()) ||
291  (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && !A->hasCrsGraph())) {
292  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
293  // graph's map information, e.g., whether index is local
294  // OR a matrix without a CrsGraph
295 
296  // allocate space for the local graph
297  ArrayRCP<LO> rows (A->getNodeNumRows()+1);
298  ArrayRCP<LO> columns(A->getNodeNumEntries());
299 
301  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
302  ArrayRCP<const bool> boundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
303 
304  LO realnnz = 0;
305  rows[0] = 0;
306  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
307  size_t nnz = A->getNumEntriesInLocalRow(row);
308  ArrayView<const LO> indices;
309  ArrayView<const SC> vals;
310  A->getLocalRowView(row, indices, vals);
311 
312  if(classicalAlgo == defaultAlgo) {
313  //FIXME the current predrop function uses the following
314  //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
315  //FIXME but the threshold doesn't take into account the rows' diagonal entries
316  //FIXME For now, hardwiring the dropping in here
317 
318  LO rownnz = 0;
319  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
320  LO col = indices[colID];
321 
322  // we avoid a square root by using squared values
323  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
324  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
325 
326  if (aij > aiiajj || row == col) {
327  columns[realnnz++] = col;
328  rownnz++;
329  } else
330  numDropped++;
331  }
332  rows[row+1] = realnnz;
333  }
334  else {
335  /* Cut Algorithm */
336  //CMS
337  using DropTol = Details::DropTol<real_type,LO>;
338  std::vector<DropTol> drop_vec;
339  drop_vec.reserve(nnz);
340  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
341  const real_type one = Teuchos::ScalarTraits<real_type>::one();
342  LO rownnz = 0;
343 
344  // find magnitudes
345  for (LO colID = 0; colID < (LO)nnz; colID++) {
346  LO col = indices[colID];
347  if (row == col) {
348  drop_vec.emplace_back( zero, one, colID, false);
349  continue;
350  }
351 
352  // Don't aggregate boundaries
353  if(boundaryNodes[colID]) continue;
354  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
355  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
356  drop_vec.emplace_back(aij, aiiajj, colID, false);
357  }
358 
359  const size_t n = drop_vec.size();
360 
361  if (classicalAlgo == unscaled_cut) {
362  std::sort( drop_vec.begin(), drop_vec.end()
363  , [](DropTol const& a, DropTol const& b) {
364  return a.val > b.val;
365  });
366 
367  bool drop = false;
368  for (size_t i=1; i<n; ++i) {
369  if (!drop) {
370  auto const& x = drop_vec[i-1];
371  auto const& y = drop_vec[i];
372  auto a = x.val;
373  auto b = y.val;
374  if (a > realThreshold*b) {
375  drop = true;
376 #ifdef HAVE_MUELU_DEBUG
377  if (distanceLaplacianCutVerbose) {
378  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
379  }
380 #endif
381  }
382  }
383  drop_vec[i].drop = drop;
384  }
385  } else if (classicalAlgo == scaled_cut) {
386  std::sort( drop_vec.begin(), drop_vec.end()
387  , [](DropTol const& a, DropTol const& b) {
388  return a.val/a.diag > b.val/b.diag;
389  });
390  bool drop = false;
391  // printf("[%d] Scaled Cut: ",(int)row);
392  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
393  for (size_t i=1; i<n; ++i) {
394  if (!drop) {
395  auto const& x = drop_vec[i-1];
396  auto const& y = drop_vec[i];
397  auto a = x.val/x.diag;
398  auto b = y.val/y.diag;
399  if (a > realThreshold*b) {
400  drop = true;
401 
402 #ifdef HAVE_MUELU_DEBUG
403  if (distanceLaplacianCutVerbose) {
404  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
405  }
406 #endif
407  }
408  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
409 
410  }
411  drop_vec[i].drop = drop;
412  }
413  // printf("\n");
414  }
415  std::sort( drop_vec.begin(), drop_vec.end()
416  , [](DropTol const& a, DropTol const& b) {
417  return a.col < b.col;
418  }
419  );
420 
421  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
422  LO col = indices[drop_vec[idxID].col];
423  // don't drop diagonal
424  if (row == col) {
425  columns[realnnz++] = col;
426  rownnz++;
427  continue;
428  }
429 
430  if (!drop_vec[idxID].drop) {
431  columns[realnnz++] = col;
432  rownnz++;
433  } else {
434  numDropped++;
435  }
436  }
437  // CMS
438  rows[row+1] = realnnz;
439 
440  }
441  }//end for row
442 
443  columns.resize(realnnz);
444  numTotal = A->getNodeNumEntries();
445  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
446  graph->SetBoundaryNodeMap(boundaryNodes);
447  if (GetVerbLevel() & Statistics1) {
448  GO numLocalBoundaryNodes = 0;
449  GO numGlobalBoundaryNodes = 0;
450  for (LO i = 0; i < boundaryNodes.size(); ++i)
451  if (boundaryNodes[i])
452  numLocalBoundaryNodes++;
453  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
454  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
455  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
456  }
457  Set(currentLevel, "Graph", graph);
458  Set(currentLevel, "DofsPerNode", 1);
459 
460  } else if (A->GetFixedBlockSize() > 1 && threshold == STS::zero()) {
461  // Case 3: Multiple DOF/node problem without dropping
462  const RCP<const Map> rowMap = A->getRowMap();
463  const RCP<const Map> colMap = A->getColMap();
464 
465  graphType = "amalgamated";
466 
467  // build node row map (uniqueMap) and node column map (nonUniqueMap)
468  // the arrays rowTranslation and colTranslation contain the local node id
469  // given a local dof id. The data is calculated by the AmalgamationFactory and
470  // stored in the variable container "UnAmalgamationInfo"
471  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
472  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
473  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
474  Array<LO> colTranslation = *(amalInfo->getColTranslation());
475 
476  // get number of local nodes
477  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
478 
479  // Allocate space for the local graph
480  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
481  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
482 
483  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
484 
485  // Detect and record rows that correspond to Dirichlet boundary conditions
486  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
487  // TODO the array one bigger than the number of local rows, and the last entry can
488  // TODO hold the actual number of boundary nodes. Clever, huh?
489  ArrayRCP<const bool > pointBoundaryNodes;
490  pointBoundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
491 
492  // extract striding information
493  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
494  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
495  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
496  if (A->IsView("stridedMaps") == true) {
497  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
498  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
499  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
500  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
501  blkId = strMap->getStridedBlockId();
502  if (blkId > -1)
503  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
504  }
505 
506  // loop over all local nodes
507  LO realnnz = 0;
508  rows[0] = 0;
509  Array<LO> indicesExtra;
510  for (LO row = 0; row < numRows; row++) {
511  ArrayView<const LO> indices;
512  indicesExtra.resize(0);
513 
514  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
515  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
516  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
517  // with local ids.
518  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
519  // node.
520  bool isBoundary = false;
521  isBoundary = true;
522  for (LO j = 0; j < blkPartSize; j++) {
523  if (!pointBoundaryNodes[row*blkPartSize+j]) {
524  isBoundary = false;
525  break;
526  }
527  }
528 
529  // Merge rows of A
530  // The array indicesExtra contains local column node ids for the current local node "row"
531  if (!isBoundary)
532  MergeRows(*A, row, indicesExtra, colTranslation);
533  else
534  indicesExtra.push_back(row);
535  indices = indicesExtra;
536  numTotal += indices.size();
537 
538  // add the local column node ids to the full columns array which
539  // contains the local column node ids for all local node rows
540  LO nnz = indices.size(), rownnz = 0;
541  for (LO colID = 0; colID < nnz; colID++) {
542  LO col = indices[colID];
543  columns[realnnz++] = col;
544  rownnz++;
545  }
546 
547  if (rownnz == 1) {
548  // If the only element remaining after filtering is diagonal, mark node as boundary
549  // FIXME: this should really be replaced by the following
550  // if (indices.size() == 1 && indices[0] == row)
551  // boundaryNodes[row] = true;
552  // We do not do it this way now because there is no framework for distinguishing isolated
553  // and boundary nodes in the aggregation algorithms
554  amalgBoundaryNodes[row] = true;
555  }
556  rows[row+1] = realnnz;
557  } //for (LO row = 0; row < numRows; row++)
558  columns.resize(realnnz);
559 
560  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
561  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
562 
563  if (GetVerbLevel() & Statistics1) {
564  GO numLocalBoundaryNodes = 0;
565  GO numGlobalBoundaryNodes = 0;
566 
567  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
568  if (amalgBoundaryNodes[i])
569  numLocalBoundaryNodes++;
570 
571  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
572  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
573  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
574  << " agglomerated Dirichlet nodes" << std::endl;
575  }
576 
577  Set(currentLevel, "Graph", graph);
578  Set(currentLevel, "DofsPerNode", blkSize); // full block size
579 
580  } else if (A->GetFixedBlockSize() > 1 && threshold != STS::zero()) {
581  // Case 4: Multiple DOF/node problem with dropping
582  const RCP<const Map> rowMap = A->getRowMap();
583  const RCP<const Map> colMap = A->getColMap();
584 
585  graphType = "amalgamated";
586 
587  // build node row map (uniqueMap) and node column map (nonUniqueMap)
588  // the arrays rowTranslation and colTranslation contain the local node id
589  // given a local dof id. The data is calculated by the AmalgamationFactory and
590  // stored in the variable container "UnAmalgamationInfo"
591  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
592  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
593  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
594  Array<LO> colTranslation = *(amalInfo->getColTranslation());
595 
596  // get number of local nodes
597  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
598 
599  // Allocate space for the local graph
600  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
601  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
602 
603  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
604 
605  // Detect and record rows that correspond to Dirichlet boundary conditions
606  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
607  // TODO the array one bigger than the number of local rows, and the last entry can
608  // TODO hold the actual number of boundary nodes. Clever, huh?
609  ArrayRCP<const bool > pointBoundaryNodes;
610  pointBoundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
611 
612  // extract striding information
613  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
614  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
615  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
616  if (A->IsView("stridedMaps") == true) {
617  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
618  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
619  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
620  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
621  blkId = strMap->getStridedBlockId();
622  if (blkId > -1)
623  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
624  }
625 
626  // extract diagonal data for dropping strategy
628  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
629 
630  // loop over all local nodes
631  LO realnnz = 0;
632  rows[0] = 0;
633  Array<LO> indicesExtra;
634  for (LO row = 0; row < numRows; row++) {
635  ArrayView<const LO> indices;
636  indicesExtra.resize(0);
637 
638  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
639  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
640  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
641  // with local ids.
642  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
643  // node.
644  bool isBoundary = false;
645  isBoundary = true;
646  for (LO j = 0; j < blkPartSize; j++) {
647  if (!pointBoundaryNodes[row*blkPartSize+j]) {
648  isBoundary = false;
649  break;
650  }
651  }
652 
653  // Merge rows of A
654  // The array indicesExtra contains local column node ids for the current local node "row"
655  if (!isBoundary)
656  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
657  else
658  indicesExtra.push_back(row);
659  indices = indicesExtra;
660  numTotal += indices.size();
661 
662  // add the local column node ids to the full columns array which
663  // contains the local column node ids for all local node rows
664  LO nnz = indices.size(), rownnz = 0;
665  for (LO colID = 0; colID < nnz; colID++) {
666  LO col = indices[colID];
667  columns[realnnz++] = col;
668  rownnz++;
669  }
670 
671  if (rownnz == 1) {
672  // If the only element remaining after filtering is diagonal, mark node as boundary
673  // FIXME: this should really be replaced by the following
674  // if (indices.size() == 1 && indices[0] == row)
675  // boundaryNodes[row] = true;
676  // We do not do it this way now because there is no framework for distinguishing isolated
677  // and boundary nodes in the aggregation algorithms
678  amalgBoundaryNodes[row] = true;
679  }
680  rows[row+1] = realnnz;
681  } //for (LO row = 0; row < numRows; row++)
682  columns.resize(realnnz);
683 
684  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
685  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
686 
687  if (GetVerbLevel() & Statistics1) {
688  GO numLocalBoundaryNodes = 0;
689  GO numGlobalBoundaryNodes = 0;
690 
691  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
692  if (amalgBoundaryNodes[i])
693  numLocalBoundaryNodes++;
694 
695  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
696  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
697  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
698  << " agglomerated Dirichlet nodes" << std::endl;
699  }
700 
701  Set(currentLevel, "Graph", graph);
702  Set(currentLevel, "DofsPerNode", blkSize); // full block size
703  }
704 
705  } else if (algo == "distance laplacian") {
706  LO blkSize = A->GetFixedBlockSize();
707  GO indexBase = A->getRowMap()->getIndexBase();
708 
709  // [*0*] : FIXME
710  // ap: somehow, if I move this line to [*1*], Belos throws an error
711  // I'm not sure what's going on. Do we always have to Get data, if we did
712  // DeclareInput for it?
713  RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
714 
715  // Detect and record rows that correspond to Dirichlet boundary conditions
716  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
717  // TODO the array one bigger than the number of local rows, and the last entry can
718  // TODO hold the actual number of boundary nodes. Clever, huh?
719  ArrayRCP<const bool > pointBoundaryNodes;
720  pointBoundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
721 
722  if ( (blkSize == 1) && (threshold == STS::zero()) ) {
723  // Trivial case: scalar problem, no dropping. Can return original graph
724  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
725  graph->SetBoundaryNodeMap(pointBoundaryNodes);
726  graphType="unamalgamated";
727  numTotal = A->getNodeNumEntries();
728 
729  if (GetVerbLevel() & Statistics1) {
730  GO numLocalBoundaryNodes = 0;
731  GO numGlobalBoundaryNodes = 0;
732  for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
733  if (pointBoundaryNodes[i])
734  numLocalBoundaryNodes++;
735  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
736  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
737  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
738  }
739 
740  Set(currentLevel, "DofsPerNode", blkSize);
741  Set(currentLevel, "Graph", graph);
742 
743  } else {
744  // ap: We make quite a few assumptions here; general case may be a lot different,
745  // but much much harder to implement. We assume that:
746  // 1) all maps are standard maps, not strided maps
747  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
748  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
749  //
750  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
751  // but as I totally don't understand that code, here is my solution
752 
753  // [*1*]: see [*0*]
754 
755  // Check that the number of local coordinates is consistent with the #rows in A
756  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getNodeNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
757  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getNodeNumElements() << ") by modulo block size ("<< blkSize <<").");
758 
759  const RCP<const Map> colMap = A->getColMap();
760  RCP<const Map> uniqueMap, nonUniqueMap;
761  Array<LO> colTranslation;
762  if (blkSize == 1) {
763  uniqueMap = A->getRowMap();
764  nonUniqueMap = A->getColMap();
765  graphType="unamalgamated";
766 
767  } else {
768  uniqueMap = Coords->getMap();
769  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
770  "Different index bases for matrix and coordinates");
771 
772  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
773 
774  graphType = "amalgamated";
775  }
776  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
777 
778  RCP<RealValuedMultiVector> ghostedCoords;
779  RCP<Vector> ghostedLaplDiag;
780  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
781  if (threshold != STS::zero()) {
782  // Get ghost coordinates
783  RCP<const Import> importer;
784  {
785  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
786  if (blkSize == 1 && A->getCrsGraph()->getImporter() != Teuchos::null) {
787  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
788  importer = A->getCrsGraph()->getImporter();
789  } else {
790  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
791  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
792  }
793  } //subtimer
794  ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
795  {
796  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
797  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
798  } //subtimer
799 
800  // Construct Distance Laplacian diagonal
801  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
802  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
803  Array<LO> indicesExtra;
804  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
805  if (threshold != STS::zero()) {
806  const size_t numVectors = ghostedCoords->getNumVectors();
807  coordData.reserve(numVectors);
808  for (size_t j = 0; j < numVectors; j++) {
809  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
810  coordData.push_back(tmpData);
811  }
812  }
813  {
814  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
815  for (LO row = 0; row < numRows; row++) {
816  ArrayView<const LO> indices;
817 
818  if (blkSize == 1) {
819  ArrayView<const SC> vals;
820  A->getLocalRowView(row, indices, vals);
821 
822  } else {
823  // Merge rows of A
824  indicesExtra.resize(0);
825  MergeRows(*A, row, indicesExtra, colTranslation);
826  indices = indicesExtra;
827  }
828 
829  LO nnz = indices.size();
830  for (LO colID = 0; colID < nnz; colID++) {
831  const LO col = indices[colID];
832 
833  if (row != col) {
834  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
835  }
836  }
837  }
838  } //subtimer
839  {
840  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
841  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
842  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
843  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
844  } //subtimer
845 
846  } else {
847  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
848  }
849 
850  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
851 
852  // allocate space for the local graph
853  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
854  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
855 
856  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
857 
858  LO realnnz = 0;
859  rows[0] = 0;
860  Array<LO> indicesExtra;
861  {
862  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
863  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
864  if (threshold != STS::zero()) {
865  const size_t numVectors = ghostedCoords->getNumVectors();
866  coordData.reserve(numVectors);
867  for (size_t j = 0; j < numVectors; j++) {
868  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
869  coordData.push_back(tmpData);
870  }
871  }
872 
873  for (LO row = 0; row < numRows; row++) {
874  ArrayView<const LO> indices;
875  indicesExtra.resize(0);
876  bool isBoundary = false;
877 
878  if (blkSize == 1) {
879  ArrayView<const SC> vals;
880  A->getLocalRowView(row, indices, vals);
881  isBoundary = pointBoundaryNodes[row];
882  } else {
883  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
884  for (LO j = 0; j < blkSize; j++) {
885  if (!pointBoundaryNodes[row*blkSize+j]) {
886  isBoundary = false;
887  break;
888  }
889  }
890 
891  // Merge rows of A
892  if (!isBoundary)
893  MergeRows(*A, row, indicesExtra, colTranslation);
894  else
895  indicesExtra.push_back(row);
896  indices = indicesExtra;
897  }
898  numTotal += indices.size();
899 
900  LO nnz = indices.size(), rownnz = 0;
901 
902  if (threshold != STS::zero()) {
903  // default
904  if (distanceLaplacianAlgo == defaultAlgo) {
905  /* Standard Distance Laplacian */
906  for (LO colID = 0; colID < nnz; colID++) {
907 
908  LO col = indices[colID];
909 
910  if (row == col) {
911  columns[realnnz++] = col;
912  rownnz++;
913  continue;
914  }
915 
916  // We do not want the distance Laplacian aggregating boundary nodes
917  if(isBoundary) continue;
918 
919 
920  SC laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
921  real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
922  real_type aij = STS::magnitude(laplVal*laplVal);
923 
924  if (aij > aiiajj) {
925  columns[realnnz++] = col;
926  rownnz++;
927  } else {
928  numDropped++;
929  }
930  }
931  } else {
932  /* Cut Algorithm */
933  using DropTol = Details::DropTol<real_type,LO>;
934  std::vector<DropTol> drop_vec;
935  drop_vec.reserve(nnz);
936  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
937  const real_type one = Teuchos::ScalarTraits<real_type>::one();
938 
939  // find magnitudes
940  for (LO colID = 0; colID < nnz; colID++) {
941 
942  LO col = indices[colID];
943 
944  if (row == col) {
945  drop_vec.emplace_back( zero, one, colID, false);
946  continue;
947  }
948  // We do not want the distance Laplacian aggregating boundary nodes
949  if(isBoundary) continue;
950 
951  SC laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
952  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
953  real_type aij = STS::magnitude(laplVal*laplVal);
954 
955  drop_vec.emplace_back(aij, aiiajj, colID, false);
956  }
957 
958  const size_t n = drop_vec.size();
959 
960  if (distanceLaplacianAlgo == unscaled_cut) {
961 
962  std::sort( drop_vec.begin(), drop_vec.end()
963  , [](DropTol const& a, DropTol const& b) {
964  return a.val > b.val;
965  }
966  );
967 
968  bool drop = false;
969  for (size_t i=1; i<n; ++i) {
970  if (!drop) {
971  auto const& x = drop_vec[i-1];
972  auto const& y = drop_vec[i];
973  auto a = x.val;
974  auto b = y.val;
975  if (a > realThreshold*b) {
976  drop = true;
977 #ifdef HAVE_MUELU_DEBUG
978  if (distanceLaplacianCutVerbose) {
979  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
980  }
981 #endif
982  }
983  }
984  drop_vec[i].drop = drop;
985  }
986  }
987  else if (distanceLaplacianAlgo == scaled_cut) {
988 
989  std::sort( drop_vec.begin(), drop_vec.end()
990  , [](DropTol const& a, DropTol const& b) {
991  return a.val/a.diag > b.val/b.diag;
992  }
993  );
994 
995  bool drop = false;
996  for (size_t i=1; i<n; ++i) {
997  if (!drop) {
998  auto const& x = drop_vec[i-1];
999  auto const& y = drop_vec[i];
1000  auto a = x.val/x.diag;
1001  auto b = y.val/y.diag;
1002  if (a > realThreshold*b) {
1003  drop = true;
1004 #ifdef HAVE_MUELU_DEBUG
1005  if (distanceLaplacianCutVerbose) {
1006  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1007  }
1008 #endif
1009  }
1010  }
1011  drop_vec[i].drop = drop;
1012  }
1013  }
1014 
1015  std::sort( drop_vec.begin(), drop_vec.end()
1016  , [](DropTol const& a, DropTol const& b) {
1017  return a.col < b.col;
1018  }
1019  );
1020 
1021  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1022  LO col = indices[drop_vec[idxID].col];
1023 
1024 
1025  // don't drop diagonal
1026  if (row == col) {
1027  columns[realnnz++] = col;
1028  rownnz++;
1029  continue;
1030  }
1031 
1032  if (!drop_vec[idxID].drop) {
1033  columns[realnnz++] = col;
1034  rownnz++;
1035  } else {
1036  numDropped++;
1037  }
1038  }
1039  }
1040  } else {
1041  // Skip laplace calculation and threshold comparison for zero threshold
1042  for (LO colID = 0; colID < nnz; colID++) {
1043  LO col = indices[colID];
1044  columns[realnnz++] = col;
1045  rownnz++;
1046  }
1047  }
1048 
1049  if ( rownnz == 1) {
1050  // If the only element remaining after filtering is diagonal, mark node as boundary
1051  // FIXME: this should really be replaced by the following
1052  // if (indices.size() == 1 && indices[0] == row)
1053  // boundaryNodes[row] = true;
1054  // We do not do it this way now because there is no framework for distinguishing isolated
1055  // and boundary nodes in the aggregation algorithms
1056  amalgBoundaryNodes[row] = true;
1057  }
1058  rows[row+1] = realnnz;
1059  } //for (LO row = 0; row < numRows; row++)
1060 
1061  } //subtimer
1062  columns.resize(realnnz);
1063 
1064  RCP<GraphBase> graph;
1065  {
1066  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1067  graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1068  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1069  } //subtimer
1070 
1071  if (GetVerbLevel() & Statistics1) {
1072  GO numLocalBoundaryNodes = 0;
1073  GO numGlobalBoundaryNodes = 0;
1074 
1075  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1076  if (amalgBoundaryNodes[i])
1077  numLocalBoundaryNodes++;
1078 
1079  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1080  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1081  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1082  << " using threshold " << dirichletThreshold << std::endl;
1083  }
1084 
1085  Set(currentLevel, "Graph", graph);
1086  Set(currentLevel, "DofsPerNode", blkSize);
1087  }
1088  }
1089 
1090  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1091  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1092  GO numGlobalTotal, numGlobalDropped;
1093  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1094  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1095  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1096  if (numGlobalTotal != 0)
1097  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1098  GetOStream(Statistics1) << std::endl;
1099  }
1100 
1101  } else {
1102  //what Tobias has implemented
1103 
1104  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1105  //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1106  GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1107  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1108 
1109  RCP<const Map> rowMap = A->getRowMap();
1110  RCP<const Map> colMap = A->getColMap();
1111 
1112  LO blockdim = 1; // block dim for fixed size blocks
1113  GO indexBase = rowMap->getIndexBase(); // index base of maps
1114  GO offset = 0;
1115 
1116  // 1) check for blocking/striding information
1117  if(A->IsView("stridedMaps") &&
1118  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1119  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1120  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1121  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1122  blockdim = strMap->getFixedBlockSize();
1123  offset = strMap->getOffset();
1124  oldView = A->SwitchToView(oldView);
1125  GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1126  } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1127 
1128  // 2) get row map for amalgamated matrix (graph of A)
1129  // with same distribution over all procs as row map of A
1130  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1131  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getNodeNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1132 
1133  // 3) create graph of amalgamated matrix
1134  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getNodeMaxNumRowEntries()*blockdim);
1135 
1136  LO numRows = A->getRowMap()->getNodeNumElements();
1137  LO numNodes = nodeMap->getNodeNumElements();
1138  const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1139  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1140  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1141 
1142  // 4) do amalgamation. generate graph of amalgamated matrix
1143  // Note, this code is much more inefficient than the leightwight implementation
1144  // Most of the work has already been done in the AmalgamationFactory
1145  for(LO row=0; row<numRows; row++) {
1146  // get global DOF id
1147  GO grid = rowMap->getGlobalElement(row);
1148 
1149  // reinitialize boolean helper variable
1150  bIsDiagonalEntry = false;
1151 
1152  // translate grid to nodeid
1153  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1154 
1155  size_t nnz = A->getNumEntriesInLocalRow(row);
1156  Teuchos::ArrayView<const LO> indices;
1157  Teuchos::ArrayView<const SC> vals;
1158  A->getLocalRowView(row, indices, vals);
1159 
1160  RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1161  LO realnnz = 0;
1162  for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1163  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1164 
1165  if(vals[col]!=STS::zero()) {
1166  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1167  cnodeIds->push_back(cnodeId);
1168  realnnz++; // increment number of nnz in matrix row
1169  if (grid == gcid) bIsDiagonalEntry = true;
1170  }
1171  }
1172 
1173  if(realnnz == 1 && bIsDiagonalEntry == true) {
1174  LO lNodeId = nodeMap->getLocalElement(nodeId);
1175  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1176  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1177  amalgBoundaryNodes[lNodeId] = true;
1178  }
1179 
1180  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1181 
1182  if(arr_cnodeIds.size() > 0 )
1183  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1184  }
1185  // fill matrix graph
1186  crsGraph->fillComplete(nodeMap,nodeMap);
1187 
1188  // 5) create MueLu Graph object
1189  RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1190 
1191  // Detect and record rows that correspond to Dirichlet boundary conditions
1192  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1193 
1194  if (GetVerbLevel() & Statistics1) {
1195  GO numLocalBoundaryNodes = 0;
1196  GO numGlobalBoundaryNodes = 0;
1197  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1198  if (amalgBoundaryNodes[i])
1199  numLocalBoundaryNodes++;
1200  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1201  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1202  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1203  }
1204 
1205  // 6) store results in Level
1206  //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1207  Set(currentLevel, "DofsPerNode", blockdim);
1208  Set(currentLevel, "Graph", graph);
1209 
1210  } //if (doExperimentalWrap) ... else ...
1211 
1212  } //Build
1213 
1214  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1215  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1216  typedef typename ArrayView<const LO>::size_type size_type;
1217 
1218  // extract striding information
1219  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1220  if (A.IsView("stridedMaps") == true) {
1221  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1222  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1223  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1224  if (strMap->getStridedBlockId() > -1)
1225  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1226  }
1227 
1228  // count nonzero entries in all dof rows associated with node row
1229  size_t nnz = 0, pos = 0;
1230  for (LO j = 0; j < blkSize; j++)
1231  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1232 
1233  if (nnz == 0) {
1234  cols.resize(0);
1235  return;
1236  }
1237 
1238  cols.resize(nnz);
1239 
1240  // loop over all local dof rows associated with local node "row"
1241  ArrayView<const LO> inds;
1242  ArrayView<const SC> vals;
1243  for (LO j = 0; j < blkSize; j++) {
1244  A.getLocalRowView(row*blkSize+j, inds, vals);
1245  size_type numIndices = inds.size();
1246 
1247  if (numIndices == 0) // skip empty dof rows
1248  continue;
1249 
1250  // cols: stores all local node ids for current local node id "row"
1251  cols[pos++] = translation[inds[0]];
1252  for (size_type k = 1; k < numIndices; k++) {
1253  LO nodeID = translation[inds[k]];
1254  // Here we try to speed up the process by reducing the size of an array
1255  // to sort. This works if the column nonzeros belonging to the same
1256  // node are stored consequently.
1257  if (nodeID != cols[pos-1])
1258  cols[pos++] = nodeID;
1259  }
1260  }
1261  cols.resize(pos);
1262  nnz = pos;
1263 
1264  // Sort and remove duplicates
1265  std::sort(cols.begin(), cols.end());
1266  pos = 0;
1267  for (size_t j = 1; j < nnz; j++)
1268  if (cols[j] != cols[pos])
1269  cols[++pos] = cols[j];
1270  cols.resize(pos+1);
1271  }
1272 
1273  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1274  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1275  typedef typename ArrayView<const LO>::size_type size_type;
1276  typedef Teuchos::ScalarTraits<SC> STS;
1277 
1278  // extract striding information
1279  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1280  if (A.IsView("stridedMaps") == true) {
1281  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1282  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1283  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1284  if (strMap->getStridedBlockId() > -1)
1285  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1286  }
1287 
1288  // count nonzero entries in all dof rows associated with node row
1289  size_t nnz = 0, pos = 0;
1290  for (LO j = 0; j < blkSize; j++)
1291  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1292 
1293  if (nnz == 0) {
1294  cols.resize(0);
1295  return;
1296  }
1297 
1298  cols.resize(nnz);
1299 
1300  // loop over all local dof rows associated with local node "row"
1301  ArrayView<const LO> inds;
1302  ArrayView<const SC> vals;
1303  for (LO j = 0; j < blkSize; j++) {
1304  A.getLocalRowView(row*blkSize+j, inds, vals);
1305  size_type numIndices = inds.size();
1306 
1307  if (numIndices == 0) // skip empty dof rows
1308  continue;
1309 
1310  // cols: stores all local node ids for current local node id "row"
1311  LO prevNodeID = -1;
1312  for (size_type k = 0; k < numIndices; k++) {
1313  LO dofID = inds[k];
1314  LO nodeID = translation[inds[k]];
1315 
1316  // we avoid a square root by using squared values
1317  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1318  typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1319 
1320  // check dropping criterion
1321  if (aij > aiiajj || (row*blkSize+j == dofID)) {
1322  // accept entry in graph
1323 
1324  // Here we try to speed up the process by reducing the size of an array
1325  // to sort. This works if the column nonzeros belonging to the same
1326  // node are stored consequently.
1327  if (nodeID != prevNodeID) {
1328  cols[pos++] = nodeID;
1329  prevNodeID = nodeID;
1330  }
1331  }
1332  }
1333  }
1334  cols.resize(pos);
1335  nnz = pos;
1336 
1337  // Sort and remove duplicates
1338  std::sort(cols.begin(), cols.end());
1339  pos = 0;
1340  for (size_t j = 1; j < nnz; j++)
1341  if (cols[j] != cols[pos])
1342  cols[++pos] = cols[j];
1343  cols.resize(pos+1);
1344 
1345  return;
1346  }
1347 } //namespace MueLu
1348 
1349 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
translate global (row/column) id to global amalgamation block id
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
void DeclareInput(Level &currentLevel) const
Input.
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol const &)=default
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default