MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47#define MUELU_COALESCEDROPFACTORY_DEF_HPP
48
49#include <Xpetra_CrsGraphFactory.hpp>
50#include <Xpetra_CrsGraph.hpp>
51#include <Xpetra_ImportFactory.hpp>
52#include <Xpetra_ExportFactory.hpp>
53#include <Xpetra_MapFactory.hpp>
54#include <Xpetra_Map.hpp>
55#include <Xpetra_Matrix.hpp>
56#include <Xpetra_MultiVectorFactory.hpp>
57#include <Xpetra_MultiVector.hpp>
58#include <Xpetra_StridedMap.hpp>
59#include <Xpetra_VectorFactory.hpp>
60#include <Xpetra_Vector.hpp>
61
62#include <Xpetra_IO.hpp>
63
65
66#include "MueLu_AmalgamationFactory.hpp"
67#include "MueLu_AmalgamationInfo.hpp"
68#include "MueLu_Exceptions.hpp"
69#include "MueLu_GraphBase.hpp"
70#include "MueLu_Graph.hpp"
71#include "MueLu_Level.hpp"
72#include "MueLu_LWGraph.hpp"
73#include "MueLu_MasterList.hpp"
74#include "MueLu_Monitor.hpp"
75#include "MueLu_PreDropFunctionConstVal.hpp"
76#include "MueLu_Utilities.hpp"
77
78#ifdef HAVE_XPETRA_TPETRA
79#include "Tpetra_CrsGraphTransposer.hpp"
80#endif
81
82#include <algorithm>
83#include <cstdlib>
84#include <string>
85
86// If defined, read environment variables.
87// Should be removed once we are confident that this works.
88//#define DJS_READ_ENV_VARIABLES
89
90
91namespace MueLu {
92
93 namespace Details {
94 template<class real_type, class LO>
95 struct DropTol {
96
97 DropTol() = default;
98 DropTol(DropTol const&) = default;
99 DropTol(DropTol &&) = default;
100
101 DropTol& operator=(DropTol const&) = default;
102 DropTol& operator=(DropTol &&) = default;
103
104 DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
105 : val{val_}, diag{diag_}, col{col_}, drop{drop_}
106 {}
107
108 real_type val {Teuchos::ScalarTraits<real_type>::zero()};
109 real_type diag {Teuchos::ScalarTraits<real_type>::zero()};
110 LO col {Teuchos::OrdinalTraits<LO>::invalid()};
111 bool drop {true};
112
113 // CMS: Auxillary information for debugging info
114 // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
115 };
116 }
117
118
119 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121 RCP<ParameterList> validParamList = rcp(new ParameterList());
122
123#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
124 SET_VALID_ENTRY("aggregation: drop tol");
125 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
126 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
127 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
128 SET_VALID_ENTRY("aggregation: row sum drop tol");
129 SET_VALID_ENTRY("aggregation: drop scheme");
130 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
131 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
132 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
133
134 {
135 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
136 // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
137 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("signed classical sa","classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme")));
138
139 }
140 SET_VALID_ENTRY("aggregation: distance laplacian algo");
141 SET_VALID_ENTRY("aggregation: classical algo");
142 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
143#undef SET_VALID_ENTRY
144 validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
145
146 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
147 validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
148 validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
149 validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
150
151 return validParamList;
152 }
153
154 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
156
157 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
159 Input(currentLevel, "A");
160 Input(currentLevel, "UnAmalgamationInfo");
161
162 const ParameterList& pL = GetParameterList();
163 if (pL.get<bool>("lightweight wrap") == true) {
164 std::string algo = pL.get<std::string>("aggregation: drop scheme");
165 if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
166 Input(currentLevel, "Coordinates");
167 }
168 if(algo == "signed classical sa")
169 ;
170 else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
171 Input(currentLevel, "BlockNumber");
172 }
173 }
174
175 }
176
177 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
179
180 FactoryMonitor m(*this, "Build", currentLevel);
181
182 typedef Teuchos::ScalarTraits<SC> STS;
183 typedef typename STS::magnitudeType real_type;
184 typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
185 typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
186
187 if (predrop_ != Teuchos::null)
188 GetOStream(Parameters0) << predrop_->description();
189
190 RCP<Matrix> realA = Get< RCP<Matrix> >(currentLevel, "A");
191 RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
192 const ParameterList & pL = GetParameterList();
193 bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
194
195 GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
196 std::string algo = pL.get<std::string>("aggregation: drop scheme");
197 const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
198
199 RCP<RealValuedMultiVector> Coords;
200 RCP<Matrix> A;
201
202 bool use_block_algorithm=false;
203 LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
204 bool useSignedClassicalRS = false;
205 bool useSignedClassicalSA = false;
206 bool generateColoringGraph = false;
207
208 // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
209 // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
210 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
211
212
213 RCP<LocalOrdinalVector> ghostedBlockNumber;
214 ArrayRCP<const LO> g_block_id;
215
216 if(algo == "distance laplacian" ) {
217 // Grab the coordinates for distance laplacian
218 Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
219 A = realA;
220 }
221 else if(algo == "signed classical sa") {
222 useSignedClassicalSA = true;
223 algo = "classical";
224 A = realA;
225 }
226 else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
227 useSignedClassicalRS = true;
228 // if(realA->GetFixedBlockSize() > 1) {
229 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
230 // Ghost the column block numbers if we need to
231 RCP<const Import> importer = realA->getCrsGraph()->getImporter();
232 if(!importer.is_null()) {
233 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
234 ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
235 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
236 }
237 else {
238 ghostedBlockNumber = BlockNumber;
239 }
240 g_block_id = ghostedBlockNumber->getData(0);
241 // }
242 if(algo == "block diagonal colored signed classical")
243 generateColoringGraph=true;
244 algo = "classical";
245 A = realA;
246
247 }
248 else if(algo == "block diagonal") {
249 // Handle the "block diagonal" filtering and then leave
250 BlockDiagonalize(currentLevel,realA,false);
251 return;
252 }
253 else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
254 // Handle the "block diagonal" filtering, and then continue onward
255 use_block_algorithm = true;
256 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel,realA,true);
257 if(algo == "block diagonal distance laplacian") {
258 // We now need to expand the coordinates by the interleaved blocksize
259 RCP<RealValuedMultiVector> OldCoords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
260 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
261 LO dim = (LO) OldCoords->getNumVectors();
262 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim);
263 for(LO k=0; k<dim; k++){
264 ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
265 ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
266 for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) {
267 LO new_base = i*dim;
268 for(LO j=0; j<interleaved_blocksize; j++)
269 new_vec[new_base + j] = old_vec[i];
270 }
271 }
272 }
273 else {
274 Coords = OldCoords;
275 }
276 algo = "distance laplacian";
277 }
278 else if(algo == "block diagonal classical") {
279 algo = "classical";
280 }
281 // All cases
282 A = filteredMatrix;
283 rowSumTol = -1.0;
284 }
285 else {
286 A = realA;
287 }
288
289 // Distance Laplacian weights
290 Array<double> dlap_weights = pL.get<Array<double> >("aggregation: distance laplacian directional weights");
291 enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS};
292 int use_dlap_weights = NO_WEIGHTS;
293 if(algo == "distance laplacian") {
294 LO dim = (LO) Coords->getNumVectors();
295 // If anything isn't 1.0 we need to turn on the weighting
296 bool non_unity = false;
297 for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) {
298 if(dlap_weights[i] != 1.0) {
299 non_unity = true;
300 }
301 }
302 if(non_unity) {
303 LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
304 if((LO)dlap_weights.size() == dim)
305 use_dlap_weights = SINGLE_WEIGHTS;
306 else if((LO)dlap_weights.size() == blocksize * dim)
307 use_dlap_weights = BLOCK_WEIGHTS;
308 else {
309 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
310 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
311 }
313 GetOStream(Statistics1) << "Using distance laplacian weights: "<<dlap_weights<<std::endl;
314 }
315 }
316
317 // decide wether to use the fast-track code path for standard maps or the somewhat slower
318 // code path for non-standard maps
319 /*bool bNonStandardMaps = false;
320 if (A->IsView("stridedMaps") == true) {
321 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
322 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
323 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
324 if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
325 bNonStandardMaps = true;
326 }*/
327
328 if (doExperimentalWrap) {
329 TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
330 TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
331
332 SC threshold;
333 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
334 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
335 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0,currentLevel.GetLevelID());
336 else
337 threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
338
339
340 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
341 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
342 real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
343
345 // Remove this bit once we are confident that cut-based dropping works.
346#ifdef HAVE_MUELU_DEBUG
347 int distanceLaplacianCutVerbose = 0;
348#endif
349#ifdef DJS_READ_ENV_VARIABLES
350 if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
351 distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
352 }
353
354 if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
355 auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
356 realThreshold = 1e-4*tmp;
357 }
358
359# ifdef HAVE_MUELU_DEBUG
360 if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
361 distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
362 }
363# endif
364#endif
366
367 enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric};
368
369 decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
370 decisionAlgoType classicalAlgo = defaultAlgo;
371 if (algo == "distance laplacian") {
372 if (distanceLaplacianAlgoStr == "default")
373 distanceLaplacianAlgo = defaultAlgo;
374 else if (distanceLaplacianAlgoStr == "unscaled cut")
375 distanceLaplacianAlgo = unscaled_cut;
376 else if (distanceLaplacianAlgoStr == "scaled cut")
377 distanceLaplacianAlgo = scaled_cut;
378 else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
379 distanceLaplacianAlgo = scaled_cut_symmetric;
380 else
381 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
382 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize()<< std::endl;
383 } else if (algo == "classical") {
384 if (classicalAlgoStr == "default")
385 classicalAlgo = defaultAlgo;
386 else if (classicalAlgoStr == "unscaled cut")
387 classicalAlgo = unscaled_cut;
388 else if (classicalAlgoStr == "scaled cut")
389 classicalAlgo = scaled_cut;
390 else
391 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
392 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
393
394 } else
395 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
396 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
397
398 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
399
400
401 // NOTE: We don't support signed classical RS or SA with cut drop at present
402 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
403 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
404
405 GO numDropped = 0, numTotal = 0;
406 std::string graphType = "unamalgamated"; //for description purposes only
407
408
409 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
410 BlockSize is the number of storage blocks that must kept together during the amalgamation process.
411
412 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
413
414 numPDEs = BlockSize * storageblocksize.
415
416 If numPDEs==1
417 Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
418 No other values makes sense.
419
420 If numPDEs>1
421 If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
422 If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
423 Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
424 */
425 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
426 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
427
428
429 /************************** RS or SA-style Classical Dropping (and variants) **************************/
430 if (algo == "classical") {
431 if (predrop_ == null) {
432 // ap: this is a hack: had to declare predrop_ as mutable
433 predrop_ = rcp(new PreDropFunctionConstVal(threshold));
434 }
435
436 if (predrop_ != null) {
437 RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
438 TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
439 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
440 // If a user provided a predrop function, it overwrites the XML threshold parameter
441 SC newt = predropConstVal->GetThreshold();
442 if (newt != threshold) {
443 GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
444 threshold = newt;
445 }
446 }
447 // At this points we either have
448 // (predrop_ != null)
449 // Therefore, it is sufficient to check only threshold
450 if ( BlockSize==1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
451 // Case 1: scalar problem, no dropping => just use matrix graph
452 RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
453 // Detect and record rows that correspond to Dirichlet boundary conditions
454 ArrayRCP<bool > boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
455 if (rowSumTol > 0.)
456 Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
457
458 graph->SetBoundaryNodeMap(boundaryNodes);
459 numTotal = A->getLocalNumEntries();
460
461 if (GetVerbLevel() & Statistics1) {
462 GO numLocalBoundaryNodes = 0;
463 GO numGlobalBoundaryNodes = 0;
464 for (LO i = 0; i < boundaryNodes.size(); ++i)
465 if (boundaryNodes[i])
466 numLocalBoundaryNodes++;
467 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
468 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
469 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
470 }
471
472 Set(currentLevel, "DofsPerNode", 1);
473 Set(currentLevel, "Graph", graph);
474
475 } else if ( (BlockSize == 1 && threshold != STS::zero()) ||
476 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
477 (BlockSize == 1 && useSignedClassicalRS) ||
478 (BlockSize == 1 && useSignedClassicalSA) ) {
479 // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
480 // graph's map information, e.g., whether index is local
481 // OR a matrix without a CrsGraph
482
483 // allocate space for the local graph
484 ArrayRCP<LO> rows (A->getLocalNumRows()+1);
485 ArrayRCP<LO> columns(A->getLocalNumEntries());
486
487 using MT = typename STS::magnitudeType;
488 RCP<Vector> ghostedDiag;
489 ArrayRCP<const SC> ghostedDiagVals;
490 ArrayRCP<const MT> negMaxOffDiagonal;
491 // RS style needs the max negative off-diagonal, SA style needs the diagonal
492 if(useSignedClassicalRS) {
493 if(ghostedBlockNumber.is_null()) {
496 GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
497 }
498 else {
499 negMaxOffDiagonal = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber);
501 GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
502 }
503 }
504 else {
506 ghostedDiagVals = ghostedDiag->getData(0);
507 }
508 ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
509 if (rowSumTol > 0.) {
510 if(ghostedBlockNumber.is_null()) {
512 GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
513 Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
514 } else {
516 GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
517 Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
518 }
519 }
520
521 LO realnnz = 0;
522 rows[0] = 0;
523 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
524 size_t nnz = A->getNumEntriesInLocalRow(row);
525 bool rowIsDirichlet = boundaryNodes[row];
526 ArrayView<const LO> indices;
527 ArrayView<const SC> vals;
528 A->getLocalRowView(row, indices, vals);
529
530 if(classicalAlgo == defaultAlgo) {
531 //FIXME the current predrop function uses the following
532 //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
533 //FIXME but the threshold doesn't take into account the rows' diagonal entries
534 //FIXME For now, hardwiring the dropping in here
535
536 LO rownnz = 0;
537 if(useSignedClassicalRS) {
538 // Signed classical RS style
539 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540 LO col = indices[colID];
541 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
542 MT neg_aij = - STS::real(vals[colID]);
543 /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
544 g_block_id.is_null() ? -1 : g_block_id[row],
545 g_block_id.is_null() ? -1 : g_block_id[col],
546 neg_aij, max_neg_aik);*/
547 if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
548 columns[realnnz++] = col;
549 rownnz++;
550 } else
551 numDropped++;
552 }
553 rows[row+1] = realnnz;
554 }
555 else if(useSignedClassicalSA) {
556 // Signed classical SA style
557 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
558 LO col = indices[colID];
559
560 bool is_nonpositive = STS::real(vals[colID]) <= 0;
561 MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
562 MT aij = is_nonpositive ? STS::magnitude(vals[colID]*vals[colID]) : (-STS::magnitude(vals[colID]*vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
563 /*
564 if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
565 vals[colID],aij, aiiajj);
566 */
567
568 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
569 columns[realnnz++] = col;
570 rownnz++;
571 } else
572 numDropped++;
573 }
574 rows[row+1] = realnnz;
575 }
576 else {
577 // Standard abs classical
578 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
579 LO col = indices[colID];
580 MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
581 MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
582
583 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
584 columns[realnnz++] = col;
585 rownnz++;
586 } else
587 numDropped++;
588 }
589 rows[row+1] = realnnz;
590 }
591 }
592 else {
593 /* Cut Algorithm */
594 //CMS
595 using DropTol = Details::DropTol<real_type,LO>;
596 std::vector<DropTol> drop_vec;
597 drop_vec.reserve(nnz);
598 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
599 const real_type one = Teuchos::ScalarTraits<real_type>::one();
600 LO rownnz = 0;
601 // NOTE: This probably needs to be fixed for rowsum
602
603 // find magnitudes
604 for (LO colID = 0; colID < (LO)nnz; colID++) {
605 LO col = indices[colID];
606 if (row == col) {
607 drop_vec.emplace_back( zero, one, colID, false);
608 continue;
609 }
610
611 // Don't aggregate boundaries
612 if(boundaryNodes[colID]) continue;
613 typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
614 typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
615 drop_vec.emplace_back(aij, aiiajj, colID, false);
616 }
617
618 const size_t n = drop_vec.size();
619
620 if (classicalAlgo == unscaled_cut) {
621 std::sort( drop_vec.begin(), drop_vec.end()
622 , [](DropTol const& a, DropTol const& b) {
623 return a.val > b.val;
624 });
625
626 bool drop = false;
627 for (size_t i=1; i<n; ++i) {
628 if (!drop) {
629 auto const& x = drop_vec[i-1];
630 auto const& y = drop_vec[i];
631 auto a = x.val;
632 auto b = y.val;
633 if (a > realThreshold*b) {
634 drop = true;
635#ifdef HAVE_MUELU_DEBUG
636 if (distanceLaplacianCutVerbose) {
637 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
638 }
639#endif
640 }
641 }
642 drop_vec[i].drop = drop;
643 }
644 } else if (classicalAlgo == scaled_cut) {
645 std::sort( drop_vec.begin(), drop_vec.end()
646 , [](DropTol const& a, DropTol const& b) {
647 return a.val/a.diag > b.val/b.diag;
648 });
649 bool drop = false;
650 // printf("[%d] Scaled Cut: ",(int)row);
651 // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
652 for (size_t i=1; i<n; ++i) {
653 if (!drop) {
654 auto const& x = drop_vec[i-1];
655 auto const& y = drop_vec[i];
656 auto a = x.val/x.diag;
657 auto b = y.val/y.diag;
658 if (a > realThreshold*b) {
659 drop = true;
660
661#ifdef HAVE_MUELU_DEBUG
662 if (distanceLaplacianCutVerbose) {
663 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
664 }
665#endif
666 }
667 // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
668
669 }
670 drop_vec[i].drop = drop;
671 }
672 // printf("\n");
673 }
674 std::sort( drop_vec.begin(), drop_vec.end()
675 , [](DropTol const& a, DropTol const& b) {
676 return a.col < b.col;
677 }
678 );
679
680 for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
681 LO col = indices[drop_vec[idxID].col];
682 // don't drop diagonal
683 if (row == col) {
684 columns[realnnz++] = col;
685 rownnz++;
686 continue;
687 }
688
689 if (!drop_vec[idxID].drop) {
690 columns[realnnz++] = col;
691 rownnz++;
692 } else {
693 numDropped++;
694 }
695 }
696 // CMS
697 rows[row+1] = realnnz;
698
699 }
700 }//end for row
701
702 columns.resize(realnnz);
703 numTotal = A->getLocalNumEntries();
704
705 if (aggregationMayCreateDirichlet) {
706 // If the only element remaining after filtering is diagonal, mark node as boundary
707 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
708 if (rows[row+1]- rows[row] <= 1)
709 boundaryNodes[row] = true;
710 }
711 }
712
713 RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
714 graph->SetBoundaryNodeMap(boundaryNodes);
715 if (GetVerbLevel() & Statistics1) {
716 GO numLocalBoundaryNodes = 0;
717 GO numGlobalBoundaryNodes = 0;
718 for (LO i = 0; i < boundaryNodes.size(); ++i)
719 if (boundaryNodes[i])
720 numLocalBoundaryNodes++;
721 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
722 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
723 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
724 }
725 Set(currentLevel, "Graph", graph);
726 Set(currentLevel, "DofsPerNode", 1);
727
728 // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
729 if(generateColoringGraph) {
730 RCP<GraphBase> colorGraph;
731 RCP<const Import> importer = A->getCrsGraph()->getImporter();
732 BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer);
733 Set(currentLevel, "Coloring Graph",colorGraph);
734 // #define CMS_DUMP
735#ifdef CMS_DUMP
736 {
737 Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
738 Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
739 // int rank = graph->GetDomainMap()->getComm()->getRank();
740 // {
741 // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
742 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
743 // colorGraph->print(*fancy,Debug);
744 // }
745 // {
746 // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
747 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
748 // graph->print(*fancy,Debug);
749 // }
750
751 }
752#endif
753 }//end generateColoringGraph
754 } else if (BlockSize > 1 && threshold == STS::zero()) {
755 // Case 3: Multiple DOF/node problem without dropping
756 const RCP<const Map> rowMap = A->getRowMap();
757 const RCP<const Map> colMap = A->getColMap();
758
759 graphType = "amalgamated";
760
761 // build node row map (uniqueMap) and node column map (nonUniqueMap)
762 // the arrays rowTranslation and colTranslation contain the local node id
763 // given a local dof id. The data is calculated by the AmalgamationFactory and
764 // stored in the variable container "UnAmalgamationInfo"
765 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
766 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
767 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
768 Array<LO> colTranslation = *(amalInfo->getColTranslation());
769
770 // get number of local nodes
771 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
772
773 // Allocate space for the local graph
774 ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
775 ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
776
777 const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
778
779 // Detect and record rows that correspond to Dirichlet boundary conditions
780 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
781 // TODO the array one bigger than the number of local rows, and the last entry can
782 // TODO hold the actual number of boundary nodes. Clever, huh?
783 ArrayRCP<bool > pointBoundaryNodes;
784 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
785 if (rowSumTol > 0.)
786 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
787
788
789 // extract striding information
790 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
791 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
792 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
793 if (A->IsView("stridedMaps") == true) {
794 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
795 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
796 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
797 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
798 blkId = strMap->getStridedBlockId();
799 if (blkId > -1)
800 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
801 }
802
803 // loop over all local nodes
804 LO realnnz = 0;
805 rows[0] = 0;
806 Array<LO> indicesExtra;
807 for (LO row = 0; row < numRows; row++) {
808 ArrayView<const LO> indices;
809 indicesExtra.resize(0);
810
811 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
812 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
813 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
814 // with local ids.
815 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
816 // node.
817 bool isBoundary = false;
818 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
819 for (LO j = 0; j < blkPartSize; j++) {
820 if (pointBoundaryNodes[row*blkPartSize+j]) {
821 isBoundary = true;
822 break;
823 }
824 }
825 } else {
826 isBoundary = true;
827 for (LO j = 0; j < blkPartSize; j++) {
828 if (!pointBoundaryNodes[row*blkPartSize+j]) {
829 isBoundary = false;
830 break;
831 }
832 }
833 }
834
835 // Merge rows of A
836 // The array indicesExtra contains local column node ids for the current local node "row"
837 if (!isBoundary)
838 MergeRows(*A, row, indicesExtra, colTranslation);
839 else
840 indicesExtra.push_back(row);
841 indices = indicesExtra;
842 numTotal += indices.size();
843
844 // add the local column node ids to the full columns array which
845 // contains the local column node ids for all local node rows
846 LO nnz = indices.size(), rownnz = 0;
847 for (LO colID = 0; colID < nnz; colID++) {
848 LO col = indices[colID];
849 columns[realnnz++] = col;
850 rownnz++;
851 }
852
853 if (rownnz == 1) {
854 // If the only element remaining after filtering is diagonal, mark node as boundary
855 // FIXME: this should really be replaced by the following
856 // if (indices.size() == 1 && indices[0] == row)
857 // boundaryNodes[row] = true;
858 // We do not do it this way now because there is no framework for distinguishing isolated
859 // and boundary nodes in the aggregation algorithms
860 amalgBoundaryNodes[row] = true;
861 }
862 rows[row+1] = realnnz;
863 } //for (LO row = 0; row < numRows; row++)
864 columns.resize(realnnz);
865
866 RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
867 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
868
869 if (GetVerbLevel() & Statistics1) {
870 GO numLocalBoundaryNodes = 0;
871 GO numGlobalBoundaryNodes = 0;
872
873 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
874 if (amalgBoundaryNodes[i])
875 numLocalBoundaryNodes++;
876
877 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
878 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
879 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
880 << " agglomerated Dirichlet nodes" << std::endl;
881 }
882
883 Set(currentLevel, "Graph", graph);
884 Set(currentLevel, "DofsPerNode", blkSize); // full block size
885
886 } else if (BlockSize > 1 && threshold != STS::zero()) {
887 // Case 4: Multiple DOF/node problem with dropping
888 const RCP<const Map> rowMap = A->getRowMap();
889 const RCP<const Map> colMap = A->getColMap();
890 graphType = "amalgamated";
891
892 // build node row map (uniqueMap) and node column map (nonUniqueMap)
893 // the arrays rowTranslation and colTranslation contain the local node id
894 // given a local dof id. The data is calculated by the AmalgamationFactory and
895 // stored in the variable container "UnAmalgamationInfo"
896 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
897 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
898 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
899 Array<LO> colTranslation = *(amalInfo->getColTranslation());
900
901 // get number of local nodes
902 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
903
904 // Allocate space for the local graph
905 ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
906 ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
907
908 const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
909
910 // Detect and record rows that correspond to Dirichlet boundary conditions
911 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
912 // TODO the array one bigger than the number of local rows, and the last entry can
913 // TODO hold the actual number of boundary nodes. Clever, huh?
914 ArrayRCP<bool > pointBoundaryNodes;
915 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
916 if (rowSumTol > 0.)
917 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
918
919
920 // extract striding information
921 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
922 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
923 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
924 if (A->IsView("stridedMaps") == true) {
925 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
926 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
927 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
928 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
929 blkId = strMap->getStridedBlockId();
930 if (blkId > -1)
931 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
932 }
933
934 // extract diagonal data for dropping strategy
936 const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
937
938 // loop over all local nodes
939 LO realnnz = 0;
940 rows[0] = 0;
941 Array<LO> indicesExtra;
942 for (LO row = 0; row < numRows; row++) {
943 ArrayView<const LO> indices;
944 indicesExtra.resize(0);
945
946 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
947 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
948 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
949 // with local ids.
950 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
951 // node.
952 bool isBoundary = false;
953 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
954 for (LO j = 0; j < blkPartSize; j++) {
955 if (pointBoundaryNodes[row*blkPartSize+j]) {
956 isBoundary = true;
957 break;
958 }
959 }
960 } else {
961 isBoundary = true;
962 for (LO j = 0; j < blkPartSize; j++) {
963 if (!pointBoundaryNodes[row*blkPartSize+j]) {
964 isBoundary = false;
965 break;
966 }
967 }
968 }
969
970 // Merge rows of A
971 // The array indicesExtra contains local column node ids for the current local node "row"
972 if (!isBoundary)
973 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
974 else
975 indicesExtra.push_back(row);
976 indices = indicesExtra;
977 numTotal += indices.size();
978
979 // add the local column node ids to the full columns array which
980 // contains the local column node ids for all local node rows
981 LO nnz = indices.size(), rownnz = 0;
982 for (LO colID = 0; colID < nnz; colID++) {
983 LO col = indices[colID];
984 columns[realnnz++] = col;
985 rownnz++;
986 }
987
988 if (rownnz == 1) {
989 // If the only element remaining after filtering is diagonal, mark node as boundary
990 // FIXME: this should really be replaced by the following
991 // if (indices.size() == 1 && indices[0] == row)
992 // boundaryNodes[row] = true;
993 // We do not do it this way now because there is no framework for distinguishing isolated
994 // and boundary nodes in the aggregation algorithms
995 amalgBoundaryNodes[row] = true;
996 }
997 rows[row+1] = realnnz;
998 } //for (LO row = 0; row < numRows; row++)
999 columns.resize(realnnz);
1000
1001 RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1002 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1003
1004 if (GetVerbLevel() & Statistics1) {
1005 GO numLocalBoundaryNodes = 0;
1006 GO numGlobalBoundaryNodes = 0;
1007
1008 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1009 if (amalgBoundaryNodes[i])
1010 numLocalBoundaryNodes++;
1011
1012 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1013 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1014 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1015 << " agglomerated Dirichlet nodes" << std::endl;
1016 }
1017
1018 Set(currentLevel, "Graph", graph);
1019 Set(currentLevel, "DofsPerNode", blkSize); // full block size
1020 }
1021
1022 } else if (algo == "distance laplacian") {
1023 LO blkSize = A->GetFixedBlockSize();
1024 GO indexBase = A->getRowMap()->getIndexBase();
1025 // [*0*] : FIXME
1026 // ap: somehow, if I move this line to [*1*], Belos throws an error
1027 // I'm not sure what's going on. Do we always have to Get data, if we did
1028 // DeclareInput for it?
1029 // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1030
1031 // Detect and record rows that correspond to Dirichlet boundary conditions
1032 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1033 // TODO the array one bigger than the number of local rows, and the last entry can
1034 // TODO hold the actual number of boundary nodes. Clever, huh?
1035 ArrayRCP<bool > pointBoundaryNodes;
1036 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1037 if (rowSumTol > 0.)
1038 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
1039
1040 if ( (blkSize == 1) && (threshold == STS::zero()) ) {
1041 // Trivial case: scalar problem, no dropping. Can return original graph
1042 RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
1043 graph->SetBoundaryNodeMap(pointBoundaryNodes);
1044 graphType="unamalgamated";
1045 numTotal = A->getLocalNumEntries();
1046
1047 if (GetVerbLevel() & Statistics1) {
1048 GO numLocalBoundaryNodes = 0;
1049 GO numGlobalBoundaryNodes = 0;
1050 for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
1051 if (pointBoundaryNodes[i])
1052 numLocalBoundaryNodes++;
1053 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1054 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1055 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1056 }
1057
1058 Set(currentLevel, "DofsPerNode", blkSize);
1059 Set(currentLevel, "Graph", graph);
1060
1061 } else {
1062 // ap: We make quite a few assumptions here; general case may be a lot different,
1063 // but much much harder to implement. We assume that:
1064 // 1) all maps are standard maps, not strided maps
1065 // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1066 // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1067 //
1068 // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1069 // but as I totally don't understand that code, here is my solution
1070
1071 // [*1*]: see [*0*]
1072
1073 // Check that the number of local coordinates is consistent with the #rows in A
1074 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1075 "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size ("<< blkSize <<").");
1076
1077 const RCP<const Map> colMap = A->getColMap();
1078 RCP<const Map> uniqueMap, nonUniqueMap;
1079 Array<LO> colTranslation;
1080 if (blkSize == 1) {
1081 uniqueMap = A->getRowMap();
1082 nonUniqueMap = A->getColMap();
1083 graphType="unamalgamated";
1084
1085 } else {
1086 uniqueMap = Coords->getMap();
1087 TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1088 "Different index bases for matrix and coordinates");
1089
1090 AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1091
1092 graphType = "amalgamated";
1093 }
1094 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1095
1096 RCP<RealValuedMultiVector> ghostedCoords;
1097 RCP<Vector> ghostedLaplDiag;
1098 Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1099 if (threshold != STS::zero()) {
1100 // Get ghost coordinates
1101 RCP<const Import> importer;
1102 {
1103 SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1104 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1105 GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1106 importer = realA->getCrsGraph()->getImporter();
1107 } else {
1108 GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1109 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1110 }
1111 } //subtimer
1112 ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
1113 {
1114 SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1115 ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1116 } //subtimer
1117
1118 // Construct Distance Laplacian diagonal
1119 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1120 Array<LO> indicesExtra;
1121 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1122 if (threshold != STS::zero()) {
1123 const size_t numVectors = ghostedCoords->getNumVectors();
1124 coordData.reserve(numVectors);
1125 for (size_t j = 0; j < numVectors; j++) {
1126 Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1127 coordData.push_back(tmpData);
1128 }
1129 }
1130 {
1131 SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1132 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1133 for (LO row = 0; row < numRows; row++) {
1134 ArrayView<const LO> indices;
1135
1136 if (blkSize == 1) {
1137 ArrayView<const SC> vals;
1138 A->getLocalRowView(row, indices, vals);
1139
1140 } else {
1141 // Merge rows of A
1142 indicesExtra.resize(0);
1143 MergeRows(*A, row, indicesExtra, colTranslation);
1144 indices = indicesExtra;
1145 }
1146
1147 LO nnz = indices.size();
1148 bool haveAddedToDiag = false;
1149 for (LO colID = 0; colID < nnz; colID++) {
1150 const LO col = indices[colID];
1151
1152 if (row != col) {
1153 if(use_dlap_weights == SINGLE_WEIGHTS) {
1154 /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1155 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1156 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1157 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1158 }
1159 else if(use_dlap_weights == BLOCK_WEIGHTS) {
1160 int block_id = row % interleaved_blocksize;
1161 int block_start = block_id * interleaved_blocksize;
1162 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1163 }
1164 else {
1165 // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1166 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1167 }
1168 haveAddedToDiag = true;
1169 }
1170 }
1171 // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1172 // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1173 if (!haveAddedToDiag)
1174 localLaplDiagData[row] = STS::rmax();
1175 }
1176 } //subtimer
1177 {
1178 SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1179 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1180 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1181 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1182 } //subtimer
1183
1184 } else {
1185 GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1186 }
1187
1188 // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1189
1190 // allocate space for the local graph
1191 ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
1192 ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
1193
1194#ifdef HAVE_MUELU_DEBUG
1195 // DEBUGGING
1196 for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666;
1197#endif
1198
1199 // Extra array for if we're allowing symmetrization with cutting
1200 ArrayRCP<LO> rows_stop;
1201 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1202 if(use_stop_array)
1203 rows_stop.resize(numRows);
1204
1205 const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
1206
1207 LO realnnz = 0;
1208 rows[0] = 0;
1209
1210 Array<LO> indicesExtra;
1211 {
1212 SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1213 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1214 if (threshold != STS::zero()) {
1215 const size_t numVectors = ghostedCoords->getNumVectors();
1216 coordData.reserve(numVectors);
1217 for (size_t j = 0; j < numVectors; j++) {
1218 Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1219 coordData.push_back(tmpData);
1220 }
1221 }
1222
1223 ArrayView<const SC> vals;//CMS hackery
1224 for (LO row = 0; row < numRows; row++) {
1225 ArrayView<const LO> indices;
1226 indicesExtra.resize(0);
1227 bool isBoundary = false;
1228
1229 if (blkSize == 1) {
1230 // ArrayView<const SC> vals;//CMS uncomment
1231 A->getLocalRowView(row, indices, vals);
1232 isBoundary = pointBoundaryNodes[row];
1233 } else {
1234 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1235 for (LO j = 0; j < blkSize; j++) {
1236 if (!pointBoundaryNodes[row*blkSize+j]) {
1237 isBoundary = false;
1238 break;
1239 }
1240 }
1241
1242 // Merge rows of A
1243 if (!isBoundary)
1244 MergeRows(*A, row, indicesExtra, colTranslation);
1245 else
1246 indicesExtra.push_back(row);
1247 indices = indicesExtra;
1248 }
1249 numTotal += indices.size();
1250
1251 LO nnz = indices.size(), rownnz = 0;
1252
1253 if(use_stop_array) {
1254 rows[row+1] = rows[row]+nnz;
1255 realnnz = rows[row];
1256 }
1257
1258 if (threshold != STS::zero()) {
1259 // default
1260 if (distanceLaplacianAlgo == defaultAlgo) {
1261 /* Standard Distance Laplacian */
1262 for (LO colID = 0; colID < nnz; colID++) {
1263
1264 LO col = indices[colID];
1265
1266 if (row == col) {
1267 columns[realnnz++] = col;
1268 rownnz++;
1269 continue;
1270 }
1271
1272 // We do not want the distance Laplacian aggregating boundary nodes
1273 if(isBoundary) continue;
1274
1275 SC laplVal;
1276 if(use_dlap_weights == SINGLE_WEIGHTS) {
1277 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1278 }
1279 else if(use_dlap_weights == BLOCK_WEIGHTS) {
1280 int block_id = row % interleaved_blocksize;
1281 int block_start = block_id * interleaved_blocksize;
1282 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1283 }
1284 else {
1285 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1286 }
1287 real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1288 real_type aij = STS::magnitude(laplVal*laplVal);
1289
1290 if (aij > aiiajj) {
1291 columns[realnnz++] = col;
1292 rownnz++;
1293 } else {
1294 numDropped++;
1295 }
1296 }
1297 } else {
1298 /* Cut Algorithm */
1299 using DropTol = Details::DropTol<real_type,LO>;
1300 std::vector<DropTol> drop_vec;
1301 drop_vec.reserve(nnz);
1302 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1303 const real_type one = Teuchos::ScalarTraits<real_type>::one();
1304
1305 // find magnitudes
1306 for (LO colID = 0; colID < nnz; colID++) {
1307
1308 LO col = indices[colID];
1309
1310 if (row == col) {
1311 drop_vec.emplace_back( zero, one, colID, false);
1312 continue;
1313 }
1314 // We do not want the distance Laplacian aggregating boundary nodes
1315 if(isBoundary) continue;
1316
1317 SC laplVal;
1318 if(use_dlap_weights == SINGLE_WEIGHTS) {
1319 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1320 }
1321 else if(use_dlap_weights == BLOCK_WEIGHTS) {
1322 int block_id = row % interleaved_blocksize;
1323 int block_start = block_id * interleaved_blocksize;
1324 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1325 }
1326 else {
1327 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1328 }
1329
1330 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1331 real_type aij = STS::magnitude(laplVal*laplVal);
1332
1333 drop_vec.emplace_back(aij, aiiajj, colID, false);
1334 }
1335
1336 const size_t n = drop_vec.size();
1337
1338 if (distanceLaplacianAlgo == unscaled_cut) {
1339
1340 std::sort( drop_vec.begin(), drop_vec.end()
1341 , [](DropTol const& a, DropTol const& b) {
1342 return a.val > b.val;
1343 }
1344 );
1345
1346 bool drop = false;
1347 for (size_t i=1; i<n; ++i) {
1348 if (!drop) {
1349 auto const& x = drop_vec[i-1];
1350 auto const& y = drop_vec[i];
1351 auto a = x.val;
1352 auto b = y.val;
1353 if (a > realThreshold*b) {
1354 drop = true;
1355#ifdef HAVE_MUELU_DEBUG
1356 if (distanceLaplacianCutVerbose) {
1357 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1358 }
1359#endif
1360 }
1361 }
1362 drop_vec[i].drop = drop;
1363 }
1364 }
1365 else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1366
1367 std::sort( drop_vec.begin(), drop_vec.end()
1368 , [](DropTol const& a, DropTol const& b) {
1369 return a.val/a.diag > b.val/b.diag;
1370 }
1371 );
1372
1373 bool drop = false;
1374 for (size_t i=1; i<n; ++i) {
1375 if (!drop) {
1376 auto const& x = drop_vec[i-1];
1377 auto const& y = drop_vec[i];
1378 auto a = x.val/x.diag;
1379 auto b = y.val/y.diag;
1380 if (a > realThreshold*b) {
1381 drop = true;
1382#ifdef HAVE_MUELU_DEBUG
1383 if (distanceLaplacianCutVerbose) {
1384 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1385 }
1386#endif
1387 }
1388 }
1389 drop_vec[i].drop = drop;
1390 }
1391 }
1392
1393 std::sort( drop_vec.begin(), drop_vec.end()
1394 , [](DropTol const& a, DropTol const& b) {
1395 return a.col < b.col;
1396 }
1397 );
1398
1399 for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1400 LO col = indices[drop_vec[idxID].col];
1401
1402
1403 // don't drop diagonal
1404 if (row == col) {
1405 columns[realnnz++] = col;
1406 rownnz++;
1407 // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1408 continue;
1409 }
1410
1411 if (!drop_vec[idxID].drop) {
1412 columns[realnnz++] = col;
1413 // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1414 rownnz++;
1415 } else {
1416 // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1417 numDropped++;
1418 }
1419 }
1420 }
1421 } else {
1422 // Skip laplace calculation and threshold comparison for zero threshold
1423 for (LO colID = 0; colID < nnz; colID++) {
1424 LO col = indices[colID];
1425 columns[realnnz++] = col;
1426 rownnz++;
1427 }
1428 }
1429
1430 if ( rownnz == 1) {
1431 // If the only element remaining after filtering is diagonal, mark node as boundary
1432 // FIXME: this should really be replaced by the following
1433 // if (indices.size() == 1 && indices[0] == row)
1434 // boundaryNodes[row] = true;
1435 // We do not do it this way now because there is no framework for distinguishing isolated
1436 // and boundary nodes in the aggregation algorithms
1437 amalgBoundaryNodes[row] = true;
1438 }
1439
1440 if(use_stop_array)
1441 rows_stop[row] = rownnz + rows[row];
1442 else
1443 rows[row+1] = realnnz;
1444 } //for (LO row = 0; row < numRows; row++)
1445
1446 } //subtimer
1447
1448 if (use_stop_array) {
1449 // Do symmetrization of the cut matrix
1450 // NOTE: We assume nested row/column maps here
1451 for (LO row = 0; row < numRows; row++) {
1452 for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1453 LO col = columns[colidx];
1454 if(col >= numRows) continue;
1455
1456 bool found = false;
1457 for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) {
1458 if (columns[t_col] == row)
1459 found = true;
1460 }
1461 // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1462 // into a Dirichlet unknown. In that case don't.
1463 if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) {
1464 LO new_idx = rows_stop[col];
1465 // printf("(%d,%d) SYMADD entry\n",col,row);
1466 columns[new_idx] = row;
1467 rows_stop[col]++;
1468 numDropped--;
1469 }
1470 }
1471 }
1472
1473 // Condense everything down
1474 LO current_start=0;
1475 for (LO row = 0; row < numRows; row++) {
1476 LO old_start = current_start;
1477 for (LO col = rows[row]; col < rows_stop[row]; col++) {
1478 if(current_start != col) {
1479 columns[current_start] = columns[col];
1480 }
1481 current_start++;
1482 }
1483 rows[row] = old_start;
1484 }
1485 rows[numRows] = realnnz = current_start;
1486
1487 }
1488
1489 columns.resize(realnnz);
1490
1491 RCP<GraphBase> graph;
1492 {
1493 SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1494 graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1495 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1496 } //subtimer
1497
1498 if (GetVerbLevel() & Statistics1) {
1499 GO numLocalBoundaryNodes = 0;
1500 GO numGlobalBoundaryNodes = 0;
1501
1502 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1503 if (amalgBoundaryNodes[i])
1504 numLocalBoundaryNodes++;
1505
1506 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1507 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1508 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1509 << " using threshold " << dirichletThreshold << std::endl;
1510 }
1511
1512 Set(currentLevel, "Graph", graph);
1513 Set(currentLevel, "DofsPerNode", blkSize);
1514 }
1515 }
1516
1517 if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1518 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1519 GO numGlobalTotal, numGlobalDropped;
1520 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1521 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1522 GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1523 if (numGlobalTotal != 0)
1524 GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1525 GetOStream(Statistics1) << std::endl;
1526 }
1527
1528 } else {
1529 //what Tobias has implemented
1530
1531 SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1532 //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1533 GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1534 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1535
1536 RCP<const Map> rowMap = A->getRowMap();
1537 RCP<const Map> colMap = A->getColMap();
1538
1539 LO blockdim = 1; // block dim for fixed size blocks
1540 GO indexBase = rowMap->getIndexBase(); // index base of maps
1541 GO offset = 0;
1542
1543 // 1) check for blocking/striding information
1544 if(A->IsView("stridedMaps") &&
1545 Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1546 Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1547 RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1548 TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1549 blockdim = strMap->getFixedBlockSize();
1550 offset = strMap->getOffset();
1551 oldView = A->SwitchToView(oldView);
1552 GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1553 } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1554
1555 // 2) get row map for amalgamated matrix (graph of A)
1556 // with same distribution over all procs as row map of A
1557 RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1558 GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1559
1560 // 3) create graph of amalgamated matrix
1561 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries()*blockdim);
1562
1563 LO numRows = A->getRowMap()->getLocalNumElements();
1564 LO numNodes = nodeMap->getLocalNumElements();
1565 const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1566 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1567 bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1568
1569 // 4) do amalgamation. generate graph of amalgamated matrix
1570 // Note, this code is much more inefficient than the leightwight implementation
1571 // Most of the work has already been done in the AmalgamationFactory
1572 for(LO row=0; row<numRows; row++) {
1573 // get global DOF id
1574 GO grid = rowMap->getGlobalElement(row);
1575
1576 // reinitialize boolean helper variable
1577 bIsDiagonalEntry = false;
1578
1579 // translate grid to nodeid
1580 GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1581
1582 size_t nnz = A->getNumEntriesInLocalRow(row);
1583 Teuchos::ArrayView<const LO> indices;
1584 Teuchos::ArrayView<const SC> vals;
1585 A->getLocalRowView(row, indices, vals);
1586
1587 RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1588 LO realnnz = 0;
1589 for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1590 GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1591
1592 if(vals[col]!=STS::zero()) {
1593 GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1594 cnodeIds->push_back(cnodeId);
1595 realnnz++; // increment number of nnz in matrix row
1596 if (grid == gcid) bIsDiagonalEntry = true;
1597 }
1598 }
1599
1600 if(realnnz == 1 && bIsDiagonalEntry == true) {
1601 LO lNodeId = nodeMap->getLocalElement(nodeId);
1602 numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1603 if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1604 amalgBoundaryNodes[lNodeId] = true;
1605 }
1606
1607 Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1608
1609 if(arr_cnodeIds.size() > 0 )
1610 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1611 }
1612 // fill matrix graph
1613 crsGraph->fillComplete(nodeMap,nodeMap);
1614
1615 // 5) create MueLu Graph object
1616 RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1617
1618 // Detect and record rows that correspond to Dirichlet boundary conditions
1619 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1620
1621 if (GetVerbLevel() & Statistics1) {
1622 GO numLocalBoundaryNodes = 0;
1623 GO numGlobalBoundaryNodes = 0;
1624 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1625 if (amalgBoundaryNodes[i])
1626 numLocalBoundaryNodes++;
1627 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1628 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1629 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1630 }
1631
1632 // 6) store results in Level
1633 //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1634 Set(currentLevel, "DofsPerNode", blockdim);
1635 Set(currentLevel, "Graph", graph);
1636
1637 } //if (doExperimentalWrap) ... else ...
1638
1639
1640 } //Build
1641
1642 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1643 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1644 typedef typename ArrayView<const LO>::size_type size_type;
1645
1646 // extract striding information
1647 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1648 if (A.IsView("stridedMaps") == true) {
1649 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1650 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1651 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1652 if (strMap->getStridedBlockId() > -1)
1653 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1654 }
1655
1656 // count nonzero entries in all dof rows associated with node row
1657 size_t nnz = 0, pos = 0;
1658 for (LO j = 0; j < blkSize; j++)
1659 nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1660
1661 if (nnz == 0) {
1662 cols.resize(0);
1663 return;
1664 }
1665
1666 cols.resize(nnz);
1667
1668 // loop over all local dof rows associated with local node "row"
1669 ArrayView<const LO> inds;
1670 ArrayView<const SC> vals;
1671 for (LO j = 0; j < blkSize; j++) {
1672 A.getLocalRowView(row*blkSize+j, inds, vals);
1673 size_type numIndices = inds.size();
1674
1675 if (numIndices == 0) // skip empty dof rows
1676 continue;
1677
1678 // cols: stores all local node ids for current local node id "row"
1679 cols[pos++] = translation[inds[0]];
1680 for (size_type k = 1; k < numIndices; k++) {
1681 LO nodeID = translation[inds[k]];
1682 // Here we try to speed up the process by reducing the size of an array
1683 // to sort. This works if the column nonzeros belonging to the same
1684 // node are stored consequently.
1685 if (nodeID != cols[pos-1])
1686 cols[pos++] = nodeID;
1687 }
1688 }
1689 cols.resize(pos);
1690 nnz = pos;
1691
1692 // Sort and remove duplicates
1693 std::sort(cols.begin(), cols.end());
1694 pos = 0;
1695 for (size_t j = 1; j < nnz; j++)
1696 if (cols[j] != cols[pos])
1697 cols[++pos] = cols[j];
1698 cols.resize(pos+1);
1699 }
1700
1701 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1702 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1703 typedef typename ArrayView<const LO>::size_type size_type;
1704 typedef Teuchos::ScalarTraits<SC> STS;
1705
1706 // extract striding information
1707 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1708 if (A.IsView("stridedMaps") == true) {
1709 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1710 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1711 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1712 if (strMap->getStridedBlockId() > -1)
1713 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1714 }
1715
1716 // count nonzero entries in all dof rows associated with node row
1717 size_t nnz = 0, pos = 0;
1718 for (LO j = 0; j < blkSize; j++)
1719 nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1720
1721 if (nnz == 0) {
1722 cols.resize(0);
1723 return;
1724 }
1725
1726 cols.resize(nnz);
1727
1728 // loop over all local dof rows associated with local node "row"
1729 ArrayView<const LO> inds;
1730 ArrayView<const SC> vals;
1731 for (LO j = 0; j < blkSize; j++) {
1732 A.getLocalRowView(row*blkSize+j, inds, vals);
1733 size_type numIndices = inds.size();
1734
1735 if (numIndices == 0) // skip empty dof rows
1736 continue;
1737
1738 // cols: stores all local node ids for current local node id "row"
1739 LO prevNodeID = -1;
1740 for (size_type k = 0; k < numIndices; k++) {
1741 LO dofID = inds[k];
1742 LO nodeID = translation[inds[k]];
1743
1744 // we avoid a square root by using squared values
1745 typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1746 typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1747
1748 // check dropping criterion
1749 if (aij > aiiajj || (row*blkSize+j == dofID)) {
1750 // accept entry in graph
1751
1752 // Here we try to speed up the process by reducing the size of an array
1753 // to sort. This works if the column nonzeros belonging to the same
1754 // node are stored consequently.
1755 if (nodeID != prevNodeID) {
1756 cols[pos++] = nodeID;
1757 prevNodeID = nodeID;
1758 }
1759 }
1760 }
1761 }
1762 cols.resize(pos);
1763 nnz = pos;
1764
1765 // Sort and remove duplicates
1766 std::sort(cols.begin(), cols.end());
1767 pos = 0;
1768 for (size_t j = 1; j < nnz; j++)
1769 if (cols[j] != cols[pos])
1770 cols[++pos] = cols[j];
1771 cols.resize(pos+1);
1772
1773 return;
1774 }
1775
1776
1777
1778 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1779 Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level & currentLevel,const RCP<Matrix>& A,bool generate_matrix) const {
1780 typedef Teuchos::ScalarTraits<SC> STS;
1781
1782 const ParameterList & pL = GetParameterList();
1783 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1784 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1785
1786 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
1787 RCP<LocalOrdinalVector> ghostedBlockNumber;
1788 GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"<<std::endl;
1789
1790 // Ghost the column block numbers if we need to
1791 RCP<const Import> importer = A->getCrsGraph()->getImporter();
1792 if(!importer.is_null()) {
1793 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1794 ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
1795 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1796 }
1797 else {
1798 ghostedBlockNumber = BlockNumber;
1799 }
1800
1801 // Accessors for block numbers
1802 Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1803 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1804
1805 // allocate space for the local graph
1806 ArrayRCP<size_t> rows_mat;
1807 ArrayRCP<LO> rows_graph,columns;
1808 ArrayRCP<SC> values;
1809 RCP<CrsMatrixWrap> crs_matrix_wrap;
1810
1811 if(generate_matrix) {
1812 crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1813 crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values);
1814 }
1815 else {
1816 rows_graph.resize(A->getLocalNumRows()+1);
1817 columns.resize(A->getLocalNumEntries());
1818 values.resize(A->getLocalNumEntries());
1819 }
1820
1821 LO realnnz = 0;
1822 GO numDropped = 0, numTotal = 0;
1823 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1824 LO row_block = row_block_number[row];
1825 size_t nnz = A->getNumEntriesInLocalRow(row);
1826 ArrayView<const LO> indices;
1827 ArrayView<const SC> vals;
1828 A->getLocalRowView(row, indices, vals);
1829
1830 LO rownnz = 0;
1831 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1832 LO col = indices[colID];
1833 LO col_block = col_block_number[col];
1834
1835 if(row_block == col_block) {
1836 if(generate_matrix) values[realnnz] = vals[colID];
1837 columns[realnnz++] = col;
1838 rownnz++;
1839 } else
1840 numDropped++;
1841 }
1842 if(generate_matrix) rows_mat[row+1] = realnnz;
1843 else rows_graph[row+1] = realnnz;
1844 }
1845
1846 ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1847 if (rowSumTol > 0.)
1848 Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
1849
1850
1851 if(!generate_matrix) {
1852 // We can't resize an Arrayrcp and pass the checks for setAllValues
1853 values.resize(realnnz);
1854 columns.resize(realnnz);
1855 }
1856 numTotal = A->getLocalNumEntries();
1857
1858 if (GetVerbLevel() & Statistics1) {
1859 GO numLocalBoundaryNodes = 0;
1860 GO numGlobalBoundaryNodes = 0;
1861 for (LO i = 0; i < boundaryNodes.size(); ++i)
1862 if (boundaryNodes[i])
1863 numLocalBoundaryNodes++;
1864 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1865 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1866 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1867
1868 GO numGlobalTotal, numGlobalDropped;
1869 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1870 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1871 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1872 if (numGlobalTotal != 0)
1873 GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1874 GetOStream(Statistics1) << std::endl;
1875 }
1876
1877 Set(currentLevel, "Filtering", true);
1878
1879 if(generate_matrix) {
1880 // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1881 // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1882 // here, which is legit, because we never use them anyway.
1883 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values);
1884 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1885 }
1886 else {
1887 RCP<GraphBase> graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1888 graph->SetBoundaryNodeMap(boundaryNodes);
1889 Set(currentLevel, "Graph", graph);
1890 }
1891
1892
1893 Set(currentLevel, "DofsPerNode", 1);
1894 return crs_matrix_wrap;
1895 }
1896
1897
1898 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1899 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<GraphBase> & inputGraph, const RCP<LocalOrdinalVector> & ghostedBlockNumber, RCP<GraphBase> & outputGraph, RCP<const Import> & importer) const {
1900
1901 TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1902 const ParameterList & pL = GetParameterList();
1903
1904 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1905
1906 GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1907 if (localizeColoringGraph)
1908 GetOStream(Statistics1) << ", with localization" <<std::endl;
1909 else
1910 GetOStream(Statistics1) << ", without localization" <<std::endl;
1911
1912 // Accessors for block numbers
1913 Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1914 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1915
1916 // allocate space for the local graph
1917 ArrayRCP<size_t> rows_mat;
1918 ArrayRCP<LO> rows_graph,columns;
1919
1920 rows_graph.resize(inputGraph->GetNodeNumVertices()+1);
1921 columns.resize(inputGraph->GetNodeNumEdges());
1922
1923 LO realnnz = 0;
1924 GO numDropped = 0, numTotal = 0;
1925 const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1926 if (localizeColoringGraph) {
1927
1928 for (LO row = 0; row < numRows; ++row) {
1929 LO row_block = row_block_number[row];
1930 ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1931
1932 LO rownnz = 0;
1933 for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1934 LO col = indices[colID];
1935 LO col_block = col_block_number[col];
1936
1937 if((row_block == col_block) && (col < numRows)) {
1938 columns[realnnz++] = col;
1939 rownnz++;
1940 } else
1941 numDropped++;
1942 }
1943 rows_graph[row+1] = realnnz;
1944 }
1945 } else {
1946 // ghosting of boundary node map
1947 Teuchos::ArrayRCP<const bool> boundaryNodes = inputGraph->GetBoundaryNodeMap();
1948 auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetDomainMap());
1949 for (size_t i=0; i<inputGraph->GetNodeNumVertices(); i++)
1950 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1951 // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1952 auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetImportMap());
1953 boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT);
1954 auto boundaryColumn = boundaryColumnVector->getData(0);
1955
1956 for (LO row = 0; row < numRows; ++row) {
1957 LO row_block = row_block_number[row];
1958 ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1959
1960 LO rownnz = 0;
1961 for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1962 LO col = indices[colID];
1963 LO col_block = col_block_number[col];
1964
1965 if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1966 columns[realnnz++] = col;
1967 rownnz++;
1968 } else
1969 numDropped++;
1970 }
1971 rows_graph[row+1] = realnnz;
1972 }
1973 }
1974
1975 columns.resize(realnnz);
1976 numTotal = inputGraph->GetNodeNumEdges();
1977
1978 if (GetVerbLevel() & Statistics1) {
1979 RCP<const Teuchos::Comm<int> > comm = inputGraph->GetDomainMap()->getComm();
1980 GO numGlobalTotal, numGlobalDropped;
1981 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1982 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1983 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1984 if (numGlobalTotal != 0)
1985 GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1986 GetOStream(Statistics1) << std::endl;
1987 }
1988
1989 if (localizeColoringGraph) {
1990 outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1991 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1992 } else {
1993 TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1994#ifdef HAVE_XPETRA_TPETRA
1995 auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1996
1997 auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1998 auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal,GlobalOrdinal,Node>(tpGraph));
1999 auto tpGraphSym = sym->symmetrize();
2000
2001 auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor
2002 Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost());
2003
2004 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2005 ArrayRCP<LO> rows_graphSym;
2006 rows_graphSym.resize(rowsSym.size());
2007 for (size_t row = 0; row < rowsSym.size(); row++)
2008 rows_graphSym[row] = rowsSym[row];
2009 outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2010 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2011#endif
2012 }
2013
2014 }
2015
2016
2017
2018} //namespace MueLu
2019
2020#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
void DeclareInput(Level &currentLevel) const
Input.
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
void BlockDiagonalizeGraph(const RCP< GraphBase > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< GraphBase > &outputGraph, RCP< const Import > &importer) const
RCP< PreDropFunctionBaseClass > predrop_
MueLu::PreDropFunctionConstVal< Scalar, LocalOrdinal, GlobalOrdinal, Node > PreDropFunctionConstVal
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
virtual const Teuchos::ParameterList & GetParameterList() const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar > > &v, DefaultLocalOrdinal i0, DefaultLocalOrdinal i1)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
VerbLevel GetVerbLevel() const
Get the verbosity level.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line).
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol & operator=(DropTol const &)=default
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default