MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
47#define MUELU_CLASSICALPFACTORY_DEF_HPP
48
49#include <Xpetra_MultiVectorFactory.hpp>
50#include <Xpetra_VectorFactory.hpp>
51#include <Xpetra_CrsGraphFactory.hpp>
52#include <Xpetra_Matrix.hpp>
53#include <Xpetra_Map.hpp>
54#include <Xpetra_MapFactory.hpp>
55#include <Xpetra_Vector.hpp>
56#include <Xpetra_Import.hpp>
57#include <Xpetra_ImportUtils.hpp>
58#include <Xpetra_IO.hpp>
59#include <Xpetra_StridedMapFactory.hpp>
60
61#include <Teuchos_OrdinalTraits.hpp>
62
63#include "MueLu_MasterList.hpp"
64#include "MueLu_Monitor.hpp"
65#include "MueLu_PerfUtils.hpp"
67#include "MueLu_ClassicalMapFactory.hpp"
68#include "MueLu_AmalgamationInfo.hpp"
69#include "MueLu_GraphBase.hpp"
70
71
72//#define CMS_DEBUG
73//#define CMS_DUMP
74
75namespace {
76
77template<class SC>
78int Sign(SC val) {
79 using STS = typename Teuchos::ScalarTraits<SC>;
80 typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
81 if(STS::real(val) > MT_ZERO) return 1;
82 else if(STS::real(val) < MT_ZERO) return -1;
83 else return 0;
84}
85
86}// anonymous namepsace
87
88namespace MueLu {
89
90
91
92
93 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
95 RCP<ParameterList> validParamList = rcp(new ParameterList());
96#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
97 SET_VALID_ENTRY("aggregation: deterministic");
98 SET_VALID_ENTRY("aggregation: coloring algorithm");
99 SET_VALID_ENTRY("aggregation: classical scheme");
100
101 // To know if we need BlockNumber
102 SET_VALID_ENTRY("aggregation: drop scheme");
103 {
104 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
105 validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("direct","ext+i","classical modified"), "aggregation: classical scheme")));
106
107 }
108
109#undef SET_VALID_ENTRY
110 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
111 // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
112 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
113 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
114 validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
115 validParamList->set< RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
116 validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
117 // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
118
119 return validParamList;
120 }
121
122 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
124 Input(fineLevel, "A");
125 Input(fineLevel, "Graph");
126 Input(fineLevel, "DofsPerNode");
127 // Input(fineLevel, "UnAmalgamationInfo");
128 Input(fineLevel, "CoarseMap");
129 Input(fineLevel, "FC Splitting");
130
131 const ParameterList& pL = GetParameterList();
132 std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
133 if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
134 Input(fineLevel, "BlockNumber");
135 }
136
137 }
138
139 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
141 return BuildP(fineLevel, coarseLevel);
142 }
143
144 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
146 FactoryMonitor m(*this, "Build", coarseLevel);
147 // using STS = Teuchos::ScalarTraits<SC>;
148
149 // We start by assuming that someone did a reasonable strength of connection
150 // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
151
152 // We begin by getting a MIS (from a graph coloring) and then at that point we need
153 // to start generating entries for the prolongator.
154 RCP<const Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
155 RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel,"CoarseMap");
156 RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel,"FC Splitting");
157 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(fineLevel, "Graph");
158 // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
159 // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
160 RCP<const Import> Importer = A->getCrsGraph()->getImporter();
161 Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
162
163 // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
164 RCP<Matrix> P;
165 // SC SC_ZERO = STS::zero();
166 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
169 const ParameterList& pL = GetParameterList();
170
171 // FIXME: This guy doesn't work right now for NumPDEs != 1
172 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError,"ClassicalPFactory: Multiple PDEs per node not supported yet");
173
174 // NOTE: Let's hope we never need to deal with this case
175 TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()),Exceptions::RuntimeError,"ClassicalPFactory: MPI Ranks > 1 not supported yet");
176
177 // Do we need ghosts rows of A and myPointType?
178 std::string scheme = pL.get<std::string>("aggregation: classical scheme");
179 bool need_ghost_rows =false;
180 if(scheme == "ext+i")
181 need_ghost_rows=true;
182 else if(scheme == "direct")
183 need_ghost_rows=false;
184 else if(scheme == "classical modified")
185 need_ghost_rows=true;
186 // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
187
188 if (GetVerbLevel() & Statistics1) {
189 GetOStream(Statistics1) << "ClassicalPFactory: scheme = "<<scheme<<std::endl;
190 }
191
192 // Ghost the FC splitting and grab the data (if needed)
193 RCP<const LocalOrdinalVector> fc_splitting;
194 ArrayRCP<const LO> myPointType;
195 if(Importer.is_null()) {
196 fc_splitting = owned_fc_splitting;
197 }
198 else {
199 RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
200 fc_splitting_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
201 fc_splitting = fc_splitting_nonconst;
202 }
203 myPointType = fc_splitting->getData(0);
204
205
206 /* Ghost A (if needed) */
207 RCP<const Matrix> Aghost;
208 RCP<const LocalOrdinalVector> fc_splitting_ghost;
209 ArrayRCP<const LO> myPointType_ghost;
210 RCP<const Import> remoteOnlyImporter;
211 if(need_ghost_rows && !Importer.is_null()){
212 ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
213 size_t numRemote = Importer->getNumRemoteIDs();
214 Array<GO> remoteRows(numRemote);
215 for (size_t i = 0; i < numRemote; i++)
216 remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
217
218 RCP<const Map> remoteRowMap = MapFactory::Build(lib,Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
219 A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
220
221 remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
222 RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
223 RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs,*remoteOnlyImporter,A->getDomainMap(),remoteOnlyImporter->getTargetMap());
224 Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
225 // CAG: It seems that this isn't actually needed?
226 // We also may need need to ghost myPointType for Aghost
227 // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
228 // if(Importer2.is_null()) {
229 // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
230 // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
231 // fc_splitting_ghost = fc_splitting_ghost_nonconst;
232 // myPointType_ghost = fc_splitting_ghost->getData(0);
233 // }
234 }
235
236 /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
237 RCP<const Map> coarseMap;
238 if(Importer.is_null())
239 coarseMap = ownedCoarseMap;
240 else {
241 // Generate a domain vector with the coarse ID's as entries for C points
242 GhostCoarseMap(*A,*Importer,myPointType,ownedCoarseMap,coarseMap);
243 }
244
245 /* Get the block number, if we need it (and ghost it) */
246 RCP<LocalOrdinalVector> BlockNumber;
247 std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
248 if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
249 RCP<LocalOrdinalVector> OwnedBlockNumber;
250 OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
251 if(Importer.is_null())
252 BlockNumber = OwnedBlockNumber;
253 else{
254 BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
255 BlockNumber->doImport(*OwnedBlockNumber,*Importer,Xpetra::INSERT);
256 }
257 }
258
259#if defined(CMS_DEBUG) || defined(CMS_DUMP)
260 {
261 RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
262 int rank = A->getRowMap()->getComm()->getRank();
263 printf("[%d] A local size = %dx%d\n",rank,(int)Acrs->getRowMap()->getLocalNumElements(),(int)Acrs->getColMap()->getLocalNumElements());
264
265 printf("[%d] graph local size = %dx%d\n",rank,(int)graph->GetDomainMap()->getLocalNumElements(),(int)graph->GetImportMap()->getLocalNumElements());
266
267 std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
268 RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
269 graph->print(*fancy,Debug);
270 std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
271 std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
272
273 // We don't support writing LO vectors in Xpetra (boo!) so....
274 using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
275 using RealValuedMultiVector = typename Xpetra::MultiVector<real_type,LO,GO,NO>;
276 typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
277
278 RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(),1);
279 ArrayRCP<real_type> mv_data= mv->getDataNonConst(0);
280
281 // FC Splitting
282 ArrayRCP<const LO> fc_data= fc_splitting->getData(0);
283 for(LO i=0; i<(LO)fc_data.size(); i++)
284 mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
285 Xpetra::IO<real_type,LO,GO,NO>::Write(out_fc,*mv);
286
287 // Block Number
288 if(!BlockNumber.is_null()) {
289 RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(),1);
290 ArrayRCP<real_type> mv_data2= mv2->getDataNonConst(0);
291 ArrayRCP<const LO> b_data= BlockNumber->getData(0);
292 for(LO i=0; i<(LO)b_data.size(); i++) {
293 mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
294 }
295 Xpetra::IO<real_type,LO,GO,NO>::Write(out_block,*mv2);
296 }
297 }
298
299
300#endif
301
302
303 /* Generate reindexing arrays */
304 // Note: cpoint2pcol is ghosted if myPointType is
305 // NOTE: Since the ghosted coarse column map follows the ordering of
306 // the fine column map, this *should* work, because it is in local indices.
307 // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
308 // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
309 Array<LO> cpoint2pcol(myPointType.size(),LO_INVALID);
310 Array<LO> pcol2cpoint(coarseMap->getLocalNumElements(),LO_INVALID);
311 LO num_c_points = 0;
312 LO num_f_points =0;
313 for(LO i=0; i<(LO) myPointType.size(); i++) {
314 if(myPointType[i] == C_PT) {
315 cpoint2pcol[i] = num_c_points;
316 num_c_points++;
317 }
318 else if (myPointType[i] == F_PT)
319 num_f_points++;
320 }
321 for(LO i=0; i<(LO)cpoint2pcol.size(); i++) {
322 if(cpoint2pcol[i] != LO_INVALID)
323 pcol2cpoint[cpoint2pcol[i]] =i;
324 }
325
326 // Generate edge strength flags (this will make everything easier later)
327 // These do *not* need to be ghosted (unlike A)
328 Teuchos::Array<size_t> eis_rowptr;
329 Teuchos::Array<bool> edgeIsStrong;
330 {
331 SubFactoryMonitor sfm(*this,"Strength Flags",coarseLevel);
332 GenerateStrengthFlags(*A,*graph,eis_rowptr,edgeIsStrong);
333 }
334
335 // Phase 3: Generate the P matrix
336 RCP<const Map> coarseColMap = coarseMap;
337 RCP<const Map> coarseDomainMap = ownedCoarseMap;
338 if(scheme == "ext+i") {
339 SubFactoryMonitor sfm(*this,"Ext+i Interpolation",coarseLevel);
340 Coarsen_Ext_Plus_I(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
341 }
342 else if(scheme == "direct") {
343 SubFactoryMonitor sfm(*this,"Direct Interpolation",coarseLevel);
344 Coarsen_Direct(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
345 }
346 else if(scheme == "classical modified") {
347 SubFactoryMonitor sfm(*this,"Classical Modified Interpolation",coarseLevel);
348 Coarsen_ClassicalModified(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,remoteOnlyImporter,P);
349 }
350 // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
351
352#ifdef CMS_DEBUG
353 Xpetra::IO<SC,LO,GO,NO>::Write("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
354 // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
355#endif
356
357 // Save output
358 Set(coarseLevel,"P",P);
359 RCP<const CrsGraph> pg = P->getCrsGraph();
360 Set(coarseLevel,"P Graph",pg);
361
362 //RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
363 // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
364 // Set(coarseLevel, "Nullspace", coarseNullspace);
365
366 if (IsPrint(Statistics1)) {
367 RCP<ParameterList> params = rcp(new ParameterList());
368 params->set("printLoadBalancingInfo", true);
370 }
371 }
372/* ************************************************************************* */
373template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
375Coarsen_ClassicalModified(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<const Import> remoteOnlyImporter,RCP<Matrix> & P) const {
376 /* ============================================================= */
377 /* Phase 3 : Classical Modified Interpolation */
378 /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
379 /* interpolation for parallel algebraic multigrid", NLAA 2008 */
380 /* 15:115-139 */
381 /* ============================================================= */
382 /* Definitions: */
383 /* F = F-points */
384 /* C = C-points */
385 /* N_i = non-zero neighbors of node i */
386 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
387 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
388 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
389
390 /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
391 /* This guy has a typo. The paper had a \cap instead of \cup */
392 /* I would note that this set can contain both F-points and */
393 /* C-points. They're just weak neighbors of this guy. */
394 /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
395
396
397 /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
398 /* { a_ij, otherwise */
399
400 /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
401 /* [set of F-neighbors of i that do not share a strong */
402 /* C-neighbor with i] */
403
404
405 /* Rewritten Equation (9) on p. 120 */
406 /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
407 /* */
408 /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
409 /* */
410 /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
411
412 // const point_type F_PT = ClassicalMapFactory::F_PT;
415 using STS = typename Teuchos::ScalarTraits<SC>;
416 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
417 // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
418 SC SC_ZERO = STS::zero();
419#ifdef CMS_DEBUG
420 int rank = A.getRowMap()->getComm()->getRank();
421#endif
422
423 // Get the block number if we have it.
424 ArrayRCP<const LO> block_id;
425 if(!BlockNumber.is_null())
426 block_id = BlockNumber->getData(0);
427
428 // Initial (estimated) allocation
429 // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
430 // needs to be a copy below.
431 size_t Nrows = A.getLocalNumRows();
432 double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
433 double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
434 // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
435 double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
436
437 size_t nnz_est = std::max(Nrows,std::min((size_t)A.getLocalNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
438 Array<size_t> tmp_rowptr(Nrows+1);
439 Array<LO> tmp_colind(nnz_est);
440
441 // Algorithm (count+realloc)
442 // For each row, i,
443 // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
444 size_t ct=0;
445 for(LO row=0; row < (LO) Nrows; row++) {
446 size_t row_start = eis_rowptr[row];
447 ArrayView<const LO> indices;
448 ArrayView<const SC> vals;
449 std::set<LO> C_hat;
450 if(myPointType[row] == DIRICHLET_PT) {
451 // Dirichlet points get ignored completely
452 }
453 else if(myPointType[row] == C_PT) {
454 // C-Points get a single 1 in their row
455 C_hat.insert(cpoint2pcol[row]);
456 }
457 else {
458 // F-Points have a more complicated interpolation
459
460 // C-neighbors of row
461 A.getLocalRowView(row, indices, vals);
462 for(LO j=0; j<indices.size(); j++)
463 if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
464 C_hat.insert(cpoint2pcol[indices[j]]);
465 }// end else
466
467 // Realloc if needed
468 if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
469 tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
470 }
471
472 // Copy
473 std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
474 ct+=C_hat.size();
475 tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
476 }
477 // Resize down
478 tmp_colind.resize(tmp_rowptr[Nrows]);
479
480 RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(),coarseColMap,0);
481 ArrayRCP<size_t> P_rowptr;
482 ArrayRCP<LO> P_colind;
483 ArrayRCP<SC> P_values;
484
485 if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
486 P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
487 P_colind = Teuchos::arcpFromArray(tmp_colind);
488 P_values.resize(P_rowptr[Nrows]);
489 } else {
490 // Make the matrix and then get the graph out of it (necessary for Epetra)
491 // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
492 // impossible to do the obvious way
493 Pcrs->allocateAllValues(tmp_rowptr[Nrows],P_rowptr,P_colind,P_values);
494 std::copy(tmp_rowptr.begin(),tmp_rowptr.end(), P_rowptr.begin());
495 std::copy(tmp_colind.begin(),tmp_colind.end(), P_colind.begin());
496 Pcrs->setAllValues(P_rowptr,P_colind,P_values);
497 Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
498 }
499
500 // Generate a remote-ghosted version of the graph (if we're in parallel)
501 RCP<const CrsGraph> Pgraph;
502 RCP<const CrsGraph> Pghost;
503 // TODO: We might want to be more efficient here and actually use
504 // Pgraph in the matrix constructor.
505 ArrayRCP<const size_t> Pghost_rowptr;
506 ArrayRCP<const LO> Pghost_colind;
507 if(!remoteOnlyImporter.is_null()) {
508 if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
509 RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(),coarseColMap,P_rowptr,P_colind);
510 tempPgraph->fillComplete(coarseDomainMap,A.getDomainMap());
511 Pgraph = tempPgraph;
512 } else {
513 // Epetra does not have a graph constructor that uses rowptr and colind.
514 Pgraph = Pcrs->getCrsGraph();
515 }
516 TEUCHOS_ASSERT(!Pgraph.is_null());
517 Pghost = CrsGraphFactory::Build(Pgraph,*remoteOnlyImporter,Pgraph->getDomainMap(),remoteOnlyImporter->getTargetMap());
518 Pghost->getAllIndices(Pghost_rowptr,Pghost_colind);
519 }
520
521 // Gustavson-style perfect hashing
522 ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getLocalNumElements(),LO_INVALID);
523
524 // Get a quick reindexing array from Pghost LCIDs to P LCIDs
525 ArrayRCP<LO> Pghostcol_to_Pcol;
526 if(!Pghost.is_null()) {
527 Pghostcol_to_Pcol.resize(Pghost->getColMap()->getLocalNumElements(),LO_INVALID);
528 for(LO i=0; i<(LO) Pghost->getColMap()->getLocalNumElements(); i++)
529 Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
530 }//end Pghost
531
532 // Get a quick reindexing array from Aghost LCIDs to A LCIDs
533 ArrayRCP<LO> Aghostcol_to_Acol;
534 if(!Aghost.is_null()) {
535 Aghostcol_to_Acol.resize(Aghost->getColMap()->getLocalNumElements(),LO_INVALID);
536 for(LO i=0; i<(LO)Aghost->getColMap()->getLocalNumElements(); i++)
537 Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
538 }//end Aghost
539
540
541 // Algorithm (numeric)
542 for(LO i=0; i < (LO)Nrows; i++) {
543 if(myPointType[i] == DIRICHLET_PT) {
544 // Dirichlet points get ignored completely
545#ifdef CMS_DEBUG
546 // DEBUG
547 printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n",rank,i);
548#endif
549 }
550 else if (myPointType[i] == C_PT) {
551 // C Points get a single 1 in their row
552 P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
553#ifdef CMS_DEBUG
554 // DEBUG
555 printf("[%d] ** A(%d,:) is a C-Point.\n",rank,i);
556#endif
557 }
558 else {
559 // F Points get all of the fancy stuff
560#ifdef CMS_DEBUG
561 // DEBUG
562 printf("[%d] ** A(%d,:) is a F-Point.\n",rank,i);
563#endif
564
565 // Get all of the relevant information about this row
566 ArrayView<const LO> A_indices_i, A_indices_k;
567 ArrayView<const SC> A_vals_i, A_vals_k;
568 A.getLocalRowView(i, A_indices_i, A_vals_i);
569 size_t row_start = eis_rowptr[i];
570
571 ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
572 ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
573
574 // FIXME: Do we need this?
575 for(LO j=0; j<(LO)P_vals_i.size(); j++)
576 P_vals_i[j] = SC_ZERO;
577
578 // Stash the hash: Flag any strong C-points with their index into P_colind
579 // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
580 for(LO j=0; j<(LO)P_indices_i.size(); j++) {
581 Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
582 }
583
584 // Loop over the entries in the row
585 SC first_denominator = SC_ZERO;
586#ifdef CMS_DEBUG
587 SC a_ii = SC_ZERO;
588#endif
589 for(LO k0=0; k0<(LO)A_indices_i.size(); k0++) {
590 LO k = A_indices_i[k0];
591 SC a_ik = A_vals_i[k0];
592 LO pcol_k = Acol_to_Pcol[k];
593
594 if(k == i) {
595 // Case A: Diagonal value (add to first denominator)
596 // FIXME: Add BlockNumber matching here
597 first_denominator += a_ik;
598#ifdef CMS_DEBUG
599 a_ii = a_ik;
600 printf("- A(%d,%d) is the diagonal\n",i,k);
601#endif
602
603 }
604 else if(myPointType[k] == DIRICHLET_PT) {
605 // Case B: Ignore dirichlet connections completely
606#ifdef CMS_DEBUG
607 printf("- A(%d,%d) is a Dirichlet point\n",i,k);
608#endif
609
610 }
611 else if(pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
612 // Case C: a_ik is strong C-Point (goes directly into the weight)
613 P_values[pcol_k] += a_ik;
614#ifdef CMS_DEBUG
615 printf("- A(%d,%d) is a strong C-Point\n",i,k);
616#endif
617 }
618 else if (!edgeIsStrong[row_start + k0]) {
619 // Case D: Weak non-Dirichlet neighbor (add to first denominator)
620 if(block_id.size() == 0 || block_id[i] == block_id[k]) {
621 first_denominator += a_ik;
622#ifdef CMS_DEBUG
623 printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n",i,k,block_id[i],block_id[k],a_ik);
624 }
625 else {
626 printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n",i,k,block_id[i],block_id[k]);
627#endif
628 }
629
630 }
631 else {//Case E
632 // Case E: Strong F-Point (adds to the first denominator if we don't share a
633 // a strong C-Point with i; adds to the second denominator otherwise)
634#ifdef CMS_DEBUG
635 printf("- A(%d,%d) is a strong F-Point\n",i,k);
636#endif
637
638 SC a_kk = SC_ZERO;
639 SC second_denominator = SC_ZERO;
640 int sign_akk = 0;
641
642 if(k < (LO)Nrows) {
643 // Grab the diagonal a_kk
644 A.getLocalRowView(k, A_indices_k, A_vals_k);
645 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
646 LO m = A_indices_k[m0];
647 if(k == m) {
648 a_kk = A_vals_k[m0];
649 break;
650 }
651 }//end for A_indices_k
652
653 // Compute the second denominator term
654 sign_akk = Sign(a_kk);
655 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
656 LO m = A_indices_k[m0];
657 if(m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
658 SC a_km = A_vals_k[m0];
659 second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
660 }
661 }//end for A_indices_k
662
663 // Now we have the second denominator, for this particular strong F point.
664 // So we can now add the sum to the w_ij components for the P values
665 if(second_denominator != SC_ZERO) {
666 for(LO j0=0; j0<(LO)A_indices_k.size(); j0++) {
667 LO j = A_indices_k[j0];
668 // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
669 // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
670 if(Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
671 SC a_kj = A_vals_k[j0];
672 SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
673 P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
674#ifdef CMS_DEBUG
675 printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n",i,j,a_ik * sign_akj_val / second_denominator,P_values[Acol_to_Pcol[j]]);
676#endif
677 }
678 }//end for A_indices_k
679 }//end if second_denominator != 0
680 else {
681#ifdef CMS_DEBUG
682 printf("- - A(%d,%d) second denominator is zero\n",i,k);
683#endif
684 if(block_id.size() == 0 || block_id[i] == block_id[k])
685 first_denominator += a_ik;
686 }//end else second_denominator != 0
687 }// end if k < Nrows
688 else {
689 // Ghost row
690 LO kless = k-Nrows;
691 // Grab the diagonal a_kk
692 // NOTE: ColMap is not locally fitted to the RowMap
693 // so we need to check GIDs here
694 Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
695 GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
696 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
697 GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
698 if(k_g == m_g) {
699 a_kk = A_vals_k[m0];
700 break;
701 }
702 }//end for A_indices_k
703
704 // Compute the second denominator term
705 sign_akk = Sign(a_kk);
706 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
707 GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
708 LO mghost = A_indices_k[m0];//Aghost LCID
709 LO m = Aghostcol_to_Acol[mghost]; //A's LID (could be LO_INVALID)
710 if(m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
711 SC a_km = A_vals_k[m0];
712 second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
713 }
714 }//end for A_indices_k
715
716
717 // Now we have the second denominator, for this particular strong F point.
718 // So we can now add the sum to the w_ij components for the P values
719 if(second_denominator != SC_ZERO) {
720 for(LO j0=0; j0<(LO)A_indices_k.size(); j0++){
721 LO jghost = A_indices_k[j0];//Aghost LCID
722 LO j = Aghostcol_to_Acol[jghost]; //A's LID (could be LO_INVALID)
723 // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
724 if((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
725 SC a_kj = A_vals_k[j0];
726 SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
727 P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
728#ifdef CMS_DEBUG
729 printf("- - Unscaled P(%d,A-%d) += %6.4e\n",i,j,a_ik * sign_akj_val / second_denominator);
730#endif
731 }
732
733 }//end for A_indices_k
734 }//end if second_denominator != 0
735 else {
736#ifdef CMS_DEBUG
737 printf("- - A(%d,%d) second denominator is zero\n",i,k);
738#endif
739 if(block_id.size() == 0 || block_id[i] == block_id[k])
740 first_denominator += a_ik;
741 }//end else second_denominator != 0
742 }//end else k < Nrows
743 }//end else Case A,...,E
744
745 }//end for A_indices_i
746
747 // Now, downscale by the first_denominator
748 if(first_denominator != SC_ZERO) {
749 for(LO j0=0; j0<(LO)P_indices_i.size(); j0++) {
750#ifdef CMS_DEBUG
751 SC old_pij = P_vals_i[j0];
752 P_vals_i[j0] /= -first_denominator;
753 printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n",i,P_indices_i[j0],P_vals_i[j0],old_pij,a_ii,first_denominator - a_ii);
754#else
755 P_vals_i[j0] /= -first_denominator;
756#endif
757 }//end for P_indices_i
758 }//end if first_denominator != 0
759
760 }//end else C-Point
761
762 }// end if i < Nrows
763
764 // Finish up
765 Pcrs->setAllValues(P_rowptr,P_colind,P_values);
766 Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
767 // Wrap from CrsMatrix to Matrix
768 P = rcp(new CrsMatrixWrap(Pcrs));
769
770}//end Coarsen_ClassicalModified
771
772
773/* ************************************************************************* */
774template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
776Coarsen_Direct(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
777 /* ============================================================= */
778 /* Phase 3 : Direct Interpolation */
779 /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
780 /* here. Instead we follow: */
781 /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
782 /* with some modifications inspirted by PyAMG */
783 /* ============================================================= */
784 /* Definitions: */
785 /* F = F-points */
786 /* C = C-points */
787 /* N_i = non-zero neighbors of node i */
788 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
789 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
790 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
791 /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
792
793 /* (A.2.17) from p. 426 */
794 /* a_ij^- = { a_ij, if a_ij < 0 */
795 /* { 0, otherwise */
796 /* a_ij^+ = { a_ij, if a_ij > 0 */
797 /* { 0, otherwise */
798 /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
799 /* [strong C-neighbors with negative edges] */
800 /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
801 /* [strong C-neighbors with positive edges] */
802
803
804 /* de Sterck et al., gives us this: */
805 /* Rewritten Equation (6) on p. 119 */
806 /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
807
808 /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
809 /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
810 /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
811 /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
812 /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
813 /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
814 /* works. We'll follow the PyAMG implementation in a few important ways. */
815
818
819 // Initial (estimated) allocation
820 // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
821 // needs to be a copy below.
822 using STS = typename Teuchos::ScalarTraits<SC>;
823 using MT = typename STS::magnitudeType;
824 using MTS = typename Teuchos::ScalarTraits<MT>;
825 size_t Nrows = A.getLocalNumRows();
826 double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
827 double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
828 // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
829 double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
830
831 size_t nnz_est = std::max(Nrows,std::min((size_t)A.getLocalNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
832 SC SC_ZERO = STS::zero();
833 MT MT_ZERO = MTS::zero();
834 Array<size_t> tmp_rowptr(Nrows+1);
835 Array<LO> tmp_colind(nnz_est);
836
837 // Algorithm (count+realloc)
838 // For each row, i,
839 // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
840 size_t ct=0;
841 for(LO row=0; row < (LO) Nrows; row++) {
842 size_t row_start = eis_rowptr[row];
843 ArrayView<const LO> indices;
844 ArrayView<const SC> vals;
845 std::set<LO> C_hat;
846 if(myPointType[row] == DIRICHLET_PT) {
847 // Dirichlet points get ignored completely
848 }
849 else if(myPointType[row] == C_PT) {
850 // C-Points get a single 1 in their row
851 C_hat.insert(cpoint2pcol[row]);
852 }
853 else {
854 // F-Points have a more complicated interpolation
855
856 // C-neighbors of row
857 A.getLocalRowView(row, indices, vals);
858 for(LO j=0; j<indices.size(); j++)
859 if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
860 C_hat.insert(cpoint2pcol[indices[j]]);
861 }// end else
862
863 // Realloc if needed
864 if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
865 tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
866 }
867
868 // Copy
869 std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
870 ct+=C_hat.size();
871 tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
872 }
873 // Resize down
874 tmp_colind.resize(tmp_rowptr[Nrows]);
875
876 // Allocate memory & copy
877 P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
878 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
879 ArrayRCP<size_t> P_rowptr;
880 ArrayRCP<LO> P_colind;
881 ArrayRCP<SC> P_values;
882
883#ifdef CMS_DEBUG
884printf("CMS: Allocating P w/ %d nonzeros\n",(int)tmp_rowptr[Nrows]);
885#endif
886 PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
887 TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() !=P_rowptr.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (rowptr)");
888 TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() !=P_colind.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (colind)");
889 // FIXME: This can be short-circuited for Tpetra, if we decide we care
890 for(LO i=0; i<(LO)Nrows+1; i++)
891 P_rowptr[i] = tmp_rowptr[i];
892 for(LO i=0; i<(LO)tmp_rowptr[Nrows]; i++)
893 P_colind[i] = tmp_colind[i];
894
895
896 // Algorithm (numeric)
897 for(LO i=0; i < (LO)Nrows; i++) {
898 if(myPointType[i] == DIRICHLET_PT) {
899 // Dirichlet points get ignored completely
900#ifdef CMS_DEBUG
901 // DEBUG
902 printf("** A(%d,:) is a Dirichlet-Point.\n",i);
903#endif
904 }
905 else if (myPointType[i] == C_PT) {
906 // C Points get a single 1 in their row
907 P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
908#ifdef CMS_DEBUG
909 // DEBUG
910 printf("** A(%d,:) is a C-Point.\n",i);
911#endif
912 }
913 else {
914 /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
915 /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
916 /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
917 /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
918 /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
919 ArrayView<const LO> A_indices_i, A_incides_k;
920 ArrayView<const SC> A_vals_i, A_indices_k;
921 A.getLocalRowView(i, A_indices_i, A_vals_i);
922 size_t row_start = eis_rowptr[i];
923
924 ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
925 ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
926
927#ifdef CMS_DEBUG
928 // DEBUG
929 {
930 char mylabel[5]="FUCD";
931 char sw[3]="ws";
932 printf("** A(%d,:) = ",i);
933 for(LO j=0; j<(LO)A_indices_i.size(); j++){
934 printf("%6.4e(%d-%c%c) ",A_vals_i[j],A_indices_i[j],mylabel[1+myPointType[A_indices_i[j]]],sw[(int)edgeIsStrong[row_start+j]]);
935 }
936 printf("\n");
937 }
938#endif
939
940 SC a_ii = SC_ZERO;
941 SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
942 SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
943 // Find the diagonal and compute the sum ratio
944 for(LO j=0; j<(LO)A_indices_i.size(); j++) {
945 SC a_ik = A_vals_i[j];
946 LO k = A_indices_i[j];
947
948 // Diagonal
949 if(i == k) {
950 a_ii = a_ik;
951 }
952 // Only strong C-neighbors are in the denomintor
953 if(myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
954 if(STS::real(a_ik) > MT_ZERO) pos_denominator += a_ik;
955 else neg_denominator += a_ik;
956 }
957
958 // All neighbors are in the numerator
959 // NOTE: As per PyAMG, this does not include the diagonal
960 if(i != k) {
961 if(STS::real(a_ik) > MT_ZERO) pos_numerator += a_ik;
962 else neg_numerator += a_ik;
963 }
964 }
965 SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
966 SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
967 alpha /= -a_ii;
968 beta /= -a_ii;
969
970 // Loop over the entries
971 for(LO p_j=0; p_j<(LO)P_indices_i.size(); p_j++){
972 LO P_col = pcol2cpoint[P_indices_i[p_j]];
973 SC a_ij = SC_ZERO;
974
975 // Find A_ij (if it is there)
976 // FIXME: We can optimize this if we assume sorting
977 for(LO a_j =0; a_j<(LO)A_indices_i.size(); a_j++) {
978 if(A_indices_i[a_j] == P_col) {
979 a_ij = A_vals_i[a_j];
980 break;
981 }
982 }
983 SC w_ij = (STS::real(a_ij) < 0 ) ? (alpha * a_ij) : (beta * a_ij);
984#ifdef CMS_DEBUG
985 SC alpha_or_beta = (STS::real(a_ij) < 0 ) ? alpha : beta;
986 printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n",i,P_indices_i[p_j],pcol2cpoint[P_indices_i[p_j]],alpha_or_beta,a_ij,w_ij);
987#endif
988 P_vals_i[p_j] = w_ij;
989 }//end for A_indices_i
990 }//end else C_PT
991 }//end for Numrows
992
993 // Finish up
994 PCrs->setAllValues(P_rowptr, P_colind, P_values);
995 PCrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
996}
997
998
999/* ************************************************************************* */
1000template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1002Coarsen_Ext_Plus_I(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
1003
1004 /* ============================================================= */
1005 /* Phase 3 : Extended+i Interpolation */
1006 /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
1007 /* interpolation for parallel algebraic multigrid", NLAA 2008 */
1008 /* 15:115-139 */
1009 /* ============================================================= */
1010 /* Definitions: */
1011 /* F = F-points */
1012 /* C = C-points */
1013 /* N_i = non-zero neighbors of node i */
1014 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
1015 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
1016 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
1017 /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
1018 /* This guy has a typo. The paper had a \cap instead of \cup */
1019 /* I would note that this set can contain both F-points and */
1020 /* C-points. They're just weak neighbors of this guy. */
1021 /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
1022
1023 /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
1024 /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
1025 /* */
1026
1027 /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
1028 /* { a_ij, otherwise */
1029
1030
1031 /* Rewritten Equation (19) on p. 123 */
1032 /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1033 /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
1034 /* for j in \hat{C}_i */
1035
1036 /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
1037 /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1038 /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
1039 TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"ClassicalPFactory: Ext+i not implemented");
1040
1041}
1042
1043
1044
1045
1046/* ************************************************************************* */
1047template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1049GenerateStrengthFlags(const Matrix & A,const GraphBase & graph, Teuchos::Array<size_t> & eis_rowptr,Teuchos::Array<bool> & edgeIsStrong) const {
1050 // To make this easier, we'll create a bool array equal to the nnz in the matrix
1051 // so we know whether each edge is strong or not. This will save us a bunch of
1052 // trying to match the graph and matrix later
1053 size_t Nrows = A.getLocalNumRows();
1054 eis_rowptr.resize(Nrows+1);
1055
1056 if(edgeIsStrong.size() == 0) {
1057 // Preferred
1058 edgeIsStrong.resize(A.getLocalNumEntries(),false);
1059 }
1060 else {
1061 edgeIsStrong.resize(A.getLocalNumEntries(),false);
1062 edgeIsStrong.assign(A.getLocalNumEntries(),false);
1063 }
1064
1065 eis_rowptr[0] = 0;
1066 for (LO i=0; i<(LO)Nrows; i++) {
1067 LO rowstart = eis_rowptr[i];
1068 ArrayView<const LO> A_indices;
1069 ArrayView<const SC> A_values;
1070 A.getLocalRowView(i, A_indices, A_values);
1071 LO A_size = (LO) A_indices.size();
1072
1073 ArrayView<const LO> G_indices = graph.getNeighborVertices(i);
1074 LO G_size = (LO) G_indices.size();
1075
1076 // Both of these guys should be in the same (sorted) order, but let's check
1077 bool is_ok=true;
1078 for(LO j=0; j<A_size-1; j++)
1079 if (A_indices[j] >= A_indices[j+1]) { is_ok=false; break;}
1080 for(LO j=0; j<G_size-1; j++)
1081 if (G_indices[j] >= G_indices[j+1]) { is_ok=false; break;}
1082 TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError,"ClassicalPFactory: Exected A and Graph to be sorted");
1083
1084 // Now cycle through and set the flags - if the edge is in G it is strong
1085 for(LO g_idx=0, a_idx=0; g_idx < G_size; g_idx++) {
1086 LO col = G_indices[g_idx];
1087 while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1088 if(a_idx == A_size) {is_ok=false;break;}
1089 edgeIsStrong[rowstart+a_idx] = true;
1090 }
1091
1092 eis_rowptr[i+1] = eis_rowptr[i] + A_size;
1093 }
1094}
1095
1096
1097/* ************************************************************************* */
1098template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1100GhostCoarseMap(const Matrix &A, const Import & Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map> & coarseMap, RCP<const Map> & coarseColMap) const {
1102 const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1103 RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1104 ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1105 LO ct=0;
1106
1107 for(LO i=0; i<(LO)d_data.size(); i++) {
1108 if(myPointType[i] == C_PT) {
1109 d_data[i] = coarseMap->getGlobalElement(ct);
1110 ct++;
1111 }
1112 else
1113 d_data[i] = GO_INVALID;
1114 }
1115
1116 // Ghost this guy
1117 RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1118 c_coarseIds->doImport(*d_coarseIds,Importer,Xpetra::INSERT);
1119
1120 // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1121 // be in Aztec ordering as well, which means we can just condense these guys down
1122 // Overallocate, count and view
1123 ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1124
1125 Array<GO> c_gids(c_data.size());
1126 LO count=0;
1127
1128 for(LO i=0; i<(LO)c_data.size(); i++) {
1129 if(c_data[i] != GO_INVALID) {
1130 c_gids[count] = c_data[i];
1131 count++;
1132 }
1133 }
1134 // FIXME: Assumes scalar PDE
1135 std::vector<size_t> stridingInfo_(1);
1136 stridingInfo_[0]=1;
1137 GO domainGIDOffset = 0;
1138
1139 coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1140 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1141 c_gids.view(0,count),
1142 coarseMap->getIndexBase(),
1143 stridingInfo_,
1144 coarseMap->getComm(),
1145 domainGIDOffset);
1146
1147}
1148
1149
1150} //namespace MueLu
1151
1152
1153
1154#define MUELU_CLASSICALPFACTORY_SHORT
1155#endif // MUELU_CLASSICALPFACTORY_DEF_HPP
1156
1157
#define SET_VALID_ENTRY(name)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void GenerateStrengthFlags(const Matrix &A, const GraphBase &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename ClassicalMapFactory::point_type point_type
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
MueLu representation of a graph.
virtual size_t GetNodeNumEdges() const =0
Return number of edges owned by the calling node.
virtual Teuchos::ArrayView< const LocalOrdinal > getNeighborVertices(LocalOrdinal v) const =0
Return the list of vertices adjacent to the vertex 'v'.
virtual size_t GetNodeNumVertices() const =0
Return number of vertices owned by the calling node.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
virtual const Teuchos::ParameterList & GetParameterList() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
VerbLevel GetVerbLevel() const
Get the verbosity level.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
Namespace for MueLu classes and methods.
@ Debug
Print additional debugging information.
@ Statistics1
Print more statistics.