MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
47#define MUELU_TENTATIVEPFACTORY_DEF_HPP
48
49#include <Xpetra_MapFactory.hpp>
50#include <Xpetra_Map.hpp>
51#include <Xpetra_CrsMatrix.hpp>
52#include <Xpetra_CrsGraphFactory.hpp>
53#include <Xpetra_Matrix.hpp>
54#include <Xpetra_MatrixMatrix.hpp>
55#include <Xpetra_MultiVector.hpp>
56#include <Xpetra_MultiVectorFactory.hpp>
57#include <Xpetra_VectorFactory.hpp>
58#include <Xpetra_Import.hpp>
59#include <Xpetra_ImportFactory.hpp>
60#include <Xpetra_CrsMatrixWrap.hpp>
61#include <Xpetra_StridedMap.hpp>
62#include <Xpetra_StridedMapFactory.hpp>
63#include <Xpetra_IO.hpp>
64
65#ifdef HAVE_MUELU_TPETRA
66#include "Xpetra_TpetraBlockCrsMatrix.hpp"
67//#include "Tpetra_BlockCrsMatrix.hpp"
68#endif
69
71
72#include "MueLu_Aggregates.hpp"
73#include "MueLu_AmalgamationFactory.hpp"
74#include "MueLu_AmalgamationInfo.hpp"
75#include "MueLu_CoarseMapFactory.hpp"
76#include "MueLu_MasterList.hpp"
77#include "MueLu_Monitor.hpp"
78#include "MueLu_NullspaceFactory.hpp"
79#include "MueLu_PerfUtils.hpp"
80#include "MueLu_Utilities.hpp"
81
82
83
84
85namespace MueLu {
86
87 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89 RCP<ParameterList> validParamList = rcp(new ParameterList());
90
91#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
92 SET_VALID_ENTRY("tentative: calculate qr");
93 SET_VALID_ENTRY("tentative: build coarse coordinates");
94 SET_VALID_ENTRY("tentative: constant column sums");
95#undef SET_VALID_ENTRY
96 validParamList->set< std::string >("Nullspace name", "Nullspace", "Name for the input nullspace");
97
98 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
99 validParamList->set< RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
100 validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
101 validParamList->set< RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
102 validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
103 validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
104 validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
105 validParamList->set< RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
106
107 // Make sure we don't recursively validate options for the matrixmatrix kernels
108 ParameterList norecurse;
109 norecurse.disableRecursiveValidation();
110 validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
111
112 return validParamList;
113 }
114
115 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117
118 const ParameterList& pL = GetParameterList();
119 // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
120 std::string nspName = "Nullspace";
121 if(pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
122
123 Input(fineLevel, "A");
124 Input(fineLevel, "Aggregates");
125 Input(fineLevel, nspName);
126 Input(fineLevel, "UnAmalgamationInfo");
127 Input(fineLevel, "CoarseMap");
128 if( fineLevel.GetLevelID() == 0 &&
129 fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
130 pL.get<bool>("tentative: build coarse coordinates") ) { // and we want coordinates on other levels
131 bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
132 Input(fineLevel, "Coordinates");
133 } else if (bTransferCoordinates_) {
134 Input(fineLevel, "Coordinates");
135 }
136 }
137
138 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140 return BuildP(fineLevel, coarseLevel);
141 }
142
143 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
145 FactoryMonitor m(*this, "Build", coarseLevel);
146 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
147 typedef Xpetra::MultiVector<coordinate_type,LO,GO,NO> RealValuedMultiVector;
148 typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
149
150 const ParameterList& pL = GetParameterList();
151 std::string nspName = "Nullspace";
152 if(pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
153
154
155 RCP<Matrix> Ptentative;
156 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
157 RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates");
158 // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
159 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
160 if ( aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
161 Ptentative = Teuchos::null;
162 Set(coarseLevel, "P", Ptentative);
163 return;
164 }
165
166 RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
167 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, nspName);
168 RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel, "CoarseMap");
169 RCP<RealValuedMultiVector> fineCoords;
171 fineCoords = Get< RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
172 }
173
174 // FIXME: We should remove the NodeComm on levels past the threshold
175 if(fineLevel.IsAvailable("Node Comm")) {
176 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,"Node Comm");
177 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
178 }
179
180 // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
181 TEUCHOS_TEST_FOR_EXCEPTION( A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
182 Exceptions::RuntimeError,"MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
183
184 RCP<MultiVector> coarseNullspace;
185 RCP<RealValuedMultiVector> coarseCoords;
186
188 //*** Create the coarse coordinates ***
189 // First create the coarse map and coarse multivector
190 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
191 LO blkSize = 1;
192 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
193 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
194 }
195 GO indexBase = coarseMap->getIndexBase();
196 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
197 Array<GO> nodeList(numCoarseNodes);
198 const int numDimensions = fineCoords->getNumVectors();
199
200 for (LO i = 0; i < numCoarseNodes; i++) {
201 nodeList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
202 }
203 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
204 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
205 nodeList,
206 indexBase,
207 fineCoords->getMap()->getComm());
208 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
209
210 // Create overlapped fine coordinates to reduce global communication
211 RCP<RealValuedMultiVector> ghostedCoords;
212 if (aggregates->AggregatesCrossProcessors()) {
213 RCP<const Map> aggMap = aggregates->GetMap();
214 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
215
216 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
217 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
218 } else {
219 ghostedCoords = fineCoords;
220 }
221
222 // Get some info about aggregates
223 int myPID = coarseCoordsMap->getComm()->getRank();
224 LO numAggs = aggregates->GetNumAggregates();
225 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
226 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
227 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
228
229 // Fill in coarse coordinates
230 for (int dim = 0; dim < numDimensions; ++dim) {
231 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
232 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
233
234 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
235 if (procWinner[lnode] == myPID &&
236 lnode < fineCoordsData.size() &&
237 vertex2AggID[lnode] < coarseCoordsData.size() &&
238 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
239 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
240 }
241 }
242 for (LO agg = 0; agg < numAggs; agg++) {
243 coarseCoordsData[agg] /= aggSizes[agg];
244 }
245 }
246 }
247
248 if (!aggregates->AggregatesCrossProcessors()) {
249 if(Xpetra::Helpers<SC,LO,GO,NO>::isTpetraBlockCrs(A)) {
250 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
251 }
252 else {
253 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
254 }
255 }
256 else
257 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
258
259 // If available, use striding information of fine level matrix A for range
260 // map and coarseMap as domain map; otherwise use plain range map of
261 // Ptent = plain range map of A for range map and coarseMap as domain map.
262 // NOTE:
263 // The latter is not really safe, since there is no striding information
264 // for the range map. This is not really a problem, since striding
265 // information is always available on the intermedium levels and the
266 // coarsest levels.
267 if (A->IsView("stridedMaps") == true)
268 Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
269 else
270 Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
271
273 Set(coarseLevel, "Coordinates", coarseCoords);
274 }
275 Set(coarseLevel, "Nullspace", coarseNullspace);
276 Set(coarseLevel, "P", Ptentative);
277
278 if (IsPrint(Statistics2)) {
279 RCP<ParameterList> params = rcp(new ParameterList());
280 params->set("printLoadBalancingInfo", true);
281 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
282 }
283 }
284
285 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
287 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
288 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
289#ifdef HAVE_MUELU_TPETRA
290
291 /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
292 be generalized later, if we ever need to do so:
293 1) Null space dimension === block size of matrix: So no elasticity right now
294 2) QR is not supported: Under assumption #1, this shouldn't cause problems.
295 3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
296
297 These assumptions keep our code way simpler and still support the use cases we actually care about.
298 */
299
300 RCP<const Map> rowMap = A->getRowMap();
301 RCP<const Map> rangeMap = A->getRangeMap();
302 RCP<const Map> colMap = A->getColMap();
303 // const size_t numFinePointRows = rangeMap->getLocalNumElements();
304 const size_t numFineBlockRows = rowMap->getLocalNumElements();
305
306 typedef Teuchos::ScalarTraits<SC> STS;
307 // typedef typename STS::magnitudeType Magnitude;
308 const SC zero = STS::zero();
309 const SC one = STS::one();
310 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
311
312 const GO numAggs = aggregates->GetNumAggregates();
313 const size_t NSDim = fineNullspace->getNumVectors();
314 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
315
316 // Need to generate the coarse block map
317 // NOTE: We assume NSDim == block size here
318 // NOTE: We also assume that coarseMap has contiguous GIDs
319 //const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
320 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
321 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
322 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
323 numCoarseBlockRows,
324 coarsePointMap->getIndexBase(),
325 coarsePointMap->getComm());
326 // Sanity checking
327 const ParameterList& pL = GetParameterList();
328 const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
329 const bool &constantColSums = pL.get<bool>("tentative: constant column sums");
330
331 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums,Exceptions::RuntimeError,
332 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
333
334 // The aggregates use the amalgamated column map, which in this case is what we want
335
336 // Aggregates map is based on the amalgamated column map
337 // We can skip global-to-local conversion if LIDs in row map are
338 // same as LIDs in column map
339 bool goodMap = MueLu::Utilities<SC,LO,GO,NO>::MapsAreNested(*rowMap, *colMap);
340
341 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
342 // aggStart is a pointer into aggToRowMapLO
343 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
344 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
345 ArrayRCP<LO> aggStart;
346 ArrayRCP<LO> aggToRowMapLO;
347 ArrayRCP<GO> aggToRowMapGO;
348 if (goodMap) {
349 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
350 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
351 } else {
352 throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
353 }
354
355 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
356
357 // Pull out the nullspace vectors so that we can have random access.
358 ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
359 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
360 for (size_t i = 0; i < NSDim; i++) {
361 fineNS[i] = fineNullspace->getData(i);
362 if (coarsePointMap->getLocalNumElements() > 0)
363 coarseNS[i] = coarseNullspace->getDataNonConst(i);
364 }
365
366 // BlockCrs requires that we build the (block) graph first, so let's do that...
367 // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
368 // block non-zero per row in the matrix;
369 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap,coarseBlockMap,0);
370 ArrayRCP<size_t> iaPtent;
371 ArrayRCP<LO> jaPtent;
372 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
373 ArrayView<size_t> ia = iaPtent();
374 ArrayView<LO> ja = jaPtent();
375
376 for (size_t i = 0; i < numFineBlockRows; i++) {
377 ia[i] = i;
378 ja[i] = INVALID;
379 }
380 ia[numCoarseBlockRows] = numCoarseBlockRows;
381
382
383 for (GO agg = 0; agg < numAggs; agg++) {
384 LO aggSize = aggStart[agg+1] - aggStart[agg];
385 Xpetra::global_size_t offset = agg;
386
387 for (LO j = 0; j < aggSize; j++) {
388 // FIXME: Allow for bad maps
389 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
390 const size_t rowStart = ia[localRow];
391 ja[rowStart] = offset;
392 }
393 }
394
395 // Compress storage (remove all INVALID, which happen when we skip zeros)
396 // We do that in-place
397 size_t ia_tmp = 0, nnz = 0;
398 for (size_t i = 0; i < numFineBlockRows; i++) {
399 for (size_t j = ia_tmp; j < ia[i+1]; j++)
400 if (ja[j] != INVALID) {
401 ja [nnz] = ja [j];
402 nnz++;
403 }
404 ia_tmp = ia[i+1];
405 ia[i+1] = nnz;
406 }
407
408 if (rowMap->lib() == Xpetra::UseTpetra) {
409 // - Cannot resize for Epetra, as it checks for same pointers
410 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
411 // NOTE: these invalidate ja and val views
412 jaPtent .resize(nnz);
413 }
414
415 GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
416 BlockGraph->setAllIndices(iaPtent, jaPtent);
417
418 // Managing labels & constants for ESFC
419 {
420 RCP<ParameterList> FCparams;
421 if(pL.isSublist("matrixmatrix: kernel params"))
422 FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
423 else
424 FCparams= rcp(new ParameterList);
425 // By default, we don't need global constants for TentativeP, but we do want it for the graph
426 // if we're printing statistics, so let's leave it on for now.
427 FCparams->set("compute global constants",FCparams->get("compute global constants",true));
428 std::string levelIDs = toString(levelID);
429 FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
430 RCP<const Export> dummy_e;
431 RCP<const Import> dummy_i;
432 BlockGraph->expertStaticFillComplete(coarseBlockMap,rowMap,dummy_i,dummy_e,FCparams);
433 }
434
435 // Now let's make a BlockCrs Matrix
436 // NOTE: Assumes block size== NSDim
437 RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap,NSDim);
438 RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
439 if(P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
440 RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
441
443 // "no-QR" option //
445 // Local Q factor is just the fine nullspace support over the current aggregate.
446 // Local R factor is the identity.
447 // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
448 // NOTE: "goodMap" case only
449 Teuchos::Array<Scalar> block(NSDim*NSDim, zero);
450 Teuchos::Array<LO> bcol(1);
451
452 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
453 for (LO agg = 0; agg < numAggs; agg++) {
454 bcol[0] = agg;
455 const LO aggSize = aggStart[agg+1] - aggStart[agg];
456 Xpetra::global_size_t offset = agg*NSDim;
457
458 // Process each row in the local Q factor
459 // NOTE: Blocks are in row-major order
460 for (LO j = 0; j < aggSize; j++) {
461 const LO localBlockRow = aggToRowMapLO[aggStart[agg]+j];
462
463 for (size_t r = 0; r < NSDim; r++) {
464 LO localPointRow = localBlockRow*NSDim + r;
465 for (size_t c = 0; c < NSDim; c++)
466 block[r*NSDim+c] = fineNS[c][localPointRow];
467 }
468 // NOTE: Assumes columns==aggs and are ordered sequentially
469 P_tpetra->replaceLocalValues(localBlockRow,bcol(),block());
470
471 }//end aggSize
472
473 for (size_t j = 0; j < NSDim; j++)
474 coarseNS[j][offset+j] = one;
475
476 } //for (GO agg = 0; agg < numAggs; agg++)
477
478 Ptentative = P_wrap;
479#else
480 throw std::runtime_error("TentativePFactory::BuildPuncoupledBlockCrs: Requires Tpetra");
481#endif
482 }
483
484 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
486 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
487 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
488 typedef Teuchos::ScalarTraits<SC> STS;
489 typedef typename STS::magnitudeType Magnitude;
490 const SC zero = STS::zero();
491 const SC one = STS::one();
492
493 // number of aggregates
494 GO numAggs = aggregates->GetNumAggregates();
495
496 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
497 // aggStart is a pointer into aggToRowMap
498 // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
499 // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
500 ArrayRCP<LO> aggStart;
501 ArrayRCP< GO > aggToRowMap;
502 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
503
504 // find size of the largest aggregate
505 LO maxAggSize=0;
506 for (GO i=0; i<numAggs; ++i) {
507 LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
508 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
509 }
510
511 // dimension of fine level nullspace
512 const size_t NSDim = fineNullspace->getNumVectors();
513
514 // index base for coarse Dof map (usually 0)
515 GO indexBase=A->getRowMap()->getIndexBase();
516
517 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
518 const RCP<const Map> uniqueMap = A->getDomainMap();
519 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
520 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
521 fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
522
523 // Pull out the nullspace vectors so that we can have random access.
524 ArrayRCP< ArrayRCP<const SC> > fineNS(NSDim);
525 for (size_t i=0; i<NSDim; ++i)
526 fineNS[i] = fineNullspaceWithOverlap->getData(i);
527
528 //Allocate storage for the coarse nullspace.
529 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
530
531 ArrayRCP< ArrayRCP<SC> > coarseNS(NSDim);
532 for (size_t i=0; i<NSDim; ++i)
533 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
534
535 //This makes the rowmap of Ptent the same as that of A->
536 //This requires moving some parts of some local Q's to other processors
537 //because aggregates can span processors.
538 RCP<const Map > rowMapForPtent = A->getRowMap();
539 const Map& rowMapForPtentRef = *rowMapForPtent;
540
541 // Set up storage for the rows of the local Qs that belong to other processors.
542 // FIXME This is inefficient and could be done within the main loop below with std::vector's.
543 RCP<const Map> colMap = A->getColMap();
544
545 RCP<const Map > ghostQMap;
546 RCP<MultiVector> ghostQvalues;
547 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
548 RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
549 ArrayRCP< ArrayRCP<SC> > ghostQvals;
550 ArrayRCP< ArrayRCP<GO> > ghostQcols;
551 ArrayRCP< GO > ghostQrows;
552
553 Array<GO> ghostGIDs;
554 for (LO j=0; j<numAggs; ++j) {
555 for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
556 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
557 ghostGIDs.push_back(aggToRowMap[k]);
558 }
559 }
560 }
561 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
562 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
563 ghostGIDs,
564 indexBase, A->getRowMap()->getComm()); //JG:Xpetra::global_size_t>?
565 //Vector to hold bits of Q that go to other processors.
566 ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
567 //Note that Epetra does not support MultiVectors templated on Scalar != double.
568 //So to work around this, we allocate an array of Vectors. This shouldn't be too
569 //expensive, as the number of Vectors is NSDim.
570 ghostQcolumns.resize(NSDim);
571 for (size_t i=0; i<NSDim; ++i)
572 ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
573 ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
574 if (ghostQvalues->getLocalLength() > 0) {
575 ghostQvals.resize(NSDim);
576 ghostQcols.resize(NSDim);
577 for (size_t i=0; i<NSDim; ++i) {
578 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
579 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
580 }
581 ghostQrows = ghostQrowNums->getDataNonConst(0);
582 }
583
584 //importer to handle moving Q
585 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
586
587 // Dense QR solver
588 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
589
590 //Allocate temporary storage for the tentative prolongator.
591 Array<GO> globalColPtr(maxAggSize*NSDim,0);
592 Array<LO> localColPtr(maxAggSize*NSDim,0);
593 Array<SC> valPtr(maxAggSize*NSDim,0.);
594
595 //Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
596 const Map& coarseMapRef = *coarseMap;
597
598 // For the 3-arrays constructor
599 ArrayRCP<size_t> ptent_rowptr;
600 ArrayRCP<LO> ptent_colind;
601 ArrayRCP<Scalar> ptent_values;
602
603 // Because ArrayRCPs are slow...
604 ArrayView<size_t> rowptr_v;
605 ArrayView<LO> colind_v;
606 ArrayView<Scalar> values_v;
607
608 // For temporary usage
609 Array<size_t> rowptr_temp;
610 Array<LO> colind_temp;
611 Array<Scalar> values_temp;
612
613 RCP<CrsMatrix> PtentCrs;
614
615 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
616 PtentCrs = PtentCrsWrap->getCrsMatrix();
617 Ptentative = PtentCrsWrap;
618
619 //*****************************************************************
620 //Loop over all aggregates and calculate local QR decompositions.
621 //*****************************************************************
622 GO qctr=0; //for indexing into Ptent data vectors
623 const Map& nonUniqueMapRef = *nonUniqueMap;
624
625 size_t total_nnz_count=0;
626
627 for (GO agg=0; agg<numAggs; ++agg)
628 {
629 LO myAggSize = aggStart[agg+1]-aggStart[agg];
630 // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
631 // "localQR" (in column major format) for the QR routine.
632 Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
633 for (size_t j=0; j<NSDim; ++j) {
634 bool bIsZeroNSColumn = true;
635 for (LO k=0; k<myAggSize; ++k)
636 {
637 // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
638 // fineNS[j][n] is the nth entry in the jth NS vector
639 try{
640 SC nsVal = fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ]; // extract information from fine level NS
641 localQR(k,j) = nsVal;
642 if (nsVal != zero) bIsZeroNSColumn = false;
643 }
644 catch(...) {
645 GetOStream(Runtime1,-1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
646 GetOStream(Runtime1,-1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
647 GetOStream(Runtime1,-1) << "(local?) aggId=" << agg << std::endl;
648 GetOStream(Runtime1,-1) << "aggSize=" << myAggSize << std::endl;
649 GetOStream(Runtime1,-1) << "agg DOF=" << k << std::endl;
650 GetOStream(Runtime1,-1) << "NS vector j=" << j << std::endl;
651 GetOStream(Runtime1,-1) << "j*myAggSize + k = " << j*myAggSize + k << std::endl;
652 GetOStream(Runtime1,-1) << "aggToRowMap["<<agg<<"][" << k << "] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
653 GetOStream(Runtime1,-1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] << " is global element in nonUniqueMap = " <<
654 nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
655 GetOStream(Runtime1,-1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
656 GetOStream(Runtime1,-1) << "fineNS...=" << fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
657 GetOStream(Errors,-1) << "caught an error!" << std::endl;
658 }
659 } //for (LO k=0 ...
660 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
661 } //for (LO j=0 ...
662
663 Xpetra::global_size_t offset=agg*NSDim;
664
665 if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
666 // calculate QR decomposition (standard)
667 // R is stored in localQR (size: myAggSize x NSDim)
668
669 // Householder multiplier
670 SC tau = localQR(0,0);
671
672 if (NSDim == 1) {
673 // Only one nullspace vector, so normalize by hand
674 Magnitude dtemp=0;
675 for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
676 Magnitude tmag = STS::magnitude(localQR(k,0));
677 dtemp += tmag*tmag;
678 }
679 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
680 tau = localQR(0,0);
681 localQR(0,0) = dtemp;
682 } else {
683 qrSolver.setMatrix( Teuchos::rcp(&localQR, false) );
684 qrSolver.factor();
685 }
686
687 // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
688 // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
689 // This stores the (offset+k)th entry only if it is local according to the coarseMap.
690 for (size_t j=0; j<NSDim; ++j) {
691 for (size_t k=0; k<=j; ++k) {
692 try {
693 if (coarseMapRef.isNodeLocalElement(offset+k)) {
694 coarseNS[j][offset+k] = localQR(k, j); //TODO is offset+k the correct local ID?!
695 }
696 }
697 catch(...) {
698 GetOStream(Errors,-1) << "caught error in coarseNS insert, j="<<j<<", offset+k = "<<offset+k<<std::endl;
699 }
700 }
701 }
702
703 // Calculate Q, the tentative prolongator.
704 // The Lapack GEQRF call only works for myAggsize >= NSDim
705
706 if (NSDim == 1) {
707 // Only one nullspace vector, so calculate Q by hand
708 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
709 localQR(0,0) = tau;
710 dtemp = 1 / dtemp;
711 for (LocalOrdinal i=0; i<myAggSize; ++i) {
712 localQR(i,0) *= dtemp ;
713 }
714 } else {
715 qrSolver.formQ();
716 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
717 for (size_t j=0; j<NSDim; j++) {
718 for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
719 localQR(i,j) = (*qFactor)(i,j);
720 }
721 }
722 }
723
724 // end default case (myAggSize >= NSDim)
725 } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
726 // See comments for the uncoupled case
727
728 // R = extended (by adding identity rows) localQR
729 for (size_t j = 0; j < NSDim; j++)
730 for (size_t k = 0; k < NSDim; k++) {
731 TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset+k), Exceptions::RuntimeError,
732 "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset+k);
733
734 if (k < as<size_t>(myAggSize))
735 coarseNS[j][offset+k] = localQR(k,j);
736 else
737 coarseNS[j][offset+k] = (k == j ? one : zero);
738 }
739
740 // Q = I (rectangular)
741 for (size_t i = 0; i < as<size_t>(myAggSize); i++)
742 for (size_t j = 0; j < NSDim; j++)
743 localQR(i,j) = (j == i ? one : zero);
744 } // end else (special handling for 1pt aggregates)
745
746 //Process each row in the local Q factor. If the row is local to the current processor
747 //according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
748 //to be communicated later to the owning processor.
749 //FIXME -- what happens if maps are blocked?
750 for (GO j=0; j<myAggSize; ++j) {
751 //This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
752 //If it is, the row is inserted. If not, the row number, columns, and values are saved in
753 //MultiVectors that will be sent to other processors.
754 GO globalRow = aggToRowMap[aggStart[agg]+j];
755
756 //TODO is the use of Xpetra::global_size_t below correct?
757 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false ) {
758 ghostQrows[qctr] = globalRow;
759 for (size_t k=0; k<NSDim; ++k) {
760 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
761 ghostQvals[k][qctr] = localQR(j,k);
762 }
763 ++qctr;
764 } else {
765 size_t nnz=0;
766 for (size_t k=0; k<NSDim; ++k) {
767 try{
768 if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
769 localColPtr[nnz] = agg * NSDim + k;
770 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
771 valPtr[nnz] = localQR(j,k);
772 ++total_nnz_count;
773 ++nnz;
774 }
775 }
776 catch(...) {
777 GetOStream(Errors,-1) << "caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
778 }
779 } //for (size_t k=0; k<NSDim; ++k)
780
781 try{
782 Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
783 }
784 catch(...) {
785 GetOStream(Errors,-1) << "pid " << A->getRowMap()->getComm()->getRank()
786 << "caught error during Ptent row insertion, global row "
787 << globalRow << std::endl;
788 }
789 }
790 } //for (GO j=0; j<myAggSize; ++j)
791
792 } // for (LO agg=0; agg<numAggs; ++agg)
793
794
795 // ***********************************************************
796 // ************* end of aggregate-wise QR ********************
797 // ***********************************************************
798 GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
799 // Import ghost parts of Q factors and insert into Ptentative.
800 // First import just the global row numbers.
801 RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
802 targetQrowNums->putScalar(-1);
803 targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
804 ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
805
806 // Now create map based on just the row numbers imported.
807 Array<GO> gidsToImport;
808 gidsToImport.reserve(targetQrows.size());
809 for (typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
810 if (*r > -1) {
811 gidsToImport.push_back(*r);
812 }
813 }
814 RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
815 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
816 gidsToImport, indexBase, A->getRowMap()->getComm() );
817
818 // Import using the row numbers that this processor will receive.
819 importer = ImportFactory::Build(ghostQMap, reducedMap);
820
821 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
822 for (size_t i=0; i<NSDim; ++i) {
823 targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
824 targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
825 }
826 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
827 targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
828
829 ArrayRCP< ArrayRCP<SC> > targetQvals;
830 ArrayRCP<ArrayRCP<GO> > targetQcols;
831 if (targetQvalues->getLocalLength() > 0) {
832 targetQvals.resize(NSDim);
833 targetQcols.resize(NSDim);
834 for (size_t i=0; i<NSDim; ++i) {
835 targetQvals[i] = targetQvalues->getDataNonConst(i);
836 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
837 }
838 }
839
840 valPtr = Array<SC>(NSDim,0.);
841 globalColPtr = Array<GO>(NSDim,0);
842 for (typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
843 if (targetQvalues->getLocalLength() > 0) {
844 for (size_t j=0; j<NSDim; ++j) {
845 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
846 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
847 }
848 Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
849 } //if (targetQvalues->getLocalLength() > 0)
850 }
851
852 Ptentative->fillComplete(coarseMap, A->getDomainMap());
853 }
854
855
856
857 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
859 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
860 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
861 RCP<const Map> rowMap = A->getRowMap();
862 RCP<const Map> colMap = A->getColMap();
863 const size_t numRows = rowMap->getLocalNumElements();
864
865 typedef Teuchos::ScalarTraits<SC> STS;
866 typedef typename STS::magnitudeType Magnitude;
867 const SC zero = STS::zero();
868 const SC one = STS::one();
869 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
870
871 const GO numAggs = aggregates->GetNumAggregates();
872 const size_t NSDim = fineNullspace->getNumVectors();
873 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
874
875
876 // Sanity checking
877 const ParameterList& pL = GetParameterList();
878 const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
879 const bool &constantColSums = pL.get<bool>("tentative: constant column sums");
880
881 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums,Exceptions::RuntimeError,
882 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
883
884 // Aggregates map is based on the amalgamated column map
885 // We can skip global-to-local conversion if LIDs in row map are
886 // same as LIDs in column map
887 bool goodMap = MueLu::Utilities<SC,LO,GO,NO>::MapsAreNested(*rowMap, *colMap);
888
889 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
890 // aggStart is a pointer into aggToRowMapLO
891 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
892 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
893 ArrayRCP<LO> aggStart;
894 ArrayRCP<LO> aggToRowMapLO;
895 ArrayRCP<GO> aggToRowMapGO;
896 if (goodMap) {
897 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
898 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
899
900 } else {
901 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
902 GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
903 << "using GO->LO conversion with performance penalty" << std::endl;
904 }
905 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
906
907 // Pull out the nullspace vectors so that we can have random access.
908 ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
909 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
910 for (size_t i = 0; i < NSDim; i++) {
911 fineNS[i] = fineNullspace->getData(i);
912 if (coarseMap->getLocalNumElements() > 0)
913 coarseNS[i] = coarseNullspace->getDataNonConst(i);
914 }
915
916 size_t nnzEstimate = numRows * NSDim;
917
918 // Time to construct the matrix and fill in the values
919 Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
920 RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
921
922 ArrayRCP<size_t> iaPtent;
923 ArrayRCP<LO> jaPtent;
924 ArrayRCP<SC> valPtent;
925
926 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
927
928 ArrayView<size_t> ia = iaPtent();
929 ArrayView<LO> ja = jaPtent();
930 ArrayView<SC> val = valPtent();
931
932 ia[0] = 0;
933 for (size_t i = 1; i <= numRows; i++)
934 ia[i] = ia[i-1] + NSDim;
935
936 for (size_t j = 0; j < nnzEstimate; j++) {
937 ja [j] = INVALID;
938 val[j] = zero;
939 }
940
941
942 if (doQRStep) {
944 // Standard aggregate-wise QR //
946 for (GO agg = 0; agg < numAggs; agg++) {
947 LO aggSize = aggStart[agg+1] - aggStart[agg];
948
949 Xpetra::global_size_t offset = agg*NSDim;
950
951 // Extract the piece of the nullspace corresponding to the aggregate, and
952 // put it in the flat array, "localQR" (in column major format) for the
953 // QR routine.
954 Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
955 if (goodMap) {
956 for (size_t j = 0; j < NSDim; j++)
957 for (LO k = 0; k < aggSize; k++)
958 localQR(k,j) = fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
959 } else {
960 for (size_t j = 0; j < NSDim; j++)
961 for (LO k = 0; k < aggSize; k++)
962 localQR(k,j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
963 }
964
965 // Test for zero columns
966 for (size_t j = 0; j < NSDim; j++) {
967 bool bIsZeroNSColumn = true;
968
969 for (LO k = 0; k < aggSize; k++)
970 if (localQR(k,j) != zero)
971 bIsZeroNSColumn = false;
972
973 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
974 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
975 }
976
977 // Calculate QR decomposition (standard)
978 // NOTE: Q is stored in localQR and R is stored in coarseNS
979 if (aggSize >= Teuchos::as<LO>(NSDim)) {
980
981 if (NSDim == 1) {
982 // Only one nullspace vector, calculate Q and R by hand
983 Magnitude norm = STS::magnitude(zero);
984 for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
985 norm += STS::magnitude(localQR(k,0)*localQR(k,0));
986 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
987
988 // R = norm
989 coarseNS[0][offset] = norm;
990
991 // Q = localQR(:,0)/norm
992 for (LO i = 0; i < aggSize; i++)
993 localQR(i,0) /= norm;
994
995 } else {
996 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
997 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
998 qrSolver.factor();
999
1000 // R = upper triangular part of localQR
1001 for (size_t j = 0; j < NSDim; j++)
1002 for (size_t k = 0; k <= j; k++)
1003 coarseNS[j][offset+k] = localQR(k,j); //TODO is offset+k the correct local ID?!
1004
1005 // Calculate Q, the tentative prolongator.
1006 // The Lapack GEQRF call only works for myAggsize >= NSDim
1007 qrSolver.formQ();
1008 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
1009 for (size_t j = 0; j < NSDim; j++)
1010 for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
1011 localQR(i,j) = (*qFactor)(i,j);
1012 }
1013
1014 } else {
1015 // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
1016
1017 // The local QR decomposition is not possible in the "overconstrained"
1018 // case (i.e. number of columns in localQR > number of rows), which
1019 // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
1020 // is only possible for single node aggregates in structural mechanics.
1021 // (Similar problems may arise in discontinuous Galerkin problems...)
1022 // We bypass the QR decomposition and use an identity block in the
1023 // tentative prolongator for the single node aggregate and transfer the
1024 // corresponding fine level null space information 1-to-1 to the coarse
1025 // level null space part.
1026
1027 // NOTE: The resulting tentative prolongation operator has
1028 // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
1029 // coarse level operator A. To deal with that one has the following
1030 // options:
1031 // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
1032 // false) to add some identity block to the diagonal of the zero rows
1033 // in the coarse level operator A, such that standard level smoothers
1034 // can be used again.
1035 // - Use special (projection-based) level smoothers, which can deal
1036 // with singular matrices (very application specific)
1037 // - Adapt the code below to avoid zero columns. However, we do not
1038 // support a variable number of DOFs per node in MueLu/Xpetra which
1039 // makes the implementation really hard.
1040
1041 // R = extended (by adding identity rows) localQR
1042 for (size_t j = 0; j < NSDim; j++)
1043 for (size_t k = 0; k < NSDim; k++)
1044 if (k < as<size_t>(aggSize))
1045 coarseNS[j][offset+k] = localQR(k,j);
1046 else
1047 coarseNS[j][offset+k] = (k == j ? one : zero);
1048
1049 // Q = I (rectangular)
1050 for (size_t i = 0; i < as<size_t>(aggSize); i++)
1051 for (size_t j = 0; j < NSDim; j++)
1052 localQR(i,j) = (j == i ? one : zero);
1053 }
1054
1055
1056 // Process each row in the local Q factor
1057 // FIXME: What happens if maps are blocked?
1058 for (LO j = 0; j < aggSize; j++) {
1059 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
1060
1061 size_t rowStart = ia[localRow];
1062 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1063 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1064 if (localQR(j,k) != zero) {
1065 ja [rowStart+lnnz] = offset + k;
1066 val[rowStart+lnnz] = localQR(j,k);
1067 lnnz++;
1068 }
1069 }
1070 }
1071 }
1072
1073 } else {
1074 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1075 if (NSDim>1)
1076 GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1078 // "no-QR" option //
1080 // Local Q factor is just the fine nullspace support over the current aggregate.
1081 // Local R factor is the identity.
1082 // TODO I have not implemented any special handling for aggregates that are too
1083 // TODO small to locally support the nullspace, as is done in the standard QR
1084 // TODO case above.
1085 if (goodMap) {
1086 for (GO agg = 0; agg < numAggs; agg++) {
1087 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1088 Xpetra::global_size_t offset = agg*NSDim;
1089
1090 // Process each row in the local Q factor
1091 // FIXME: What happens if maps are blocked?
1092 for (LO j = 0; j < aggSize; j++) {
1093
1094 //TODO Here I do not check for a zero nullspace column on the aggregate.
1095 // as is done in the standard QR case.
1096
1097 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
1098
1099 const size_t rowStart = ia[localRow];
1100
1101 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1102 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1103 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg]+j]];
1104 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1105 if (qr_jk != zero) {
1106 ja [rowStart+lnnz] = offset + k;
1107 val[rowStart+lnnz] = qr_jk;
1108 lnnz++;
1109 }
1110 }
1111 }
1112 for (size_t j = 0; j < NSDim; j++)
1113 coarseNS[j][offset+j] = one;
1114 } //for (GO agg = 0; agg < numAggs; agg++)
1115
1116 } else {
1117 for (GO agg = 0; agg < numAggs; agg++) {
1118 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1119 Xpetra::global_size_t offset = agg*NSDim;
1120 for (LO j = 0; j < aggSize; j++) {
1121
1122 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]);
1123
1124 const size_t rowStart = ia[localRow];
1125
1126 for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1127 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1128 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j])];
1129 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1130 if (qr_jk != zero) {
1131 ja [rowStart+lnnz] = offset + k;
1132 val[rowStart+lnnz] = qr_jk;
1133 lnnz++;
1134 }
1135 }
1136 }
1137 for (size_t j = 0; j < NSDim; j++)
1138 coarseNS[j][offset+j] = one;
1139 } //for (GO agg = 0; agg < numAggs; agg++)
1140
1141 } //if (goodmap) else ...
1142
1143 } //if doQRStep ... else
1144
1145 // Compress storage (remove all INVALID, which happen when we skip zeros)
1146 // We do that in-place
1147 size_t ia_tmp = 0, nnz = 0;
1148 for (size_t i = 0; i < numRows; i++) {
1149 for (size_t j = ia_tmp; j < ia[i+1]; j++)
1150 if (ja[j] != INVALID) {
1151 ja [nnz] = ja [j];
1152 val[nnz] = val[j];
1153 nnz++;
1154 }
1155 ia_tmp = ia[i+1];
1156 ia[i+1] = nnz;
1157 }
1158 if (rowMap->lib() == Xpetra::UseTpetra) {
1159 // - Cannot resize for Epetra, as it checks for same pointers
1160 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1161 // NOTE: these invalidate ja and val views
1162 jaPtent .resize(nnz);
1163 valPtent.resize(nnz);
1164 }
1165
1166 GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1167
1168 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1169
1170
1171 // Managing labels & constants for ESFC
1172 RCP<ParameterList> FCparams;
1173 if(pL.isSublist("matrixmatrix: kernel params"))
1174 FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1175 else
1176 FCparams= rcp(new ParameterList);
1177 // By default, we don't need global constants for TentativeP
1178 FCparams->set("compute global constants",FCparams->get("compute global constants",false));
1179 std::string levelIDs = toString(levelID);
1180 FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
1181 RCP<const Export> dummy_e;
1182 RCP<const Import> dummy_i;
1183
1184 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(),dummy_i,dummy_e,FCparams);
1185 }
1186
1187
1188
1189} //namespace MueLu
1190
1191// TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1192
1193#define MUELU_TENTATIVEPFACTORY_SHORT
1194#endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
static bool MapsAreNested(const Xpetra::Map< DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &rowMap, const Xpetra::Map< DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &colMap)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime1
Description of what is happening (more verbose)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.