MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_REFMAXWELL_DEF_HPP
47#define MUELU_REFMAXWELL_DEF_HPP
48
49#include <sstream>
50
51#include "MueLu_ConfigDefs.hpp"
52
53#include "Xpetra_Map.hpp"
54#include "Xpetra_MatrixMatrix.hpp"
55#include "Xpetra_TripleMatrixMultiply.hpp"
56#include "Xpetra_CrsMatrixUtils.hpp"
57#include "Xpetra_MatrixUtils.hpp"
58
60
61#include "MueLu_AmalgamationFactory.hpp"
62#include "MueLu_RAPFactory.hpp"
63#include "MueLu_SmootherFactory.hpp"
64
65#include "MueLu_CoalesceDropFactory.hpp"
66#include "MueLu_CoarseMapFactory.hpp"
67#include "MueLu_CoordinatesTransferFactory.hpp"
68#include "MueLu_UncoupledAggregationFactory.hpp"
69#include "MueLu_TentativePFactory.hpp"
70#include "MueLu_SaPFactory.hpp"
71#include "MueLu_AggregationExportFactory.hpp"
72#include "MueLu_Utilities.hpp"
73#include "MueLu_Maxwell_Utils.hpp"
74
75#include "MueLu_CoalesceDropFactory_kokkos.hpp"
76#include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
77#include "MueLu_TentativePFactory_kokkos.hpp"
78#include "MueLu_SaPFactory_kokkos.hpp"
79#include <Kokkos_Core.hpp>
80#include <KokkosSparse_CrsMatrix.hpp>
81
82#include "MueLu_ZoltanInterface.hpp"
83#include "MueLu_Zoltan2Interface.hpp"
84#include "MueLu_RepartitionHeuristicFactory.hpp"
85#include "MueLu_RepartitionFactory.hpp"
86#include "MueLu_RebalanceAcFactory.hpp"
87#include "MueLu_RebalanceTransferFactory.hpp"
88
90
93
94#ifdef HAVE_MUELU_CUDA
95#include "cuda_profiler_api.h"
96#endif
97
98// Stratimikos
99#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
101#endif
102
103
104namespace MueLu {
105
106 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
107 Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const {
108 return SM_Matrix_->getDomainMap();
109 }
110
111
112 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
113 Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const {
114 return SM_Matrix_->getRangeMap();
115 }
116
117
118 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
120
121 if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
122 Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
123 if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
124 newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
125 if(list.isSublist("refmaxwell: 22list"))
126 newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
127 list = newList;
128 }
129
130 parameterList_ = list;
131 std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
133 std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
134 if (outputFilename != "")
136 if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
137 VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
138
139 if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
140 GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
141 disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
142 mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
143 use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
144 dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
145 enable_reuse_ = list.get("refmaxwell: enable reuse", MasterList::getDefault<bool>("refmaxwell: enable reuse"));
146 implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
147 fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
148 skipFirstLevel_ = list.get("refmaxwell: skip first (1,1) level", MasterList::getDefault<bool>("refmaxwell: skip first (1,1) level"));
149 syncTimers_ = list.get("sync timers", false);
150 numItersH_ = list.get("refmaxwell: num iters H", 1);
151 numIters22_ = list.get("refmaxwell: num iters 22", 1);
152 applyBCsToAnodal_ = list.get("refmaxwell: apply BCs to Anodal", false);
153 applyBCsToH_ = list.get("refmaxwell: apply BCs to H", true);
154 applyBCsTo22_ = list.get("refmaxwell: apply BCs to 22", true);
155
156 if(list.isSublist("refmaxwell: 11list"))
157 precList11_ = list.sublist("refmaxwell: 11list");
158 if(!precList11_.isType<std::string>("Preconditioner Type") &&
159 !precList11_.isType<std::string>("smoother: type") &&
160 !precList11_.isType<std::string>("smoother: pre type") &&
161 !precList11_.isType<std::string>("smoother: post type")) {
162 precList11_.set("smoother: type", "CHEBYSHEV");
163 precList11_.sublist("smoother: params").set("chebyshev: degree",2);
164 precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
165 precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
166 }
167
168 if(list.isSublist("refmaxwell: 22list"))
169 precList22_ = list.sublist("refmaxwell: 22list");
170 if(!precList22_.isType<std::string>("Preconditioner Type") &&
171 !precList22_.isType<std::string>("smoother: type") &&
172 !precList22_.isType<std::string>("smoother: pre type") &&
173 !precList22_.isType<std::string>("smoother: post type")) {
174 precList22_.set("smoother: type", "CHEBYSHEV");
175 precList22_.sublist("smoother: params").set("chebyshev: degree",2);
176 precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
177 precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
178 }
179
180 if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
181 list.set("smoother: type", "CHEBYSHEV");
182 list.sublist("smoother: params").set("chebyshev: degree",2);
183 list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
184 list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
185 }
186
187 if(list.isSublist("smoother: params")) {
188 smootherList_ = list.sublist("smoother: params");
189 }
190
191 if (enable_reuse_ &&
192 !precList11_.isType<std::string>("Preconditioner Type") &&
193 !precList11_.isParameter("reuse: type"))
194 precList11_.set("reuse: type", "full");
195 if (enable_reuse_ &&
196 !precList22_.isType<std::string>("Preconditioner Type") &&
197 !precList22_.isParameter("reuse: type"))
198 precList22_.set("reuse: type", "full");
199
200# ifdef HAVE_MUELU_SERIAL
201 if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name())
202 useKokkos_ = false;
203# endif
204# ifdef HAVE_MUELU_OPENMP
205 if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name())
206 useKokkos_ = true;
207# endif
208# ifdef HAVE_MUELU_CUDA
209 if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name())
210 useKokkos_ = true;
211# endif
212# ifdef HAVE_MUELU_HIP
213 if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name())
214 useKokkos_ = true;
215# endif
216# ifdef HAVE_MUELU_SYCL
217 if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name())
218 useKokkos_ = true;
219# endif
220 useKokkos_ = list.get("use kokkos refactor",useKokkos_);
221 }
222
223
224
225 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
227
228#ifdef HAVE_MUELU_CUDA
229 if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
230#endif
231
232 std::string timerLabel;
233 if (reuse)
234 timerLabel = "MueLu RefMaxwell: compute (reuse)";
235 else
236 timerLabel = "MueLu RefMaxwell: compute";
237 RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
238
240 // Remove explicit zeros from matrices
242
243 if (IsPrint(Statistics2)) {
244 RCP<ParameterList> params = rcp(new ParameterList());;
245 params->set("printLoadBalancingInfo", true);
246 params->set("printCommInfo", true);
248 }
249
251 // Detect Dirichlet boundary conditions
252 if (!reuse) {
253 magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
258 if (IsPrint(Statistics2)) {
259 GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
260 }
261 }
262
263 if (allEdgesBoundary_) {
264 // All edges have been detected as boundary edges.
265 // Do not attempt to construct sub-hierarchies, but just set up a single level preconditioner.
266 GetOStream(Warnings0) << "All edges are detected as boundary edges!" << std::endl;
267 mode_ = "none";
269 return;
270 }
271
273 // build nullspace if necessary
274
275 if(Nullspace_ != null) {
276 // no need to do anything - nullspace is built
277 TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
278 }
279 else if(Nullspace_ == null && Coords_ != null) {
280 RCP<MultiVector> CoordsSC;
282 Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
283 D0_Matrix_->apply(*CoordsSC,*Nullspace_);
284
285 bool normalize = parameterList_.get<bool>("refmaxwell: normalize nullspace", MasterList::getDefault<bool>("refmaxwell: normalize nullspace"));
286
287 coordinateType minLen, maxLen, meanLen;
288 if (IsPrint(Statistics2) || normalize){
289 // compute edge lengths
290 ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
291 for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
292 localNullspace[i] = Nullspace_->getData(i);
293 coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
294 coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
295 coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
296 for (size_t j=0; j < Nullspace_->getMap()->getLocalNumElements(); j++) {
297 Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
298 for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
299 lenSC += localNullspace[i][j]*localNullspace[i][j];
300 coordinateType len = Teuchos::as<coordinateType>(Teuchos::ScalarTraits<Scalar>::real(Teuchos::ScalarTraits<Scalar>::squareroot(lenSC)));
301 localMinLen = std::min(localMinLen, len);
302 localMaxLen = std::max(localMaxLen, len);
303 localMeanLen += len;
304 }
305
306 RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
307 MueLu_minAll(comm, localMinLen, minLen);
308 MueLu_sumAll(comm, localMeanLen, meanLen);
309 MueLu_maxAll(comm, localMaxLen, maxLen);
310 meanLen /= Nullspace_->getMap()->getGlobalNumElements();
311 }
312
313 if (IsPrint(Statistics2)) {
314 GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
315 }
316
317 if (normalize) {
318 // normalize the nullspace
319 GetOStream(Runtime0) << "RefMaxwell::compute(): normalizing nullspace" << std::endl;
320
321 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
322
323 Array<Scalar> normsSC(Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
324 Nullspace_->scale(normsSC());
325 }
326 }
327 else {
328 GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
329 }
330
331 if (!reuse && skipFirstLevel_) {
332 // Nuke the BC edges in nullspace
334 dump(*Nullspace_, "nullspace.m");
335 }
336
338 // build special prolongator for (1,1)-block
339
340 if(P11_.is_null()) {
341 if (skipFirstLevel_) {
342 // Form A_nodal = D0* Ms D0 (aka TMT_agg)
343 std::string label("D0*Ms*D0");
345
346 if (applyBCsToAnodal_) {
347 // Apply boundary conditions to A_nodal
349 }
350 dump(*A_nodal_Matrix_, "A_nodal.m");
351 }
352
353 // build special prolongator
354 GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
356 }
357
358#ifdef HAVE_MPI
359 bool doRebalancing = false;
360 doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
361 int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
362 int numProcsAH, numProcsA22;
363 int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
364 if (numProcs == 1)
365 doRebalancing = false;
366#endif
367 {
368 // build coarse grid operator for (1,1)-block
370
371 if (!reuse) {
372#ifdef HAVE_MPI
373 if (doRebalancing) {
374
375 {
376 // decide on number of ranks for coarse (1, 1) problem
377
378 Level level;
379 level.SetFactoryManager(null);
380 level.SetLevelID(0);
381 level.Set("A",AH_);
382
383 auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
384 ParameterList repartheurParams;
385 repartheurParams.set("repartition: start level", 0);
386 // Setting min == target on purpose.
387 int defaultTargetRows = 10000;
388 repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
389 repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
390 repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
391 repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
392 repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
393 repartheurFactory->SetParameterList(repartheurParams);
394
395 level.Request("number of partitions", repartheurFactory.get());
396 repartheurFactory->Build(level);
397 numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
398 numProcsAH = std::min(numProcsAH,numProcs);
399 }
400
401 {
402 // decide on number of ranks for (2, 2) problem
403
404 Level level;
405 level.SetFactoryManager(null);
406 level.SetLevelID(0);
407
408 level.Set("Map",D0_Matrix_->getDomainMap());
409
410 auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
411 ParameterList repartheurParams;
412 repartheurParams.set("repartition: start level", 0);
413 repartheurParams.set("repartition: use map", true);
414 // Setting min == target on purpose.
415 int defaultTargetRows = 10000;
416 repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
417 repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
418 repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
419 repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
420 // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
421 repartheurFactory->SetParameterList(repartheurParams);
422
423 level.Request("number of partitions", repartheurFactory.get());
424 repartheurFactory->Build(level);
425 numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
426 numProcsA22 = std::min(numProcsA22,numProcs);
427 }
428
429 if (rebalanceStriding >= 1) {
430 TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
431 TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
432 if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
433 GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
434 << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
435 rebalanceStriding = -1;
436 }
437 }
438
439 // }
440
441 if (parameterList_.get<bool>("refmaxwell: fill communicator", false)) {
442 int temp;
443 temp = std::nearbyint(Teuchos::as<double>(numProcsAH * numProcs) / Teuchos::as<double>(numProcsAH+numProcsA22));
444 numProcsA22 = std::nearbyint(Teuchos::as<double>(numProcsA22 * numProcs) / Teuchos::as<double>(numProcsAH+numProcsA22));
445 numProcsAH = temp;
446 if (numProcsAH+numProcsA22 == numProcs+1)
447 numProcsA22 -= 1;
448 TEUCHOS_ASSERT(numProcsAH + numProcsA22 == numProcs);
449 }
450
451 if ((numProcsAH < 0) || (numProcsA22 < 0) || (numProcsAH + numProcsA22 > numProcs)) {
452 GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling rebalancing of subsolves, since partition heuristic resulted "
453 << "in undesirable number of partitions: " << numProcsAH << ", " << numProcsA22 << std::endl;
454 doRebalancing = false;
455 }
456
457 // check again, as we could have changed the value above
458 if (doRebalancing) {
459 // rebalance AH
460 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
461
462 Level fineLevel, coarseLevel;
463 fineLevel.SetFactoryManager(null);
464 coarseLevel.SetFactoryManager(null);
465 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
466 fineLevel.SetLevelID(0);
467 coarseLevel.SetLevelID(1);
468 coarseLevel.Set("A",AH_);
469 coarseLevel.Set("P",P11_);
470 coarseLevel.Set("Coordinates",CoordsH_);
471 if (!NullspaceH_.is_null())
472 coarseLevel.Set("Nullspace",NullspaceH_);
473 coarseLevel.Set("number of partitions", numProcsAH);
474 coarseLevel.Set("repartition: heuristic target rows per process", 1000);
475
476 coarseLevel.setlib(AH_->getDomainMap()->lib());
477 fineLevel.setlib(AH_->getDomainMap()->lib());
478 coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
479 fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
480
481 std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
482 RCP<Factory> partitioner;
483 if (partName == "zoltan") {
484#ifdef HAVE_MUELU_ZOLTAN
485 partitioner = rcp(new ZoltanInterface());
486 // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
487 // partitioner->SetFactory("number of partitions", repartheurFactory);
488#else
489 throw Exceptions::RuntimeError("Zoltan interface is not available");
490#endif
491 } else if (partName == "zoltan2") {
492#ifdef HAVE_MUELU_ZOLTAN2
493 partitioner = rcp(new Zoltan2Interface());
494 ParameterList partParams;
495 RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
496 partParams.set("ParameterList", partpartParams);
497 partitioner->SetParameterList(partParams);
498 // partitioner->SetFactory("number of partitions", repartheurFactory);
499#else
500 throw Exceptions::RuntimeError("Zoltan2 interface is not available");
501#endif
502 }
503
504 auto repartFactory = rcp(new RepartitionFactory());
505 ParameterList repartParams;
506 repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
507 repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
508 if (rebalanceStriding >= 1) {
509 bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
510 if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
511 acceptPart = false;
512 repartParams.set("repartition: remap accept partition", acceptPart);
513 }
514 repartFactory->SetParameterList(repartParams);
515 // repartFactory->SetFactory("number of partitions", repartheurFactory);
516 repartFactory->SetFactory("Partition", partitioner);
517
518 auto newP = rcp(new RebalanceTransferFactory());
519 ParameterList newPparams;
520 newPparams.set("type", "Interpolation");
521 newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
522 newPparams.set("repartition: use subcommunicators", true);
523 newPparams.set("repartition: rebalance Nullspace", !NullspaceH_.is_null());
524 newP->SetFactory("Coordinates", NoFactory::getRCP());
525 if (!NullspaceH_.is_null())
526 newP->SetFactory("Nullspace", NoFactory::getRCP());
527 newP->SetParameterList(newPparams);
528 newP->SetFactory("Importer", repartFactory);
529
530 auto newA = rcp(new RebalanceAcFactory());
531 ParameterList rebAcParams;
532 rebAcParams.set("repartition: use subcommunicators", true);
533 newA->SetParameterList(rebAcParams);
534 newA->SetFactory("Importer", repartFactory);
535
536 coarseLevel.Request("P", newP.get());
537 coarseLevel.Request("Importer", repartFactory.get());
538 coarseLevel.Request("A", newA.get());
539 coarseLevel.Request("Coordinates", newP.get());
540 if (!NullspaceH_.is_null())
541 coarseLevel.Request("Nullspace", newP.get());
542 repartFactory->Build(coarseLevel);
543
544 if (!precList11_.get<bool>("repartition: rebalance P and R", false))
545 ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
546 P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
547 AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
548 CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
549 if (!NullspaceH_.is_null())
550 NullspaceH_ = coarseLevel.Get< RCP<MultiVector> >("Nullspace", newP.get());
551
552 AH_AP_reuse_data_ = Teuchos::null;
553 AH_RAP_reuse_data_ = Teuchos::null;
554
556 // Rebalance the addon for next setup
557 RCP<const Import> ImporterH = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
558 RCP<const Map> targetMap = ImporterH->getTargetMap();
559 ParameterList XpetraList;
560 XpetraList.set("Restrict Communicator",true);
561 Addon_Matrix_ = MatrixFactory::Build(Addon_Matrix_, *ImporterH, *ImporterH, targetMap, targetMap, rcp(&XpetraList,false));
562 }
563 }
564 }
565#endif // HAVE_MPI
566
567 // This should be taken out again as soon as
568 // CoalesceDropFactory_kokkos supports BlockSize > 1 and
569 // drop tol != 0.0
570 if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
571 GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
572 << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
573 precList11_.set("aggregation: drop tol", 0.0);
574 }
575 dump(*P11_, "P11.m");
576
577 if (!implicitTranspose_) {
579 dump(*R11_, "R11.m");
580 }
581 }
582
584 if (!AH_.is_null()) {
585 // set fixed block size for vector nodal matrix
586 size_t dim = Nullspace_->getNumVectors();
587 AH_->SetFixedBlockSize(dim);
588 AH_->setObjectLabel("RefMaxwell coarse (1,1)");
589 dump(*AH_, "AH.m");
590 int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
591 if (IsPrint(Statistics2)) {
592 RCP<ParameterList> params = rcp(new ParameterList());;
593 params->set("printLoadBalancingInfo", true);
594 params->set("printCommInfo", true);
596 }
597#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
598 if (precList11_.isType<std::string>("Preconditioner Type")) {
599 // build a Stratimikos preconditioner
600 if (precList11_.get<std::string>("Preconditioner Type") == "MueLu") {
601 ParameterList& userParamList = precList11_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
602 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
603 }
604 thyraPrecOpH_ = rcp(new XpetraThyraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>(AH_, rcp(&precList11_, false)));
605 } else
606#endif
607 {
608 // build a MueLu hierarchy
609
610 if (!reuse) {
611 dumpCoords(*CoordsH_, "coordsH.m");
612 if (!NullspaceH_.is_null())
613 dump(*NullspaceH_, "NullspaceH.m");
614 ParameterList& userParamList = precList11_.sublist("user data");
615 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
616 if (!NullspaceH_.is_null())
617 userParamList.set<RCP<MultiVector> >("Nullspace", NullspaceH_);
619 } else {
620 RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
621 level0->Set("A", AH_);
622 HierarchyH_->SetupRe();
623 }
624 }
625 SetProcRankVerbose(oldRank);
626 }
628
629 }
630
631 if(!reuse && applyBCsTo22_) {
632 GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
633
634 D0_Matrix_->resumeFill();
635 Scalar replaceWith;
636 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
637 replaceWith= Teuchos::ScalarTraits<SC>::eps();
638 else
639 replaceWith = Teuchos::ScalarTraits<SC>::zero();
641 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
642 }
643
645 // Build A22 = D0* SM D0 and hierarchy for A22
646 if (!allNodesBoundary_) {
647 GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
648
649 if (!reuse) { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
650 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
651
652 Level fineLevel, coarseLevel;
653 fineLevel.SetFactoryManager(null);
654 coarseLevel.SetFactoryManager(null);
655 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
656 fineLevel.SetLevelID(0);
657 coarseLevel.SetLevelID(1);
658 fineLevel.Set("A",SM_Matrix_);
659 coarseLevel.Set("P",D0_Matrix_);
660 coarseLevel.Set("Coordinates",Coords_);
661
662 coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
663 fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
664 coarseLevel.setObjectLabel("RefMaxwell (2,2)");
665 fineLevel.setObjectLabel("RefMaxwell (2,2)");
666
667 RCP<RAPFactory> rapFact = rcp(new RAPFactory());
668 ParameterList rapList = *(rapFact->GetValidParameterList());
669 rapList.set("transpose: use implicit", true);
670 rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
671 rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
672 rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
673 rapFact->SetParameterList(rapList);
674
675 if (!A22_AP_reuse_data_.is_null()) {
676 coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
677 coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
678 }
679 if (!A22_RAP_reuse_data_.is_null()) {
680 coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
681 coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
682 }
683
684#ifdef HAVE_MPI
685 if (doRebalancing) {
686
687 coarseLevel.Set("number of partitions", numProcsA22);
688 coarseLevel.Set("repartition: heuristic target rows per process", 1000);
689
690 std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
691 RCP<Factory> partitioner;
692 if (partName == "zoltan") {
693#ifdef HAVE_MUELU_ZOLTAN
694 partitioner = rcp(new ZoltanInterface());
695 partitioner->SetFactory("A", rapFact);
696 // partitioner->SetFactory("number of partitions", repartheurFactory);
697 // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
698#else
699 throw Exceptions::RuntimeError("Zoltan interface is not available");
700#endif
701 } else if (partName == "zoltan2") {
702#ifdef HAVE_MUELU_ZOLTAN2
703 partitioner = rcp(new Zoltan2Interface());
704 ParameterList partParams;
705 RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
706 partParams.set("ParameterList", partpartParams);
707 partitioner->SetParameterList(partParams);
708 partitioner->SetFactory("A", rapFact);
709 // partitioner->SetFactory("number of partitions", repartheurFactory);
710#else
711 throw Exceptions::RuntimeError("Zoltan2 interface is not available");
712#endif
713 }
714
715 auto repartFactory = rcp(new RepartitionFactory());
716 ParameterList repartParams;
717 repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
718 repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
719 if (rebalanceStriding >= 1) {
720 bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
721 if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
722 acceptPart = false;
723 if (acceptPart)
724 TEUCHOS_ASSERT(AH_.is_null());
725 repartParams.set("repartition: remap accept partition", acceptPart);
726 } else
727 repartParams.set("repartition: remap accept partition", AH_.is_null());
728 repartFactory->SetParameterList(repartParams);
729 repartFactory->SetFactory("A", rapFact);
730 // repartFactory->SetFactory("number of partitions", repartheurFactory);
731 repartFactory->SetFactory("Partition", partitioner);
732
733 auto newP = rcp(new RebalanceTransferFactory());
734 ParameterList newPparams;
735 newPparams.set("type", "Interpolation");
736 newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
737 newPparams.set("repartition: use subcommunicators", true);
738 newPparams.set("repartition: rebalance Nullspace", false);
739 newP->SetFactory("Coordinates", NoFactory::getRCP());
740 newP->SetParameterList(newPparams);
741 newP->SetFactory("Importer", repartFactory);
742
743 auto newA = rcp(new RebalanceAcFactory());
744 ParameterList rebAcParams;
745 rebAcParams.set("repartition: use subcommunicators", true);
746 newA->SetParameterList(rebAcParams);
747 newA->SetFactory("A", rapFact);
748 newA->SetFactory("Importer", repartFactory);
749
750 coarseLevel.Request("P", newP.get());
751 coarseLevel.Request("Importer", repartFactory.get());
752 coarseLevel.Request("A", newA.get());
753 coarseLevel.Request("Coordinates", newP.get());
754 rapFact->Build(fineLevel,coarseLevel);
755 repartFactory->Build(coarseLevel);
756
757 if (!precList22_.get<bool>("repartition: rebalance P and R", false))
758 Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
759 D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
760 A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
761 Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
762
763 } else
764#endif // HAVE_MPI
765 {
766 coarseLevel.Request("A", rapFact.get());
767 if (enable_reuse_) {
768 coarseLevel.Request("AP reuse data", rapFact.get());
769 coarseLevel.Request("RAP reuse data", rapFact.get());
770 }
771
772 A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
773
774 if (enable_reuse_) {
775 if (coarseLevel.IsAvailable("AP reuse data", rapFact.get()))
776 A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
777 if (coarseLevel.IsAvailable("RAP reuse data", rapFact.get()))
778 A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
779 }
780 }
781 } else {
782 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
783 if (Importer22_.is_null()) {
784 RCP<Matrix> temp;
785 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
787 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,A22_,GetOStream(Runtime0),true,true);
788 else
789 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,A22_,GetOStream(Runtime0),true,true);
790 } else {
791 // we replaced domain map and importer on D0, reverse that
792 RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
793 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(D0origDomainMap_, D0origImporter_);
794
795 RCP<Matrix> temp, temp2;
796 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
798 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,temp2,GetOStream(Runtime0),true,true);
799 else
800 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
801
802 // and back again
803 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), D0importer);
804
805 ParameterList XpetraList;
806 XpetraList.set("Restrict Communicator",true);
807 XpetraList.set("Timer Label","MueLu::RebalanceA22");
808 RCP<const Map> targetMap = Importer22_->getTargetMap();
809 A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
810 }
811 }
812
813 if (!implicitTranspose_ && !reuse) {
815 }
816
818 if (!A22_.is_null()) {
819 dump(*A22_, "A22.m");
820 A22_->setObjectLabel("RefMaxwell (2,2)");
821 int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
822 if (IsPrint(Statistics2)) {
823 RCP<ParameterList> params = rcp(new ParameterList());;
824 params->set("printLoadBalancingInfo", true);
825 params->set("printCommInfo", true);
827 }
828#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
829 if (precList22_.isType<std::string>("Preconditioner Type")) {
830 // build a Stratimikos preconditioner
831 if (precList22_.get<std::string>("Preconditioner Type") == "MueLu") {
832 ParameterList& userParamList = precList22_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
833 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
834 }
835 thyraPrecOp22_ = rcp(new XpetraThyraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A22_, rcp(&precList22_, false)));
836 } else
837#endif
838 {
839 // build a MueLu hierarchy
840 if (!reuse) {
841 ParameterList& userParamList = precList22_.sublist("user data");
842 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
843 // If we detected no boundary conditions, the (2,2) problem is singular.
844 // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
845 std::string coarseType = "";
846 if (precList22_.isParameter("coarse: type")) {
847 coarseType = precList22_.get<std::string>("coarse: type");
848 // Transform string to "Abcde" notation
849 std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
850 std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
851 }
852 if (BCedges_ == 0 &&
853 (coarseType == "" ||
854 coarseType == "Klu" ||
855 coarseType == "Klu2") &&
856 (!precList22_.isSublist("coarse: params") ||
857 !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
858 precList22_.sublist("coarse: params").set("fix nullspace",true);
860 } else {
861 RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
862 level0->Set("A", A22_);
863 Hierarchy22_->SetupRe();
864 }
865 }
866 SetProcRankVerbose(oldRank);
867 }
869
870 }
871
872 if(!reuse && !allNodesBoundary_ && applyBCsTo22_) {
873 GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
874
875 D0_Matrix_->resumeFill();
876 Scalar replaceWith;
877 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
878 replaceWith= Teuchos::ScalarTraits<SC>::eps();
879 else
880 replaceWith = Teuchos::ScalarTraits<SC>::zero();
882 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
883 dump(*D0_Matrix_, "D0_nuked.m");
884 }
885
887
888 if (!reuse) {
889 if (!ImporterH_.is_null()) {
890 RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
891 rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
892 }
893
894 if (!Importer22_.is_null()) {
895 if (enable_reuse_) {
896 D0origDomainMap_ = D0_Matrix_->getDomainMap();
897 D0origImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
898 }
899 RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
900 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
901 }
902
903 if ((!D0_T_Matrix_.is_null()) &&
904 (!R11_.is_null()) &&
905 (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
906 (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
907 (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
908 (R11_->getColMap()->lib() == Xpetra::UseTpetra))
909 D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
910 else
913 GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
914
915 // Allocate temporary MultiVectors for solve
917
918 if (parameterList_.isSublist("matvec params"))
919 {
920 RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
925 if (!R11_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*R11_, matvecParams);
926 if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
927 if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
928 }
929 if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
930 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
931 ImporterH_->setDistributorParameters(importerParams);
932 }
933 if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
934 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
935 Importer22_->setDistributorParameters(importerParams);
936 }
937 }
938
940
941#ifdef HAVE_MUELU_CUDA
942 if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
943#endif
944 }
945
946
947 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
949
950 Level level;
951 RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
952 level.SetFactoryManager(factoryHandler);
953 level.SetLevelID(0);
954 level.setObjectLabel("RefMaxwell (1,1)");
955 level.Set("A",SM_Matrix_);
956 level.setlib(SM_Matrix_->getDomainMap()->lib());
957 // For Hiptmair
958 level.Set("NodeMatrix", A22_);
959 level.Set("D0", D0_Matrix_);
960
961 if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
962 std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
963 std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
964
965 ParameterList preSmootherList, postSmootherList;
966 if (parameterList_.isSublist("smoother: pre params"))
967 preSmootherList = parameterList_.sublist("smoother: pre params");
968 if (parameterList_.isSublist("smoother: post params"))
969 postSmootherList = parameterList_.sublist("smoother: post params");
970
971 RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
972 RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
973 RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
974
975 level.Request("PreSmoother",smootherFact.get());
976 level.Request("PostSmoother",smootherFact.get());
977 if (enable_reuse_) {
978 ParameterList smootherFactoryParams;
979 smootherFactoryParams.set("keep smoother data", true);
980 smootherFact->SetParameterList(smootherFactoryParams);
981 level.Request("PreSmoother data", smootherFact.get());
982 level.Request("PostSmoother data", smootherFact.get());
983 if (!PreSmootherData_.is_null())
984 level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
985 if (!PostSmootherData_.is_null())
986 level.Set("PostSmoother data", PostSmootherData_, smootherFact.get());
987 }
988 smootherFact->Build(level);
989 PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
990 PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",smootherFact.get());
991 if (enable_reuse_) {
992 PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
993 PostSmootherData_ = level.Get<RCP<SmootherPrototype> >("PostSmoother data",smootherFact.get());
994 }
995 } else {
996 std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
997
998 RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
999 RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(smootherPrototype));
1000 level.Request("PreSmoother",smootherFact.get());
1001 if (enable_reuse_) {
1002 ParameterList smootherFactoryParams;
1003 smootherFactoryParams.set("keep smoother data", true);
1004 smootherFact->SetParameterList(smootherFactoryParams);
1005 level.Request("PreSmoother data", smootherFact.get());
1006 if (!PreSmootherData_.is_null())
1007 level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1008 }
1009 smootherFact->Build(level);
1010 PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1012 if (enable_reuse_)
1013 PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1014 }
1015
1016 }
1017
1018
1019 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1021
1022 RCP<Teuchos::TimeMonitor> tmAlloc = getTimer("MueLu RefMaxwell: Allocate MVs");
1023
1024 if (!R11_.is_null())
1025 P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1026 else
1027 P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1028 P11res_->setObjectLabel("P11res");
1030 D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1031 D0TR11Tmp_->setObjectLabel("D0TR11Tmp");
1032 }
1033 if (!ImporterH_.is_null()) {
1034 P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1035 P11resTmp_->setObjectLabel("P11resTmp");
1036 P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1037 } else
1038 P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1039 P11x_->setObjectLabel("P11x");
1040 if (!D0_T_Matrix_.is_null())
1041 D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1042 else
1043 D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1044 D0res_->setObjectLabel("D0res");
1045 if (!Importer22_.is_null()) {
1046 D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1047 D0resTmp_->setObjectLabel("D0resTmp");
1048 D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1049 } else
1050 D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1051 D0x_->setObjectLabel("D0x");
1052 if (!AH_.is_null()) {
1053 if (!ImporterH_.is_null() && !implicitTranspose_)
1054 P11resSubComm_ = MultiVectorFactory::Build(P11resTmp_, Teuchos::View);
1055 else
1056 P11resSubComm_ = MultiVectorFactory::Build(P11res_, Teuchos::View);
1057 P11resSubComm_->replaceMap(AH_->getRangeMap());
1058 P11resSubComm_->setObjectLabel("P11resSubComm");
1059
1060 P11xSubComm_ = MultiVectorFactory::Build(P11x_, Teuchos::View);
1061 P11xSubComm_->replaceMap(AH_->getDomainMap());
1062 P11xSubComm_->setObjectLabel("P11xSubComm");
1063 }
1064 if (!A22_.is_null()) {
1065 if (!Importer22_.is_null() && !implicitTranspose_)
1066 D0resSubComm_ = MultiVectorFactory::Build(D0resTmp_, Teuchos::View);
1067 else
1068 D0resSubComm_ = MultiVectorFactory::Build(D0res_, Teuchos::View);
1069 D0resSubComm_->replaceMap(A22_->getRangeMap());
1070 D0resSubComm_->setObjectLabel("D0resSubComm");
1071
1072 D0xSubComm_ = MultiVectorFactory::Build(D0x_, Teuchos::View);
1073 D0xSubComm_->replaceMap(A22_->getDomainMap());
1074 D0xSubComm_->setObjectLabel("D0xSubComm");
1075 }
1076 residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1077 residual_->setObjectLabel("residual");
1078 }
1079
1080
1081 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1082 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1083 if (dump_matrices_) {
1084 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1085 Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1086 }
1087 }
1088
1089
1090 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1091 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1092 if (dump_matrices_) {
1093 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1094 Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1095 }
1096 }
1097
1098
1099 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1101 if (dump_matrices_) {
1102 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1103 Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1104 }
1105 }
1106
1107
1108 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1109 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Teuchos::ArrayRCP<bool>& v, std::string name) const {
1110 if (dump_matrices_) {
1111 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1112 std::ofstream out(name);
1113 for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1114 out << v[i] << "\n";
1115 }
1116 }
1117
1118 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1119 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Kokkos::View<bool*, typename Node::device_type>& v, std::string name) const {
1120 if (dump_matrices_) {
1121 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1122 std::ofstream out(name);
1123 auto vH = Kokkos::create_mirror_view (v);
1124 Kokkos::deep_copy(vH , v);
1125 for (size_t i = 0; i < vH.size(); i++)
1126 out << vH[i] << "\n";
1127 }
1128 }
1129
1130 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1131 Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getTimer(std::string name, RCP<const Teuchos::Comm<int> > comm) const {
1132 if (IsPrint(Timings)) {
1133 if (!syncTimers_)
1134 return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name)));
1135 else {
1136 if (comm.is_null())
1137 return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1138 else
1139 return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), comm.ptr()));
1140 }
1141 } else
1142 return Teuchos::null;
1143 }
1144
1145
1146 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1148 // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1149 //
1150 // The old implementation used
1151 // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1152 // yet the paper gives
1153 // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1154 // where phi_k is the k-th nullspace vector.
1155 //
1156 // The graph of D0 contains the incidence from nodes to edges.
1157 // The nodal prolongator P maps aggregates to nodes.
1158
1159 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1160 const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1161 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1162 size_t dim = Nullspace_->getNumVectors();
1163 size_t numLocalRows = SM_Matrix_->getLocalNumRows();
1164
1165 RCP<Matrix> P_nodal;
1166 RCP<CrsMatrix> P_nodal_imported;
1167 RCP<MultiVector> Nullspace_nodal;
1168 if (skipFirstLevel_) {
1169 // build prolongator: algorithm 1 in the reference paper
1170 // First, build nodal unsmoothed prolongator using the matrix A_nodal
1171 bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1172 if (read_P_from_file) {
1173 // This permits to read in an ML prolongator, so that we get the same hierarchy.
1174 // (ML and MueLu typically produce different aggregates.)
1175 std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1176 std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1177 std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1178 std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1179 RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1180 RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1181 P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1182 CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1183 } else {
1184 Level fineLevel, coarseLevel;
1185 fineLevel.SetFactoryManager(null);
1186 coarseLevel.SetFactoryManager(null);
1187 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1188 fineLevel.SetLevelID(0);
1189 coarseLevel.SetLevelID(1);
1190 fineLevel.Set("A",A_nodal_Matrix_);
1191 fineLevel.Set("Coordinates",Coords_);
1192 fineLevel.Set("DofsPerNode",1);
1193 coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1194 fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1195 coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1196 fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1197
1198 LocalOrdinal NSdim = 1;
1199 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1200 nullSpace->putScalar(SC_ONE);
1201 fineLevel.Set("Nullspace",nullSpace);
1202
1203 RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1204 if (useKokkos_) {
1205 amalgFact = rcp(new AmalgamationFactory());
1206 dropFact = rcp(new CoalesceDropFactory_kokkos());
1207 UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1208 coarseMapFact = rcp(new CoarseMapFactory());
1209 TentativePFact = rcp(new TentativePFactory_kokkos());
1210 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1211 SaPFact = rcp(new SaPFactory_kokkos());
1212 Tfact = rcp(new CoordinatesTransferFactory());
1213 } else
1214 {
1215 amalgFact = rcp(new AmalgamationFactory());
1216 dropFact = rcp(new CoalesceDropFactory());
1217 UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1218 coarseMapFact = rcp(new CoarseMapFactory());
1219 TentativePFact = rcp(new TentativePFactory());
1220 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1221 SaPFact = rcp(new SaPFactory());
1222 Tfact = rcp(new CoordinatesTransferFactory());
1223 }
1224 dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1225 double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1226 std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1227 std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1228 dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1229 dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1230 if (!useKokkos_)
1231 dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1232
1233 UncoupledAggFact->SetFactory("Graph", dropFact);
1234 int minAggSize = parameterList_.get("aggregation: min agg size",2);
1235 UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1236 int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1237 UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1238 bool matchMLbehavior = parameterList_.get("aggregation: match ML phase2a",MasterList::getDefault<bool>("aggregation: match ML phase2a"));
1239 UncoupledAggFact->SetParameter("aggregation: match ML phase2a",Teuchos::ParameterEntry(matchMLbehavior));
1240 bool avoidSingletons = parameterList_.get("aggregation: phase3 avoid singletons",true);
1241 UncoupledAggFact->SetParameter("aggregation: phase3 avoid singletons",Teuchos::ParameterEntry(avoidSingletons));
1242
1243 coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1244
1245 TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1246 TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1247 TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1248
1249 Tfact->SetFactory("Aggregates", UncoupledAggFact);
1250 Tfact->SetFactory("CoarseMap", coarseMapFact);
1251
1252 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1253 SaPFact->SetFactory("P", TentativePFact);
1254 coarseLevel.Request("P", SaPFact.get());
1255 } else
1256 coarseLevel.Request("P",TentativePFact.get());
1257 coarseLevel.Request("Nullspace",TentativePFact.get());
1258 coarseLevel.Request("Coordinates",Tfact.get());
1259
1260 RCP<AggregationExportFactory> aggExport;
1261 if (parameterList_.get("aggregation: export visualization data",false)) {
1262 aggExport = rcp(new AggregationExportFactory());
1263 ParameterList aggExportParams;
1264 aggExportParams.set("aggregation: output filename", "aggs.vtk");
1265 aggExportParams.set("aggregation: output file: agg style", "Jacks");
1266 aggExport->SetParameterList(aggExportParams);
1267
1268 aggExport->SetFactory("Aggregates", UncoupledAggFact);
1269 aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1270 fineLevel.Request("Aggregates",UncoupledAggFact.get());
1271 fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1272 }
1273
1274 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1275 coarseLevel.Get("P",P_nodal,SaPFact.get());
1276 else
1277 coarseLevel.Get("P",P_nodal,TentativePFact.get());
1278 coarseLevel.Get("Nullspace",Nullspace_nodal,TentativePFact.get());
1279 coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1280
1281
1282 if (parameterList_.get("aggregation: export visualization data",false))
1283 aggExport->Build(fineLevel, coarseLevel);
1284 }
1285 dump(*P_nodal, "P_nodal.m");
1286 dump(*Nullspace_nodal, "Nullspace_nodal.m");
1287
1288
1289 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1290
1291 // Import off-rank rows of P_nodal into P_nodal_imported
1292 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1293 if (numProcs > 1) {
1294 RCP<CrsMatrixWrap> P_nodal_temp;
1295 RCP<const Map> targetMap = D0Crs->getColMap();
1296 P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1297 RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1298 P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1299 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1300 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1301 P_nodal_imported = P_nodal_temp->getCrsMatrix();
1302 dump(*P_nodal_temp, "P_nodal_imported.m");
1303 } else
1304 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1305
1306 }
1307
1308 if (useKokkos_) {
1309
1310 using ATS = Kokkos::ArithTraits<SC>;
1311 using impl_Scalar = typename ATS::val_type;
1312 using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1313 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1314
1315 typedef typename Matrix::local_matrix_type KCRS;
1316 typedef typename KCRS::StaticCrsGraphType graph_t;
1317 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1318 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1319 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1320
1321 const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
1322 const impl_Scalar impl_SC_ONE = impl_ATS::one();
1323 const impl_Scalar impl_half = impl_SC_ONE / (impl_SC_ONE + impl_SC_ONE);
1324
1325
1326 // Which algorithm should we use for the construction of the special prolongator?
1327 // Option "mat-mat":
1328 // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1329 std::string defaultAlgo = "mat-mat";
1330 std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1331
1332 if (skipFirstLevel_) {
1333 // Get data out of P_nodal_imported and D0.
1334
1335 if (algo == "mat-mat") {
1336 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1337 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1338
1339#ifdef HAVE_MUELU_DEBUG
1340 TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1341#endif
1342
1343 // Get data out of D0*P.
1344 auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1345
1346 // Create the matrix object
1347 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1348 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1349
1350 size_t nnzEstimate = dim*localD0P.graph.entries.size();
1351 lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1352 lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1353 scalar_view_t P11vals("P11_vals",nnzEstimate);
1354
1355 // adjust rowpointer
1356 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1357 KOKKOS_LAMBDA(const size_t i) {
1358 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1359 });
1360
1361 // adjust column indices
1362 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1363 KOKKOS_LAMBDA(const size_t jj) {
1364 for (size_t k = 0; k < dim; k++) {
1365 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1366 P11vals(dim*jj+k) = impl_SC_ZERO;
1367 }
1368 });
1369
1370 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1371
1372 // enter values
1373 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1374 // The matrix D0 has too many entries per row.
1375 // Therefore we need to check whether its entries are actually non-zero.
1376 // This is the case for the matrices built by MiniEM.
1377 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1378
1379 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1380
1381 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1382 auto localP = P_nodal_imported->getLocalMatrixDevice();
1383 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1384 KOKKOS_LAMBDA(const size_t i) {
1385 for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1386 LO l = localD0.graph.entries(ll);
1387 impl_Scalar p = localD0.values(ll);
1388 if (impl_ATS::magnitude(p) < tol)
1389 continue;
1390 for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1391 LO j = localP.graph.entries(jj);
1392 impl_Scalar v = localP.values(jj);
1393 for (size_t k = 0; k < dim; k++) {
1394 LO jNew = dim*j+k;
1395 impl_Scalar n = localNullspace(i,k);
1396 size_t m;
1397 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1398 if (P11colind(m) == jNew)
1399 break;
1400#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1401 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1402#endif
1403 P11vals(m) += impl_half * v * n;
1404 }
1405 }
1406 }
1407 });
1408
1409 } else {
1410 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1411 auto localP = P_nodal_imported->getLocalMatrixDevice();
1412 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1413 KOKKOS_LAMBDA(const size_t i) {
1414 for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1415 LO l = localD0.graph.entries(ll);
1416 for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1417 LO j = localP.graph.entries(jj);
1418 impl_Scalar v = localP.values(jj);
1419 for (size_t k = 0; k < dim; k++) {
1420 LO jNew = dim*j+k;
1421 impl_Scalar n = localNullspace(i,k);
1422 size_t m;
1423 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1424 if (P11colind(m) == jNew)
1425 break;
1426#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1427 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1428#endif
1429 P11vals(m) += impl_half * v * n;
1430 }
1431 }
1432 }
1433 });
1434 }
1435
1436 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1437 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1438 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1439 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1440
1441 } else
1442 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1443
1444 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1445
1446 auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1447 auto localNullspaceH = NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1448 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1449 KOKKOS_LAMBDA(const size_t i) {
1450 impl_Scalar val = localNullspace_nodal(i,0);
1451 for (size_t j = 0; j < dim; j++)
1452 localNullspaceH(dim*i+j, j) = val;
1453 });
1454
1455 } else { // !skipFirstLevel_
1456 // Get data out of P_nodal_imported and D0.
1457 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1458
1459 CoordsH_ = Coords_;
1460
1461 if (algo == "mat-mat") {
1462
1463 // Create the matrix object
1464 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1465 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1466
1467 size_t nnzEstimate = dim*localD0.graph.entries.size();
1468 lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1469 lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1470 scalar_view_t P11vals("P11_vals",nnzEstimate);
1471
1472 // adjust rowpointer
1473 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1474 KOKKOS_LAMBDA(const size_t i) {
1475 P11rowptr(i) = dim*localD0.graph.row_map(i);
1476 });
1477
1478 // adjust column indices
1479 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1480 KOKKOS_LAMBDA(const size_t jj) {
1481 for (size_t k = 0; k < dim; k++) {
1482 P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1483 P11vals(dim*jj+k) = impl_SC_ZERO;
1484 }
1485 });
1486
1487 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1488
1489 // enter values
1490 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1491 // The matrix D0 has too many entries per row.
1492 // Therefore we need to check whether its entries are actually non-zero.
1493 // This is the case for the matrices built by MiniEM.
1494 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1495
1496 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1497
1498 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1499 KOKKOS_LAMBDA(const size_t i) {
1500 for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1501 LO j = localD0.graph.entries(jj);
1502 impl_Scalar p = localD0.values(jj);
1503 if (impl_ATS::magnitude(p) < tol)
1504 continue;
1505 for (size_t k = 0; k < dim; k++) {
1506 LO jNew = dim*j+k;
1507 impl_Scalar n = localNullspace(i,k);
1508 size_t m;
1509 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1510 if (P11colind(m) == jNew)
1511 break;
1512#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1513 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1514#endif
1515 P11vals(m) += impl_half * n;
1516 }
1517 }
1518 });
1519
1520 } else {
1521 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1522 KOKKOS_LAMBDA(const size_t i) {
1523 for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1524 LO j = localD0.graph.entries(jj);
1525 for (size_t k = 0; k < dim; k++) {
1526 LO jNew = dim*j+k;
1527 impl_Scalar n = localNullspace(i,k);
1528 size_t m;
1529 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1530 if (P11colind(m) == jNew)
1531 break;
1532#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL)
1533 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1534#endif
1535 P11vals(m) += impl_half * n;
1536 }
1537 }
1538 });
1539 }
1540
1541 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1542 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1543 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1544 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1545 } else
1546 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1547
1548 }
1549 } else
1550 {
1551 // get nullspace vectors
1552 ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1553 ArrayRCP<ArrayView<const SC> > nullspace(dim);
1554 for(size_t i=0; i<dim; i++) {
1555 nullspaceRCP[i] = Nullspace_->getData(i);
1556 nullspace[i] = nullspaceRCP[i]();
1557 }
1558
1559 // Get data out of P_nodal_imported and D0.
1560 ArrayRCP<size_t> P11rowptr_RCP;
1561 ArrayRCP<LO> P11colind_RCP;
1562 ArrayRCP<SC> P11vals_RCP;
1563
1564
1565 // Which algorithm should we use for the construction of the special prolongator?
1566 // Option "mat-mat":
1567 // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1568 // Option "gustavson":
1569 // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1570 // More efficient, but only available for serial node.
1571 std::string defaultAlgo = "mat-mat";
1572 std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1573
1574 if (skipFirstLevel_) {
1575
1576 if (algo == "mat-mat") {
1577 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1578 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1579
1580
1581 ArrayRCP<const size_t> D0rowptr_RCP;
1582 ArrayRCP<const LO> D0colind_RCP;
1583 ArrayRCP<const SC> D0vals_RCP;
1584 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1585 // For efficiency
1586 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1587 // slower than Teuchos::ArrayView::operator[].
1588 ArrayView<const size_t> D0rowptr;
1589 ArrayView<const LO> D0colind;
1590 ArrayView<const SC> D0vals;
1591 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1592
1593 // Get data out of P_nodal_imported and D0.
1594 ArrayRCP<const size_t> Prowptr_RCP;
1595 ArrayRCP<const LO> Pcolind_RCP;
1596 ArrayRCP<const SC> Pvals_RCP;
1597 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1598 ArrayView<const size_t> Prowptr;
1599 ArrayView<const LO> Pcolind;
1600 ArrayView<const SC> Pvals;
1601 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1602
1603 // Get data out of D0*P.
1604 ArrayRCP<const size_t> D0Prowptr_RCP;
1605 ArrayRCP<const LO> D0Pcolind_RCP;
1606 ArrayRCP<const SC> D0Pvals_RCP;
1607 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1608
1609 // For efficiency
1610 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1611 // slower than Teuchos::ArrayView::operator[].
1612 ArrayView<const size_t> D0Prowptr;
1613 ArrayView<const LO> D0Pcolind;
1614 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1615
1616 // Create the matrix object
1617 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1618 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1619 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1620 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1621 size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1622 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1623
1624 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1625 ArrayView<LO> P11colind = P11colind_RCP();
1626 ArrayView<SC> P11vals = P11vals_RCP();
1627
1628 // adjust rowpointer
1629 for (size_t i = 0; i < numLocalRows+1; i++) {
1630 P11rowptr[i] = dim*D0Prowptr[i];
1631 }
1632
1633 // adjust column indices
1634 for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1635 for (size_t k = 0; k < dim; k++) {
1636 P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1637 P11vals[dim*jj+k] = SC_ZERO;
1638 }
1639
1640 RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1641 RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1642 // enter values
1643 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1644 // The matrix D0 has too many entries per row.
1645 // Therefore we need to check whether its entries are actually non-zero.
1646 // This is the case for the matrices built by MiniEM.
1647 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1648
1649 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1650 for (size_t i = 0; i < numLocalRows; i++) {
1651 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1652 LO l = D0colind[ll];
1653 SC p = D0vals[ll];
1654 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1655 continue;
1656 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1657 LO j = Pcolind[jj];
1658 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1659 SC v = Pvals[jj];
1660 for (size_t k = 0; k < dim; k++) {
1661 LO jNew = dim*j+k;
1662 SC n = nullspace[k][i];
1663 size_t m;
1664 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1665 if (P11colind[m] == jNew)
1666 break;
1667#ifdef HAVE_MUELU_DEBUG
1668 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1669#endif
1670 P11vals[m] += half * v * n;
1671 }
1672 }
1673 }
1674 }
1675 } else {
1676 // enter values
1677 for (size_t i = 0; i < numLocalRows; i++) {
1678 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1679 LO l = D0colind[ll];
1680 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1681 LO j = Pcolind[jj];
1682 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1683 SC v = Pvals[jj];
1684 for (size_t k = 0; k < dim; k++) {
1685 LO jNew = dim*j+k;
1686 SC n = nullspace[k][i];
1687 size_t m;
1688 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1689 if (P11colind[m] == jNew)
1690 break;
1691#ifdef HAVE_MUELU_DEBUG
1692 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1693#endif
1694 P11vals[m] += half * v * n;
1695 }
1696 }
1697 }
1698 }
1699 }
1700
1701 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1702 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1703
1704 } else if (algo == "gustavson") {
1705 ArrayRCP<const size_t> D0rowptr_RCP;
1706 ArrayRCP<const LO> D0colind_RCP;
1707 ArrayRCP<const SC> D0vals_RCP;
1708 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1709 // For efficiency
1710 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1711 // slower than Teuchos::ArrayView::operator[].
1712 ArrayView<const size_t> D0rowptr;
1713 ArrayView<const LO> D0colind;
1714 ArrayView<const SC> D0vals;
1715 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1716
1717 // Get data out of P_nodal_imported and D0.
1718 ArrayRCP<const size_t> Prowptr_RCP;
1719 ArrayRCP<const LO> Pcolind_RCP;
1720 ArrayRCP<const SC> Pvals_RCP;
1721 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1722 ArrayView<const size_t> Prowptr;
1723 ArrayView<const LO> Pcolind;
1724 ArrayView<const SC> Pvals;
1725 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1726
1727 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1728 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1729 Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1730 // This is ad-hoc and should maybe be replaced with some better heuristics.
1731 size_t nnz_alloc = dim*D0vals_RCP.size();
1732
1733 // Create the matrix object
1734 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1735 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1736 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1737 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1738 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1739
1740 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1741 ArrayView<LO> P11colind = P11colind_RCP();
1742 ArrayView<SC> P11vals = P11vals_RCP();
1743
1744 size_t nnz;
1745 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1746 // The matrix D0 has too many entries per row.
1747 // Therefore we need to check whether its entries are actually non-zero.
1748 // This is the case for the matrices built by MiniEM.
1749 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1750
1751 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1752 nnz = 0;
1753 size_t nnz_old = 0;
1754 for (size_t i = 0; i < numLocalRows; i++) {
1755 P11rowptr[i] = nnz;
1756 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1757 LO l = D0colind[ll];
1758 SC p = D0vals[ll];
1759 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1760 continue;
1761 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1762 LO j = Pcolind[jj];
1763 SC v = Pvals[jj];
1764 for (size_t k = 0; k < dim; k++) {
1765 LO jNew = dim*j+k;
1766 SC n = nullspace[k][i];
1767 // do we already have an entry for (i, jNew)?
1768 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1769 P11_status[jNew] = nnz;
1770 P11colind[nnz] = jNew;
1771 P11vals[nnz] = half * v * n;
1772 // or should it be
1773 // P11vals[nnz] = half * n;
1774 nnz++;
1775 } else {
1776 P11vals[P11_status[jNew]] += half * v * n;
1777 // or should it be
1778 // P11vals[P11_status[jNew]] += half * n;
1779 }
1780 }
1781 }
1782 }
1783 nnz_old = nnz;
1784 }
1785 P11rowptr[numLocalRows] = nnz;
1786 } else {
1787 nnz = 0;
1788 size_t nnz_old = 0;
1789 for (size_t i = 0; i < numLocalRows; i++) {
1790 P11rowptr[i] = nnz;
1791 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1792 LO l = D0colind[ll];
1793 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1794 LO j = Pcolind[jj];
1795 SC v = Pvals[jj];
1796 for (size_t k = 0; k < dim; k++) {
1797 LO jNew = dim*j+k;
1798 SC n = nullspace[k][i];
1799 // do we already have an entry for (i, jNew)?
1800 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1801 P11_status[jNew] = nnz;
1802 P11colind[nnz] = jNew;
1803 P11vals[nnz] = half * v * n;
1804 // or should it be
1805 // P11vals[nnz] = half * n;
1806 nnz++;
1807 } else {
1808 P11vals[P11_status[jNew]] += half * v * n;
1809 // or should it be
1810 // P11vals[P11_status[jNew]] += half * n;
1811 }
1812 }
1813 }
1814 }
1815 nnz_old = nnz;
1816 }
1817 P11rowptr[numLocalRows] = nnz;
1818 }
1819
1820 if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1821 // Downward resize
1822 // - Cannot resize for Epetra, as it checks for same pointers
1823 // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1824 P11vals_RCP.resize(nnz);
1825 P11colind_RCP.resize(nnz);
1826 }
1827
1828 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1829 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1830 } else
1831 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1832
1833 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1834
1835 ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1836 ArrayView<const Scalar> ns_view = ns_rcp();
1837 for (size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1838 Scalar val = ns_view[i];
1839 for (size_t j = 0; j < dim; j++)
1840 NullspaceH_->replaceLocalValue(dim*i+j, j, val);
1841 }
1842
1843
1844 } else { // !skipFirstLevel_
1845 ArrayRCP<const size_t> D0rowptr_RCP;
1846 ArrayRCP<const LO> D0colind_RCP;
1847 ArrayRCP<const SC> D0vals_RCP;
1848 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1849 // For efficiency
1850 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1851 // slower than Teuchos::ArrayView::operator[].
1852 ArrayView<const size_t> D0rowptr;
1853 ArrayView<const LO> D0colind;
1854 ArrayView<const SC> D0vals;
1855 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1856
1857 CoordsH_ = Coords_;
1858 if (algo == "mat-mat") {
1859
1860 // Create the matrix object
1861 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1862 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1863 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1864 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1865 size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1866 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1867
1868 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1869 ArrayView<LO> P11colind = P11colind_RCP();
1870 ArrayView<SC> P11vals = P11vals_RCP();
1871
1872 // adjust rowpointer
1873 for (size_t i = 0; i < numLocalRows+1; i++) {
1874 P11rowptr[i] = dim*D0rowptr[i];
1875 }
1876
1877 // adjust column indices
1878 for (size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
1879 for (size_t k = 0; k < dim; k++) {
1880 P11colind[dim*jj+k] = dim*D0colind[jj]+k;
1881 P11vals[dim*jj+k] = SC_ZERO;
1882 }
1883
1884 // enter values
1885 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1886 // The matrix D0 has too many entries per row.
1887 // Therefore we need to check whether its entries are actually non-zero.
1888 // This is the case for the matrices built by MiniEM.
1889 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1890
1891 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1892 for (size_t i = 0; i < numLocalRows; i++) {
1893 for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1894 LO j = D0colind[jj];
1895 SC p = D0vals[jj];
1896 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1897 continue;
1898 for (size_t k = 0; k < dim; k++) {
1899 LO jNew = dim*j+k;
1900 SC n = nullspace[k][i];
1901 size_t m;
1902 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1903 if (P11colind[m] == jNew)
1904 break;
1905#ifdef HAVE_MUELU_DEBUG
1906 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1907#endif
1908 P11vals[m] += half * n;
1909 }
1910 }
1911 }
1912 } else {
1913 // enter values
1914 for (size_t i = 0; i < numLocalRows; i++) {
1915 for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1916 LO j = D0colind[jj];
1917
1918 for (size_t k = 0; k < dim; k++) {
1919 LO jNew = dim*j+k;
1920 SC n = nullspace[k][i];
1921 size_t m;
1922 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1923 if (P11colind[m] == jNew)
1924 break;
1925#ifdef HAVE_MUELU_DEBUG
1926 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1927#endif
1928 P11vals[m] += half * n;
1929 }
1930 }
1931 }
1932 }
1933
1934 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1935 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1936
1937 } else
1938 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1939 }
1940 }
1941 }
1942
1943 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1945 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
1946
1947 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1948
1949 // coarse matrix for P11* (M1 + D1* M2 D1) P11
1950 RCP<Matrix> temp;
1951 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*P11_,false,temp,GetOStream(Runtime0),true,true);
1952 if (ImporterH_.is_null())
1953 AH_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,AH_,GetOStream(Runtime0),true,true);
1954 else {
1955
1956 RCP<Matrix> temp2;
1957 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
1958
1959 RCP<const Map> map = ImporterH_->getTargetMap()->removeEmptyProcesses();
1960 temp2->removeEmptyProcessesInPlace(map);
1961 if (!temp2.is_null() && temp2->getRowMap().is_null())
1962 temp2 = Teuchos::null;
1963 AH_ = temp2;
1964 }
1965
1966 if (!disable_addon_) {
1967
1968 RCP<Matrix> addon;
1969
1970 if (!AH_.is_null() && Addon_Matrix_.is_null()) {
1971 // construct addon
1972 RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
1973 // catch a failure
1974 TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1975 "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
1976 "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
1977
1978 // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
1979 RCP<Matrix> Zaux, Z;
1980
1981 // construct Zaux = M1 P11
1982 Zaux = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,Zaux,GetOStream(Runtime0),true,true);
1983 // construct Z = D0* M1 P11 = D0* Zaux
1984 Z = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,Z,GetOStream(Runtime0),true,true);
1985
1986 // construct Z* M0inv Z
1987 if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
1988 // We assume that if M0inv has at most one entry per row then
1989 // these are all diagonal entries.
1990 RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
1991 M0inv_Matrix_->getLocalDiagCopy(*diag);
1992 {
1993 ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
1994 for (size_t j=0; j < diag->getMap()->getLocalNumElements(); j++) {
1995 diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
1996 }
1997 }
1998 if (Z->getRowMap()->isSameAs(*(diag->getMap())))
1999 Z->leftScale(*diag);
2000 else {
2001 RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2002 RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2003 diag2->doImport(*diag,*importer,Xpetra::INSERT);
2004 Z->leftScale(*diag2);
2005 }
2006 addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,addon,GetOStream(Runtime0),true,true);
2007 } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2008 RCP<Matrix> C2;
2009 // construct C2 = M0inv Z
2010 C2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,C2,GetOStream(Runtime0),true,true);
2011 // construct Matrix2 = Z* M0inv Z = Z* C2
2012 addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,addon,GetOStream(Runtime0),true,true);
2013 } else {
2014 addon = MatrixFactory::Build(Z->getDomainMap());
2015 // construct Matrix2 = Z* M0inv Z
2016 Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2017 MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *addon, true, true);
2018 }
2019 // Should we keep the addon for next setup?
2020 if (enable_reuse_)
2021 Addon_Matrix_ = addon;
2022 } else
2023 addon = Addon_Matrix_;
2024
2025 if (!AH_.is_null()) {
2026 // add matrices together
2027 RCP<Matrix> newAH;
2028 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*AH_,false,one,*addon,false,one,newAH,GetOStream(Runtime0));
2029 newAH->fillComplete();
2030 AH_ = newAH;
2031 }
2032 }
2033
2034 if (!AH_.is_null() && !skipFirstLevel_) {
2035 ArrayRCP<bool> AHBCrows;
2036 AHBCrows.resize(AH_->getRowMap()->getLocalNumElements());
2037 size_t dim = Nullspace_->getNumVectors();
2038 for (size_t i = 0; i < BCdomain_.size(); i++)
2039 for (size_t k = 0; k < dim; k++)
2040 AHBCrows[i*dim+k] = BCdomain_(i);
2041 magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
2042 if (rowSumTol > 0.)
2043 Utilities::ApplyRowSumCriterion(*AH_, rowSumTol, AHBCrows);
2044 if (applyBCsToH_)
2046 }
2047
2048 if (!AH_.is_null()) {
2049 // If we already applied BCs to A_nodal, we likely do not need
2050 // to fix up AH.
2051 // If we did not apply BCs to A_nodal, we now need to correct
2052 // the zero diagonals of AH, since we did nuke the nullspace.
2053
2054 bool fixZeroDiagonal = !applyBCsToAnodal_;
2055 if (precList11_.isParameter("rap: fix zero diagonals"))
2056 fixZeroDiagonal = precList11_.get<bool>("rap: fix zero diagonals");
2057
2058 if (fixZeroDiagonal) {
2059 magnitudeType threshold = 1e-16;
2060 Scalar replacement = 1.0;
2061 if (precList11_.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2062 threshold = precList11_.get<magnitudeType>("rap: fix zero diagonals threshold");
2063 else if (precList11_.isType<double>("rap: fix zero diagonals threshold"))
2064 threshold = Teuchos::as<magnitudeType>(precList11_.get<double>("rap: fix zero diagonals threshold"));
2065 if (precList11_.isType<double>("rap: fix zero diagonals replacement"))
2066 replacement = Teuchos::as<Scalar>(precList11_.get<double>("rap: fix zero diagonals replacement"));
2067 Xpetra::MatrixUtils<SC,LO,GO,NO>::CheckRepairMainDiagonal(AH_, true, GetOStream(Warnings1), threshold, replacement);
2068 }
2069
2070 // Set block size
2071 size_t dim = Nullspace_->getNumVectors();
2072 AH_->SetFixedBlockSize(dim);
2073 }
2074 }
2075
2076
2077 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2078 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::resetMatrix(RCP<Matrix> SM_Matrix_new, bool ComputePrec) {
2079 bool reuse = !SM_Matrix_.is_null();
2080 SM_Matrix_ = SM_Matrix_new;
2081 dump(*SM_Matrix_, "SM.m");
2082 if (ComputePrec)
2083 compute(reuse);
2084 }
2085
2086
2087 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2088 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2089
2090 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2091
2092 { // compute residual
2093
2094 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2096 }
2097
2098 { // restrict residual to sub-hierarchies
2099
2100 if (implicitTranspose_) {
2101 {
2102 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2103 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2104 }
2105 if (!allNodesBoundary_) {
2106 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2107 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2108 }
2109 } else {
2111 // Column maps of D0_T and R11 match, and we're running Tpetra
2112 {
2113 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2114 D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2115 }
2116 if (!allNodesBoundary_) {
2117 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2118 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2119 }
2120 {
2121 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2122 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2123 }
2124 } else {
2125 {
2126 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2127 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2128 }
2129 if (!allNodesBoundary_) {
2130 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2131 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2132 }
2133 }
2134 }
2135 }
2136
2137 {
2138 RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2139
2140 // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2141
2142 if (!ImporterH_.is_null() && !implicitTranspose_) {
2143 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2144 P11resTmp_->beginImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2145 }
2146 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_) {
2147 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2148 D0resTmp_->beginImport(*D0res_, *Importer22_, Xpetra::INSERT);
2149 }
2150
2151 // iterate on coarse (1, 1) block
2152 if (!AH_.is_null()) {
2153 if (!ImporterH_.is_null() && !implicitTranspose_)
2154 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2155
2156 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2157
2158#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2159 if (!thyraPrecOpH_.is_null()) {
2160 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2161 thyraPrecOpH_->apply(*P11resSubComm_, *P11xSubComm_, Teuchos::NO_TRANS, one, zero);
2162 }
2163 else
2164#endif
2166 }
2167
2168 // iterate on (2, 2) block
2169 if (!A22_.is_null()) {
2170 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2171 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2172
2173 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2174
2175#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2176 if (!thyraPrecOp22_.is_null()) {
2177 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2178 thyraPrecOp22_->apply(*D0resSubComm_, *D0xSubComm_, Teuchos::NO_TRANS, one, zero);
2179 }
2180 else
2181#endif
2183 }
2184
2185 if (AH_.is_null() && !ImporterH_.is_null() && !implicitTranspose_)
2186 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2187 if (A22_.is_null() && !allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2188 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2189 }
2190
2192 { // prolongate (1,1) block
2193 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2194 P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2195 }
2196
2197 if (!allNodesBoundary_) { // prolongate (2,2) block
2198 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2199 D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2200 }
2201 } else {
2202 { // prolongate (1,1) block
2203 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2204 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2205 }
2206
2207 if (!allNodesBoundary_) { // prolongate (2,2) block
2208 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2209 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2210 }
2211
2212 { // update current solution
2213 RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2214 X.update(one, *residual_, one);
2215 }
2216 }
2217
2218 }
2219
2220
2221 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2222 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2223
2224 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2225
2226 { // compute residual
2227 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2230 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2231 else
2232 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2233 }
2234
2235 { // solve coarse (1,1) block
2236 if (!ImporterH_.is_null() && !implicitTranspose_) {
2237 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2238 P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2239 }
2240 if (!AH_.is_null()) {
2241 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2243 }
2244 }
2245
2246 { // update current solution
2247 RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2248 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2249 X.update(one, *residual_, one);
2250 }
2251
2252 }
2253
2254
2255 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2256 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2257
2259 return;
2260
2261 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2262
2263 { // compute residual
2264 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2267 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2268 else
2269 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2270 }
2271
2272 { // solve (2,2) block
2273 if (!Importer22_.is_null() && !implicitTranspose_) {
2274 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2275 D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2276 }
2277 if (!A22_.is_null()) {
2278 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2280 }
2281 }
2282
2283 { //update current solution
2284 RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2285 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2286 X.update(one, *residual_, one);
2287 }
2288
2289 }
2290
2291
2292 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2293 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2294 Teuchos::ETransp /* mode */,
2295 Scalar /* alpha */,
2296 Scalar /* beta */) const {
2297
2298 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2299
2300 // make sure that we have enough temporary memory
2301 if (!allEdgesBoundary_ && X.getNumVectors() != P11res_->getNumVectors())
2302 allocateMemory(X.getNumVectors());
2303
2304 { // apply pre-smoothing
2305
2306 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2307
2308 PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2309 }
2310
2311 // do solve for the 2x2 block system
2312 if(mode_=="additive")
2313 applyInverseAdditive(RHS,X);
2314 else if(mode_=="121") {
2315 solveH(RHS,X);
2316 solve22(RHS,X);
2317 solveH(RHS,X);
2318 } else if(mode_=="212") {
2319 solve22(RHS,X);
2320 solveH(RHS,X);
2321 solve22(RHS,X);
2322 } else if(mode_=="1")
2323 solveH(RHS,X);
2324 else if(mode_=="2")
2325 solve22(RHS,X);
2326 else if(mode_=="7") {
2327 solveH(RHS,X);
2328 { // apply pre-smoothing
2329
2330 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2331
2332 PreSmoother_->Apply(X, RHS, false);
2333 }
2334 solve22(RHS,X);
2335 { // apply post-smoothing
2336
2337 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2338
2339 PostSmoother_->Apply(X, RHS, false);
2340 }
2341 solveH(RHS,X);
2342 } else if(mode_=="none") {
2343 // do nothing
2344 }
2345 else
2346 applyInverseAdditive(RHS,X);
2347
2348 { // apply post-smoothing
2349
2350 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2351
2352 PostSmoother_->Apply(X, RHS, false);
2353 }
2354
2355 }
2356
2357
2358 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2362
2363
2364 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2366 initialize(const Teuchos::RCP<Matrix> & D0_Matrix,
2367 const Teuchos::RCP<Matrix> & Ms_Matrix,
2368 const Teuchos::RCP<Matrix> & M0inv_Matrix,
2369 const Teuchos::RCP<Matrix> & M1_Matrix,
2370 const Teuchos::RCP<MultiVector> & Nullspace,
2371 const Teuchos::RCP<RealValuedMultiVector> & Coords,
2372 Teuchos::ParameterList& List)
2373 {
2374 // some pre-conditions
2375 TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2376 TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2377 TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2378
2379#ifdef HAVE_MUELU_DEBUG
2380 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2381 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2382 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2383
2384 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2385 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2386 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2387
2388 TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2389
2390 if (!M0inv_Matrix) {
2391 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2392 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2393 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2394 }
2395#endif
2396
2397 HierarchyH_ = Teuchos::null;
2398 Hierarchy22_ = Teuchos::null;
2399 PreSmoother_ = Teuchos::null;
2400 PostSmoother_ = Teuchos::null;
2401 disable_addon_ = false;
2402 mode_ = "additive";
2403
2404 // set parameters
2405 setParameters(List);
2406
2407 if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2408 // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2409 // Fortunately, D0 is quite sparse.
2410 // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2411
2412 RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2413 RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2414 ArrayRCP<const size_t> D0rowptr_RCP;
2415 ArrayRCP<const LO> D0colind_RCP;
2416 ArrayRCP<const SC> D0vals_RCP;
2417 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2418 D0colind_RCP,
2419 D0vals_RCP);
2420
2421 ArrayRCP<size_t> D0copyrowptr_RCP;
2422 ArrayRCP<LO> D0copycolind_RCP;
2423 ArrayRCP<SC> D0copyvals_RCP;
2424 D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2425 D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2426 D0copycolind_RCP.deepCopy(D0colind_RCP());
2427 D0copyvals_RCP.deepCopy(D0vals_RCP());
2428 D0copyCrs->setAllValues(D0copyrowptr_RCP,
2429 D0copycolind_RCP,
2430 D0copyvals_RCP);
2431 D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2432 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2433 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2434 D0_Matrix_ = D0copy;
2435 } else
2436 D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2437
2438
2439 M0inv_Matrix_ = M0inv_Matrix;
2440 Ms_Matrix_ = Ms_Matrix;
2441 M1_Matrix_ = M1_Matrix;
2442 Coords_ = Coords;
2443 Nullspace_ = Nullspace;
2444
2445 dump(*D0_Matrix_, "D0_clean.m");
2446 dump(*Ms_Matrix_, "Ms.m");
2447 dump(*M1_Matrix_, "M1.m");
2448 if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2449 if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2450
2451 }
2452
2453
2454 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2456 describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2457
2458 std::ostringstream oss;
2459
2460 RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2461
2462#ifdef HAVE_MPI
2463 int root;
2464 if (!AH_.is_null())
2465 root = comm->getRank();
2466 else
2467 root = -1;
2468
2469 int actualRoot;
2470 reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2471 root = actualRoot;
2472#endif
2473
2474
2475 oss << "\n--------------------------------------------------------------------------------\n" <<
2476 "--- RefMaxwell Summary ---\n"
2477 "--------------------------------------------------------------------------------" << std::endl;
2478 oss << std::endl;
2479
2480 GlobalOrdinal numRows;
2481 GlobalOrdinal nnz;
2482
2483 SM_Matrix_->getRowMap()->getComm()->barrier();
2484
2485 numRows = SM_Matrix_->getGlobalNumRows();
2486 nnz = SM_Matrix_->getGlobalNumEntries();
2487
2488 Xpetra::global_size_t tt = numRows;
2489 int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2490 tt = nnz;
2491 int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2492
2493 oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2494 oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2495
2496 if (!A22_.is_null()) {
2497 // ToDo: make sure that this is printed correctly
2498 numRows = A22_->getGlobalNumRows();
2499 nnz = A22_->getGlobalNumEntries();
2500
2501 oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2502 }
2503
2504 oss << std::endl;
2505
2506 {
2507 if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2508 oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2509 else {
2510 oss << "Smoother pre : "
2511 << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2512 oss << "Smoother post : "
2513 << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2514 }
2515 }
2516 oss << std::endl;
2517
2518 std::string outstr = oss.str();
2519
2520#ifdef HAVE_MPI
2521 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2522 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2523
2524 int strLength = outstr.size();
2525 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2526 if (comm->getRank() != root)
2527 outstr.resize(strLength);
2528 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2529#endif
2530
2531 out << outstr;
2532
2533 if (!HierarchyH_.is_null())
2534 HierarchyH_->describe(out, GetVerbLevel());
2535
2536 if (!Hierarchy22_.is_null())
2537 Hierarchy22_->describe(out, GetVerbLevel());
2538
2539 if (IsPrint(Statistics2)) {
2540 // Print the grid of processors
2541 std::ostringstream oss2;
2542
2543 oss2 << "Sub-solver distribution over ranks" << std::endl;
2544 oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2545
2546 int numProcs = comm->getSize();
2547#ifdef HAVE_MPI
2548 RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2549 TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2550 RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2551#endif
2552
2553 char status = 0;
2554 if (!AH_.is_null())
2555 status += 1;
2556 if (!A22_.is_null())
2557 status += 2;
2558 std::vector<char> states(numProcs, 0);
2559#ifdef HAVE_MPI
2560 MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2561#else
2562 states.push_back(status);
2563#endif
2564
2565 int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2566 for (int proc = 0; proc < numProcs; proc += rowWidth) {
2567 for (int j = 0; j < rowWidth; j++)
2568 if (proc + j < numProcs)
2569 if (states[proc+j] == 0)
2570 oss2 << ".";
2571 else if (states[proc+j] == 1)
2572 oss2 << "1";
2573 else if (states[proc+j] == 2)
2574 oss2 << "2";
2575 else
2576 oss2 << "B";
2577 else
2578 oss2 << " ";
2579
2580 oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2581 }
2582 oss2 << std::endl;
2583 GetOStream(Statistics2) << oss2.str();
2584 }
2585
2586
2587 }
2588
2589
2590} // namespace
2591
2592#define MUELU_REFMAXWELL_SHORT
2593#endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
#define MueLu_maxAll(rcpComm, in, out)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_minAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Factory to export aggregation info or visualize aggregates using VTK.
AmalgamationFactory for subblocks of strided map based amalgamation data.
Factory for creating a graph based on a given matrix.
Factory for creating a graph based on a given matrix.
Factory for generating coarse level map. Used by TentativePFactory.
Class for transferring coordinates from a finer level to a coarser one.
Exception throws to report errors in the internal logical of the program.
This class specifies the default factory that should generate some data on a Level if the data does n...
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void setlib(Xpetra::UnderlyingLib lib2)
void SetLevelID(int levelID)
Set level number.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
void SetPreviousLevel(const RCP< Level > &previousLevel)
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
static const T & getDefault(const std::string &name)
Returns default value on the "master" list for a parameter with the specified name and type.
static void removeExplicitZeros(Teuchos::ParameterList &parameterList, RCP< Matrix > &D0_Matrix, RCP< Matrix > &SM_Matrix, RCP< Matrix > &M1_Matrix, RCP< Matrix > &Ms_Matrix)
Remove explicit zeros.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > PtAPWrapper(const RCP< Matrix > &A, const RCP< Matrix > &P, Teuchos::ParameterList &params, std::string &label)
Performs an P^T AP.
static void detectBoundaryConditionsSM(RCP< Matrix > &SM_Matrix, RCP< Matrix > &D0_Matrix, magnitudeType rowSumTol, bool useKokkos_, Kokkos::View< bool *, typename Node::device_type > &BCrowsKokkos, Kokkos::View< bool *, typename Node::device_type > &BCcolsKokkos, Kokkos::View< bool *, typename Node::device_type > &BCdomainKokkos, int &BCedges, int &BCnodes, Teuchos::ArrayRCP< bool > &BCrows, Teuchos::ArrayRCP< bool > &BCcols, Teuchos::ArrayRCP< bool > &BCdomain, bool &allEdgesBoundary, bool &allNodesBoundary)
Detect Dirichlet boundary conditions.
static void setMatvecParams(Matrix &A, RCP< ParameterList > matvecParams)
Sets matvec params on a matrix.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Factory for building coarse matrices.
Factory for building coarse matrices.
Applies permutation to grid transfer operators.
Teuchos::RCP< MultiVector > D0xSubComm_
Teuchos::RCP< MultiVector > NullspaceH_
Nullspace for (1,1) problem.
Teuchos::RCP< MultiVector > P11resTmp_
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
Teuchos::RCP< SmootherBase > PreSmoother_
Teuchos::RCP< MultiVector > P11x_
Teuchos::RCP< MultiVector > residual_
Teuchos::RCP< Matrix > D0_Matrix_
Teuchos::RCP< const Import > D0origImporter_
Teuchos::RCP< Teuchos::ParameterList > A22_AP_reuse_data_
Teuchos::RCP< MultiVector > P11res_
Temporary memory.
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
Teuchos::RCP< RealValuedMultiVector > Coords_
Coordinates.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Teuchos::RCP< Matrix > AH_
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
void allocateMemory(int numVectors) const
allocate multivectors for solve
Teuchos::RCP< Matrix > M0inv_Matrix_
Teuchos::RCP< MultiVector > D0x_
Teuchos::RCP< Matrix > SM_Matrix_
Various matrices.
Teuchos::RCP< Teuchos::ParameterList > AH_AP_reuse_data_
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
Teuchos::RCP< Matrix > D0_T_Matrix_
Teuchos::ParameterList smootherList_
Teuchos::RCP< MultiVector > D0res_
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Kokkos::View< bool *, typename Node::device_type > BCrows_
Vectors for BCs.
Teuchos::RCP< SmootherPrototype > PostSmootherData_
Teuchos::RCP< Teuchos::ParameterList > A22_RAP_reuse_data_
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Teuchos::RCP< SmootherPrototype > PreSmootherData_
bool disable_addon_
Some options.
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
Teuchos::ParameterList precList22_
Teuchos::RCP< const Import > ImporterH_
Importer to coarse (1,1) hierarchy.
Teuchos::RCP< Matrix > A22_
Teuchos::RCP< Matrix > R11_
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Teuchos::RCP< const Import > Importer22_
void buildProlongator()
Setup the prolongator for the (1,1)-block.
Teuchos::RCP< Teuchos::ParameterList > AH_RAP_reuse_data_
Teuchos::RCP< Matrix > A_nodal_Matrix_
Teuchos::RCP< Matrix > P11_
Kokkos::View< bool *, typename Node::device_type > BCcols_
Teuchos::RCP< MultiVector > P11resSubComm_
Teuchos::RCP< MultiVector > P11xSubComm_
Teuchos::RCP< Hierarchy > Hierarchy22_
Teuchos::RCP< SmootherBase > PostSmoother_
Teuchos::RCP< MultiVector > D0resTmp_
Teuchos::RCP< Matrix > Addon_Matrix_
Teuchos::RCP< Matrix > M1_Matrix_
void setFineLevelSmoother()
Set the fine level smoother.
Teuchos::RCP< MultiVector > D0TR11Tmp_
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
Teuchos::RCP< RealValuedMultiVector > CoordsH_
void compute(bool reuse=false)
Setup the preconditioner.
Teuchos::RCP< Hierarchy > HierarchyH_
Two hierarchies: one for the coarse (1,1)-block, another for the (2,2)-block.
Teuchos::ParameterList precList11_
Teuchos::RCP< Matrix > Ms_Matrix_
Teuchos::ParameterList parameterList_
Parameter lists.
Kokkos::View< bool *, typename Node::device_type > BCdomain_
Teuchos::RCP< MultiVector > D0resSubComm_
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
void setParameters(Teuchos::ParameterList &list)
Set parameters.
Teuchos::RCP< MultiVector > Nullspace_
Nullspace.
void dump(const Matrix &A, std::string name) const
dump out matrix
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
Teuchos::RCP< const Map > D0origDomainMap_
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
Factory for building permutation matrix that can be be used to shuffle data (matrices,...
Factory for determing the number of partitions for rebalancing.
Factory for building Smoothed Aggregation prolongators.
Factory for building Smoothed Aggregation prolongators.
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
Factory for building tentative prolongator.
Class that encapsulates external library smoothers.
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > > &A, const std::vector< DefaultLocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > > &A, const std::vector< DefaultLocalOrdinal > &dirichletRows)
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &Op, const MultiVector &X, const MultiVector &RHS)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::coordinateType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
VerbLevel GetVerbLevel() const
Get the verbosity level.
int SetProcRankVerbose(int procRank) const
Set proc rank used for printing.
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
static void SetMueLuOFileStream(const std::string &filename)
Interface to Zoltan2 library.
Interface to Zoltan library.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line).
@ Statistics2
Print even more statistics.
@ Runtime0
One-liner description of what is happening.
@ Runtime1
Description of what is happening (more verbose).
@ Warnings1
Additional warnings.
@ Timings
Print all timing information.
@ Statistics0
Print statistics that do not involve significant additional computation.
MsgType toVerbLevel(const std::string &verbLevelStr)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix,...