Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Relaxation_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ***********************************************************************
38//@HEADER
39*/
40
41#ifndef IFPACK2_RELAXATION_DEF_HPP
42#define IFPACK2_RELAXATION_DEF_HPP
43
44#include "Teuchos_StandardParameterEntryValidators.hpp"
45#include "Teuchos_TimeMonitor.hpp"
46#include "Tpetra_CrsMatrix.hpp"
47#include "Tpetra_BlockCrsMatrix.hpp"
48#include "Tpetra_BlockView.hpp"
49#include "Ifpack2_Utilities.hpp"
50#include "Ifpack2_Details_getCrsMatrix.hpp"
51#include "MatrixMarket_Tpetra.hpp"
52#include "Tpetra_Details_residual.hpp"
53#include <cstdlib>
54#include <memory>
55#include <sstream>
56#include "KokkosSparse_gauss_seidel.hpp"
57
58namespace {
59 // Validate that a given int is nonnegative.
60 class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
61 public:
62 // Constructor (does nothing).
63 NonnegativeIntValidator () {}
64
65 // ParameterEntryValidator wants this method.
66 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
67 return Teuchos::null;
68 }
69
70 // Actually validate the parameter's value.
71 void
72 validate (const Teuchos::ParameterEntry& entry,
73 const std::string& paramName,
74 const std::string& sublistName) const
75 {
76 using std::endl;
77 Teuchos::any anyVal = entry.getAny (true);
78 const std::string entryName = entry.getAny (false).typeName ();
79
80 TEUCHOS_TEST_FOR_EXCEPTION(
81 anyVal.type () != typeid (int),
82 Teuchos::Exceptions::InvalidParameterType,
83 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
84 << "\" has the wrong type." << endl << "Parameter: " << paramName
85 << endl << "Type specified: " << entryName << endl
86 << "Type required: int" << endl);
87
88 const int val = Teuchos::any_cast<int> (anyVal);
89 TEUCHOS_TEST_FOR_EXCEPTION(
90 val < 0, Teuchos::Exceptions::InvalidParameterValue,
91 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
92 << "\" is negative." << endl << "Parameter: " << paramName
93 << endl << "Value specified: " << val << endl
94 << "Required range: [0, INT_MAX]" << endl);
95 }
96
97 // ParameterEntryValidator wants this method.
98 const std::string getXMLTypeName () const {
99 return "NonnegativeIntValidator";
100 }
101
102 // ParameterEntryValidator wants this method.
103 void
104 printDoc (const std::string& docString,
105 std::ostream &out) const
106 {
107 Teuchos::StrUtils::printLines (out, "# ", docString);
108 out << "#\tValidator Used: " << std::endl;
109 out << "#\t\tNonnegativeIntValidator" << std::endl;
110 }
111 };
112
113 // A way to get a small positive number (eps() for floating-point
114 // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
115 // define it (for example, for integer values).
116 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
117 class SmallTraits {
118 public:
119 // Return eps if Scalar is a floating-point type, else return 1.
120 static const Scalar eps ();
121 };
122
123 // Partial specialization for when Scalar is not a floating-point type.
124 template<class Scalar>
125 class SmallTraits<Scalar, true> {
126 public:
127 static const Scalar eps () {
128 return Teuchos::ScalarTraits<Scalar>::one ();
129 }
130 };
131
132 // Partial specialization for when Scalar is a floating-point type.
133 template<class Scalar>
134 class SmallTraits<Scalar, false> {
135 public:
136 static const Scalar eps () {
137 return Teuchos::ScalarTraits<Scalar>::eps ();
138 }
139 };
140
141 // Work-around for GitHub Issue #5269.
142 template<class Scalar,
143 const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
144 struct RealTraits {};
145
146 template<class Scalar>
147 struct RealTraits<Scalar, false> {
148 using val_type = Scalar;
149 using mag_type = Scalar;
150 static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
151 return z;
152 }
153 };
154
155 template<class Scalar>
156 struct RealTraits<Scalar, true> {
157 using val_type = Scalar;
158 using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
159 static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
160 return Kokkos::ArithTraits<val_type>::real (z);
161 }
162 };
163
164 template<class Scalar>
165 KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
166 getRealValue (const Scalar& z) {
167 return RealTraits<Scalar>::real (z);
168 }
169
170} // namespace (anonymous)
171
172namespace Ifpack2 {
173
174template<class MatrixType>
175void
177updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
178 size_t numVecs) const
179{
180 // Allocate a multivector if the cached one isn't perfect. Checking
181 // for map pointer equality is much cheaper than Map::isSameAs.
182 if (cachedMV_.is_null () ||
183 map.get () != cachedMV_->getMap ().get () ||
184 cachedMV_->getNumVectors () != numVecs) {
185 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
187 cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
188 }
189}
190
191template<class MatrixType>
193setMatrix(const Teuchos::RCP<const row_matrix_type>& A)
194{
195 if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
196 Importer_ = Teuchos::null;
197 pointImporter_ = Teuchos::null;
198 Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
199 isInitialized_ = false;
200 IsComputed_ = false;
201 diagOffsets_ = Kokkos::View<size_t*, device_type>();
202 savedDiagOffsets_ = false;
203 hasBlockCrsMatrix_ = false;
204 serialGaussSeidel_ = Teuchos::null;
205 if (! A.is_null ()) {
206 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
207 }
208 A_ = A;
209 }
210}
211
212template<class MatrixType>
214Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
215: A_ (A),
216 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
217 false : // a reasonable default if there's no communicator
218 A->getRowMap ()->getComm ()->getSize () > 1)
219{
220 this->setObjectLabel ("Ifpack2::Relaxation");
221}
222
223
224template<class MatrixType>
225Teuchos::RCP<const Teuchos::ParameterList>
227{
228 using Teuchos::Array;
229 using Teuchos::ParameterList;
230 using Teuchos::parameterList;
231 using Teuchos::RCP;
232 using Teuchos::rcp;
233 using Teuchos::rcp_const_cast;
234 using Teuchos::rcp_implicit_cast;
235 using Teuchos::setStringToIntegralParameter;
236 typedef Teuchos::ParameterEntryValidator PEV;
237
238 if (validParams_.is_null ()) {
239 RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
240
241 // Set a validator that automatically converts from the valid
242 // string options to their enum values.
243 Array<std::string> precTypes (8);
244 precTypes[0] = "Jacobi";
245 precTypes[1] = "Gauss-Seidel";
246 precTypes[2] = "Symmetric Gauss-Seidel";
247 precTypes[3] = "MT Gauss-Seidel";
248 precTypes[4] = "MT Symmetric Gauss-Seidel";
249 precTypes[5] = "Richardson";
250 precTypes[6] = "Two-stage Gauss-Seidel";
251 precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
252 Array<Details::RelaxationType> precTypeEnums (8);
253 precTypeEnums[0] = Details::JACOBI;
254 precTypeEnums[1] = Details::GS;
255 precTypeEnums[2] = Details::SGS;
256 precTypeEnums[3] = Details::MTGS;
257 precTypeEnums[4] = Details::MTSGS;
258 precTypeEnums[5] = Details::RICHARDSON;
259 precTypeEnums[6] = Details::GS2;
260 precTypeEnums[7] = Details::SGS2;
261 const std::string defaultPrecType ("Jacobi");
262 setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
263 defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
264 pl.getRawPtr ());
265
266 const int numSweeps = 1;
267 RCP<PEV> numSweepsValidator =
268 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
269 pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
270 rcp_const_cast<const PEV> (numSweepsValidator));
271
272 // number of 'local' outer sweeps for two-stage GS
273 const int numOuterSweeps = 1;
274 RCP<PEV> numOuterSweepsValidator =
275 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
276 pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
277 rcp_const_cast<const PEV> (numOuterSweepsValidator));
278 // number of 'local' inner sweeps for two-stage GS
279 const int numInnerSweeps = 1;
280 RCP<PEV> numInnerSweepsValidator =
281 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
282 pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
283 rcp_const_cast<const PEV> (numInnerSweepsValidator));
284 // specify damping factor for the inner sweeps of two-stage GS
285 const scalar_type innerDampingFactor = STS::one ();
286 pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
287 // specify whether to use sptrsv instead of inner-iterations for two-stage GS
288 const bool innerSpTrsv = false;
289 pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
290 // specify whether to use compact form of recurrence for two-stage GS
291 const bool compactForm = false;
292 pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
293
294 const scalar_type dampingFactor = STS::one ();
295 pl->set ("relaxation: damping factor", dampingFactor);
296
297 const bool zeroStartingSolution = true;
298 pl->set ("relaxation: zero starting solution", zeroStartingSolution);
299
300 const bool doBackwardGS = false;
301 pl->set ("relaxation: backward mode", doBackwardGS);
302
303 const bool doL1Method = false;
304 pl->set ("relaxation: use l1", doL1Method);
305
306 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
307 (STM::one() + STM::one()); // 1.5
308 pl->set ("relaxation: l1 eta", l1eta);
309
310 const scalar_type minDiagonalValue = STS::zero ();
311 pl->set ("relaxation: min diagonal value", minDiagonalValue);
312
313 const bool fixTinyDiagEntries = false;
314 pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
315
316 const bool checkDiagEntries = false;
317 pl->set ("relaxation: check diagonal entries", checkDiagEntries);
318
319 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
320 pl->set("relaxation: local smoothing indices", localSmoothingIndices);
321
322 const bool is_matrix_structurally_symmetric = false;
323 pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
324
325 const bool ifpack2_dump_matrix = false;
326 pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
327
328 const int cluster_size = 1;
329 pl->set("relaxation: mtgs cluster size", cluster_size);
330
331 pl->set("relaxation: mtgs coloring algorithm", "Default");
332
333 const int long_row_threshold = 0;
334 pl->set("relaxation: long row threshold", long_row_threshold);
335
336 const bool timer_for_apply = true;
337 pl->set("timer for apply", timer_for_apply);
338
339 validParams_ = rcp_const_cast<const ParameterList> (pl);
340 }
341 return validParams_;
342}
343
344
345template<class MatrixType>
346void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
347{
348 using Teuchos::getIntegralValue;
349 using Teuchos::ParameterList;
350 using Teuchos::RCP;
351 typedef scalar_type ST; // just to make code below shorter
352
353 if (pl.isType<double>("relaxation: damping factor")) {
354 // Make sure that ST=complex can run with a damping factor that is
355 // a double.
356 ST df = pl.get<double>("relaxation: damping factor");
357 pl.remove("relaxation: damping factor");
358 pl.set("relaxation: damping factor",df);
359 }
360
361 pl.validateParametersAndSetDefaults (* getValidParameters ());
362
363 const Details::RelaxationType precType =
364 getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
365 const int numSweeps = pl.get<int> ("relaxation: sweeps");
366 const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
367 const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
368 const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
369 const bool doL1Method = pl.get<bool> ("relaxation: use l1");
370 const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
371 const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
372 const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
373 const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
374 const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
375 const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
376 const bool timer_for_apply = pl.get<bool> ("timer for apply");
377 int cluster_size = 1;
378 if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
379 cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
380 int long_row_threshold = 0;
381 if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
382 long_row_threshold = pl.get<int> ("relaxation: long row threshold");
383 std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
384 //convert to lowercase
385 for(char& c : color_algo_name)
386 c = tolower(c);
387 if(color_algo_name == "default")
388 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
389 else if(color_algo_name == "serial")
390 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
391 else if(color_algo_name == "vb")
392 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
393 else if(color_algo_name == "vbbit")
394 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
395 else if(color_algo_name == "vbcs")
396 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
397 else if(color_algo_name == "vbd")
398 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
399 else if(color_algo_name == "vbdbit")
400 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
401 else if(color_algo_name == "eb")
402 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
403 else
404 {
405 std::ostringstream msg;
406 msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
407 msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
408 TEUCHOS_TEST_FOR_EXCEPTION(
409 true, std::invalid_argument, msg.str());
410 }
411
412 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
413
414 // for Two-stage Gauss-Seidel
415 if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
416 // Make sure that ST=complex can run with a damping factor that is
417 // a double.
418 ST df = pl.get<double>("relaxation: inner damping factor");
419 pl.remove("relaxation: inner damping factor");
420 pl.set("relaxation: inner damping factor",df);
421 }
422 //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
423 if (long_row_threshold > 0) {
424 TEUCHOS_TEST_FOR_EXCEPTION(
425 cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
426 "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
427 TEUCHOS_TEST_FOR_EXCEPTION(
428 precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
429 std::invalid_argument, "Ifpack2::Relaxation: "
430 "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
431 "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
432 }
433
434 const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
435 const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
436 const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
437 const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
438 const bool compactForm = pl.get<bool> ("relaxation: compact form");
439
440 // "Commit" the changes, now that we've validated everything.
441 PrecType_ = precType;
442 NumSweeps_ = numSweeps;
443 DampingFactor_ = dampingFactor;
444 ZeroStartingSolution_ = zeroStartSol;
445 DoBackwardGS_ = doBackwardGS;
446 DoL1Method_ = doL1Method;
447 L1Eta_ = l1Eta;
448 MinDiagonalValue_ = minDiagonalValue;
449 fixTinyDiagEntries_ = fixTinyDiagEntries;
450 checkDiagEntries_ = checkDiagEntries;
451 clusterSize_ = cluster_size;
452 longRowThreshold_ = long_row_threshold;
453 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
454 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
455 localSmoothingIndices_ = localSmoothingIndices;
456 // for Two-stage GS
457 NumInnerSweeps_ = numInnerSweeps;
458 NumOuterSweeps_ = numOuterSweeps;
459 InnerSpTrsv_ = innerSpTrsv;
460 InnerDampingFactor_ = innerDampingFactor;
461 CompactForm_ = compactForm;
462 TimerForApply_ = timer_for_apply;
463}
464
465
466template<class MatrixType>
467void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
468{
469 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
470 // but otherwise, we will get [unused] in pl
471 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
472}
473
474
475template<class MatrixType>
476Teuchos::RCP<const Teuchos::Comm<int> >
478 TEUCHOS_TEST_FOR_EXCEPTION(
479 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
480 "The input matrix A is null. Please call setMatrix() with a nonnull "
481 "input matrix before calling this method.");
482 return A_->getRowMap ()->getComm ();
483}
484
485
486template<class MatrixType>
487Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
489 return A_;
490}
491
492
493template<class MatrixType>
494Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
495 typename MatrixType::global_ordinal_type,
496 typename MatrixType::node_type> >
498 TEUCHOS_TEST_FOR_EXCEPTION(
499 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
500 "The input matrix A is null. Please call setMatrix() with a nonnull "
501 "input matrix before calling this method.");
502 return A_->getDomainMap ();
503}
504
505
506template<class MatrixType>
507Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
508 typename MatrixType::global_ordinal_type,
509 typename MatrixType::node_type> >
511 TEUCHOS_TEST_FOR_EXCEPTION(
512 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
513 "The input matrix A is null. Please call setMatrix() with a nonnull "
514 "input matrix before calling this method.");
515 return A_->getRangeMap ();
516}
517
518
519template<class MatrixType>
521 return true;
522}
523
524
525template<class MatrixType>
527 return(NumInitialize_);
528}
529
530
531template<class MatrixType>
533 return(NumCompute_);
534}
535
536
537template<class MatrixType>
539 return(NumApply_);
540}
541
542
543template<class MatrixType>
545 return(InitializeTime_);
546}
547
548
549template<class MatrixType>
551 return(ComputeTime_);
552}
553
554
555template<class MatrixType>
557 return(ApplyTime_);
558}
559
560
561template<class MatrixType>
563 return(ComputeFlops_);
564}
565
566
567template<class MatrixType>
569 return(ApplyFlops_);
570}
571
572
573
574template<class MatrixType>
576 TEUCHOS_TEST_FOR_EXCEPTION(
577 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
578 "The input matrix A is null. Please call setMatrix() with a nonnull "
579 "input matrix, then call compute(), before calling this method.");
580 // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
581 return A_->getLocalNumRows() + A_->getLocalNumEntries();
582}
583
584
585template<class MatrixType>
586void
588apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
589 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
590 Teuchos::ETransp /* mode */,
591 scalar_type alpha,
592 scalar_type beta) const
593{
594 using Teuchos::as;
595 using Teuchos::RCP;
596 using Teuchos::rcp;
597 using Teuchos::rcpFromRef;
598 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
600 TEUCHOS_TEST_FOR_EXCEPTION(
601 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
602 "The input matrix A is null. Please call setMatrix() with a nonnull "
603 "input matrix, then call compute(), before calling this method.");
604 TEUCHOS_TEST_FOR_EXCEPTION(
605 ! isComputed (),
606 std::runtime_error,
607 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
608 "preconditioner instance before you may call apply(). You may call "
609 "isComputed() to find out if compute() has been called already.");
610 TEUCHOS_TEST_FOR_EXCEPTION(
611 X.getNumVectors() != Y.getNumVectors(),
612 std::runtime_error,
613 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
614 "X has " << X.getNumVectors() << " columns, but Y has "
615 << Y.getNumVectors() << " columns.");
616 TEUCHOS_TEST_FOR_EXCEPTION(
617 beta != STS::zero (), std::logic_error,
618 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
619 "implemented.");
620
621 Teuchos::RCP<Teuchos::Time> timer;
622 const std::string timerName ("Ifpack2::Relaxation::apply");
623 if (TimerForApply_) {
624 timer = Teuchos::TimeMonitor::lookupCounter (timerName);
625 if (timer.is_null ()) {
626 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
627 }
628 }
629
630 Teuchos::Time time = Teuchos::Time(timerName);
631 double startTime = time.wallTime();
632
633 {
634 Teuchos::RCP<Teuchos::TimeMonitor> timeMon;
635 if (TimerForApply_)
636 timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
637
638 // Special case: alpha == 0.
639 if (alpha == STS::zero ()) {
640 // No floating-point operations, so no need to update a count.
641 Y.putScalar (STS::zero ());
642 }
643 else {
644 // If X and Y alias one another, then we need to create an
645 // auxiliary vector, Xcopy (a deep copy of X).
646 RCP<const MV> Xcopy;
647 {
648 if (X.aliases(Y)) {
649 Xcopy = rcp (new MV (X, Teuchos::Copy));
650 } else {
651 Xcopy = rcpFromRef (X);
652 }
653 }
654
655 // Each of the following methods updates the flop count itself.
656 // All implementations handle zeroing out the starting solution
657 // (if necessary) themselves.
658 switch (PrecType_) {
659 case Ifpack2::Details::JACOBI:
660 ApplyInverseJacobi(*Xcopy,Y);
661 break;
662 case Ifpack2::Details::GS:
663 ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
664 break;
665 case Ifpack2::Details::SGS:
666 ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
667 break;
668 case Ifpack2::Details::MTGS:
669 case Ifpack2::Details::GS2:
670 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
671 break;
672 case Ifpack2::Details::MTSGS:
673 case Ifpack2::Details::SGS2:
674 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
675 break;
676 case Ifpack2::Details::RICHARDSON:
677 ApplyInverseRichardson(*Xcopy,Y);
678 break;
679
680 default:
681 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
682 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
683 << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
684 }
685 if (alpha != STS::one ()) {
686 Y.scale (alpha);
687 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
688 const double numVectors = as<double> (Y.getNumVectors ());
689 ApplyFlops_ += numGlobalRows * numVectors;
690 }
691 }
692 }
693 ApplyTime_ += (time.wallTime() - startTime);
694 ++NumApply_;
695}
696
697
698template<class MatrixType>
699void
701applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
702 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
703 Teuchos::ETransp mode) const
704{
705 TEUCHOS_TEST_FOR_EXCEPTION(
706 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
707 "The input matrix A is null. Please call setMatrix() with a nonnull "
708 "input matrix, then call compute(), before calling this method.");
709 TEUCHOS_TEST_FOR_EXCEPTION(
710 ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
711 "isComputed() must be true before you may call applyMat(). "
712 "Please call compute() before calling this method.");
713 TEUCHOS_TEST_FOR_EXCEPTION(
714 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
715 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
716 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
717 A_->apply (X, Y, mode);
718}
719
720
721template<class MatrixType>
723{
724 const char methodName[] = "Ifpack2::Relaxation::initialize";
725
726 TEUCHOS_TEST_FOR_EXCEPTION
727 (A_.is_null (), std::runtime_error, methodName << ": The "
728 "input matrix A is null. Please call setMatrix() with "
729 "a nonnull input matrix before calling this method.");
730
731 Teuchos::RCP<Teuchos::Time> timer =
732 Teuchos::TimeMonitor::getNewCounter (methodName);
733
734 double startTime = timer->wallTime();
735
736 { // Timing of initialize starts here
737 Teuchos::TimeMonitor timeMon (*timer);
738 isInitialized_ = false;
739
740 {
741 auto rowMap = A_->getRowMap ();
742 auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
743 IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
744 }
745
746 // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
747 // but in that case, the domain and column Maps are the same and
748 // we don't need to Import anyway.
749 //
750 // mfh 07 May 2019: A_->getGraph() might be an
751 // OverlappingRowGraph, which doesn't have an Import object.
752 // However, in that case, the comm should have size 1.
753 Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
754 Teuchos::null;
755
756 {
757 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
758 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
759 hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
760 }
761
762 serialGaussSeidel_ = Teuchos::null;
763
764 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
765 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
766 auto crsMat = Details::getCrsMatrix(A_);
767 TEUCHOS_TEST_FOR_EXCEPTION
768 (crsMat.is_null(), std::logic_error, methodName << ": "
769 "Multithreaded Gauss-Seidel methods currently only work "
770 "when the input matrix is a Tpetra::CrsMatrix.");
771
772 // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
773 // compute, not initialize, since users may change the matrix's
774 // values at any time before calling compute.
775 if (ifpack2_dump_matrix_) {
776 static int sequence_number = 0;
777 const std::string file_name = "Ifpack2_MT_GS_" +
778 std::to_string (sequence_number++) + ".mtx";
779 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
780 writer_type::writeSparseFile (file_name, crsMat);
781 }
782
783 this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
784 if (mtKernelHandle_->get_gs_handle () == nullptr) {
785 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
786 mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
787 else if(this->clusterSize_ == 1) {
788 mtKernelHandle_->create_gs_handle (KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
789 mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
790 }
791 else
792 mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
793 }
794 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
795 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
796 // set parameters for two-stage GS
797 mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
798 mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
799 mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
800 mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getLocalNumRows ());
801 mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
802 }
803
804 KokkosSparse::Experimental::gauss_seidel_symbolic(
805 mtKernelHandle_.getRawPtr (),
806 A_->getLocalNumRows (),
807 A_->getLocalNumCols (),
808 kcsr.graph.row_map,
809 kcsr.graph.entries,
810 is_matrix_structurally_symmetric_);
811 }
812 } // timing of initialize stops here
813
814 InitializeTime_ += (timer->wallTime() - startTime);
815 ++NumInitialize_;
816 isInitialized_ = true;
817}
818
819namespace Impl {
820template <typename BlockDiagView>
821struct InvertDiagBlocks {
822 typedef typename BlockDiagView::size_type Size;
823
824private:
825 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
826 template <typename View>
827 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
828 typename View::device_type, Unmanaged>;
829
830 typedef typename BlockDiagView::non_const_value_type Scalar;
831 typedef typename BlockDiagView::device_type Device;
832 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
833 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
834
835 UnmanagedView<BlockDiagView> block_diag_;
836 // TODO Use thread team and scratch memory space. In this first
837 // pass, provide workspace for each block.
838 RWrk rwrk_buf_;
839 UnmanagedView<RWrk> rwrk_;
840 IWrk iwrk_buf_;
841 UnmanagedView<IWrk> iwrk_;
842
843public:
844 InvertDiagBlocks (BlockDiagView& block_diag)
845 : block_diag_(block_diag)
846 {
847 const auto blksz = block_diag.extent(1);
848 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
849 rwrk_ = rwrk_buf_;
850 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
851 iwrk_ = iwrk_buf_;
852 }
853
854 KOKKOS_INLINE_FUNCTION
855 void operator() (const Size i, int& jinfo) const {
856 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
857 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
858 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
859 int info = 0;
860 Tpetra::GETF2(D_cur, ipiv, info);
861 if (info) {
862 ++jinfo;
863 return;
864 }
865 Tpetra::GETRI(D_cur, ipiv, work, info);
866 if (info) ++jinfo;
867 }
868};
869}
870
871template<class MatrixType>
872void Relaxation<MatrixType>::computeBlockCrs ()
873{
874 using Kokkos::ALL;
875 using Teuchos::Array;
876 using Teuchos::ArrayRCP;
877 using Teuchos::ArrayView;
878 using Teuchos::as;
879 using Teuchos::Comm;
880 using Teuchos::RCP;
881 using Teuchos::rcp;
882 using Teuchos::REDUCE_MAX;
883 using Teuchos::REDUCE_MIN;
884 using Teuchos::REDUCE_SUM;
885 using Teuchos::rcp_dynamic_cast;
886 using Teuchos::reduceAll;
887 typedef local_ordinal_type LO;
888
889 const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
890 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
891 if (timer.is_null ()) {
892 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
893 }
894 double startTime = timer->wallTime();
895 {
896 Teuchos::TimeMonitor timeMon (*timer);
897
898 TEUCHOS_TEST_FOR_EXCEPTION(
899 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
900 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
901 "with a nonnull input matrix, then call initialize(), before calling "
902 "this method.");
903 auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
904 TEUCHOS_TEST_FOR_EXCEPTION(
905 blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
906 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
907 "got this far. Please report this bug to the Ifpack2 developers.");
908
909 const scalar_type one = STS::one ();
910
911 // Reset state.
912 IsComputed_ = false;
913
914 const LO lclNumMeshRows =
915 blockCrsA->getCrsGraph ().getLocalNumRows ();
916 const LO blockSize = blockCrsA->getBlockSize ();
917
918 if (! savedDiagOffsets_) {
919 blockDiag_ = block_diag_type (); // clear it before reallocating
920 blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
921 lclNumMeshRows, blockSize, blockSize);
922 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
923 // Clear diagOffsets_ first (by assigning an empty View to it)
924 // to save memory, before reallocating.
925 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
926 diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
927 }
928 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
929 TEUCHOS_TEST_FOR_EXCEPTION
930 (static_cast<size_t> (diagOffsets_.extent (0)) !=
931 static_cast<size_t> (blockDiag_.extent (0)),
932 std::logic_error, "diagOffsets_.extent(0) = " <<
933 diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
934 << blockDiag_.extent (0) <<
935 ". Please report this bug to the Ifpack2 developers.");
936 savedDiagOffsets_ = true;
937 }
938 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
939
940 // Use an unmanaged View in this method, so that when we take
941 // subviews of it (to get each diagonal block), we don't have to
942 // touch the reference count. Reference count updates are a
943 // thread scalability bottleneck and have a performance cost even
944 // without using threads.
945 unmanaged_block_diag_type blockDiag = blockDiag_;
946
947 if (DoL1Method_ && IsParallel_) {
948 const scalar_type two = one + one;
949 const size_t maxLength = A_->getLocalMaxNumRowEntries ();
950 nonconst_local_inds_host_view_type indices ("indices",maxLength);
951 nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
952 size_t numEntries = 0;
953
954 for (LO i = 0; i < lclNumMeshRows; ++i) {
955 // FIXME (mfh 16 Dec 2015) Get views instead of copies.
956 blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
957 scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
958
959 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
960 for (LO subRow = 0; subRow < blockSize; ++subRow) {
961 magnitude_type diagonal_boost = STM::zero ();
962 for (size_t k = 0 ; k < numEntries ; ++k) {
963 if (indices[k] >= lclNumMeshRows) {
964 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
965 for (LO subCol = 0; subCol < blockSize; ++subCol) {
966 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
967 }
968 }
969 }
970 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
971 diagBlock(subRow, subRow) += diagonal_boost;
972 }
973 }
974 }
975 }
976
977 int info = 0;
978 {
979 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
980 typedef typename unmanaged_block_diag_type::execution_space exec_space;
981 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
982
983 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
984 // FIXME (mfh 19 May 2017) Different processes may not
985 // necessarily have this error all at the same time, so it would
986 // be better just to preserve a local error state and let users
987 // check.
988 TEUCHOS_TEST_FOR_EXCEPTION
989 (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
990 << " diagonal blocks.");
991 }
992
993 // In a debug build, do an extra test to make sure that all the
994 // factorizations were computed correctly.
995#ifdef HAVE_IFPACK2_DEBUG
996 const int numResults = 2;
997 // Use "max = -min" trick to get min and max in a single all-reduce.
998 int lclResults[2], gblResults[2];
999 lclResults[0] = info;
1000 lclResults[1] = -info;
1001 gblResults[0] = 0;
1002 gblResults[1] = 0;
1003 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
1004 numResults, lclResults, gblResults);
1005 TEUCHOS_TEST_FOR_EXCEPTION
1006 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
1007 "Ifpack2::Relaxation::compute: When processing the input "
1008 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
1009 "failed on one or more (MPI) processes.");
1010#endif // HAVE_IFPACK2_DEBUG
1011 serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
1012 } // end TimeMonitor scope
1013
1014 ComputeTime_ += (timer->wallTime() - startTime);
1015 ++NumCompute_;
1016 IsComputed_ = true;
1017}
1018
1019template<class MatrixType>
1021{
1022 using Teuchos::Array;
1023 using Teuchos::ArrayRCP;
1024 using Teuchos::ArrayView;
1025 using Teuchos::as;
1026 using Teuchos::Comm;
1027 using Teuchos::RCP;
1028 using Teuchos::rcp;
1029 using Teuchos::REDUCE_MAX;
1030 using Teuchos::REDUCE_MIN;
1031 using Teuchos::REDUCE_SUM;
1032 using Teuchos::reduceAll;
1033 using LO = local_ordinal_type;
1034 using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1036 using IST = typename vector_type::impl_scalar_type;
1037 using KAT = Kokkos::ArithTraits<IST>;
1038
1039 const char methodName[] = "Ifpack2::Relaxation::compute";
1040 const scalar_type zero = STS::zero ();
1041 const scalar_type one = STS::one ();
1042
1043 TEUCHOS_TEST_FOR_EXCEPTION
1044 (A_.is_null (), std::runtime_error, methodName << ": "
1045 "The input matrix A is null. Please call setMatrix() with a nonnull "
1046 "input matrix, then call initialize(), before calling this method.");
1047
1048 // We don't count initialization in compute() time.
1049 if (! isInitialized ()) {
1050 initialize ();
1051 }
1052
1053 if (hasBlockCrsMatrix_) {
1054 computeBlockCrs ();
1055 return;
1056 }
1057
1058 Teuchos::RCP<Teuchos::Time> timer =
1059 Teuchos::TimeMonitor::getNewCounter (methodName);
1060 double startTime = timer->wallTime();
1061
1062 { // Timing of compute starts here.
1063 Teuchos::TimeMonitor timeMon (*timer);
1064
1065 // Precompute some quantities for "fixing" zero or tiny diagonal
1066 // entries. We'll only use them if this "fixing" is enabled.
1067 //
1068 // SmallTraits covers for the lack of eps() in
1069 // Teuchos::ScalarTraits when its template parameter is not a
1070 // floating-point type. (Ifpack2 sometimes gets instantiated for
1071 // integer Scalar types.)
1072 IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1073 if ( MinDiagonalValue_ != zero)
1074 oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1075
1076 // It's helpful not to have to recompute this magnitude each time.
1077 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1078
1079 const LO numMyRows = static_cast<LO> (A_->getLocalNumRows ());
1080
1081 TEUCHOS_TEST_FOR_EXCEPTION
1082 (NumSweeps_ < 0, std::logic_error, methodName
1083 << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1084 "Please report this bug to the Ifpack2 developers.");
1085 IsComputed_ = false;
1086
1087 if (Diagonal_.is_null()) {
1088 // A_->getLocalDiagCopy fills in all Vector entries, even if the
1089 // matrix has no stored entries in the corresponding rows.
1090 Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1091 }
1092
1093 if (checkDiagEntries_) {
1094 //
1095 // Check diagonal elements, replace zero elements with the minimum
1096 // diagonal value, and store their inverses. Count the number of
1097 // "small" and zero diagonal entries while we're at it.
1098 //
1099 size_t numSmallDiagEntries = 0; // "small" includes zero
1100 size_t numZeroDiagEntries = 0; // # zero diagonal entries
1101 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1102 magnitude_type minMagDiagEntryMag = STM::zero ();
1103 magnitude_type maxMagDiagEntryMag = STM::zero ();
1104
1105 // FIXME: We are extracting the diagonal more than once. That
1106 // isn't very efficient, but we shouldn't be checking
1107 // diagonal entries at scale anyway.
1108 A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1109 std::unique_ptr<vector_type> origDiag;
1110 origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1111 auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1112 auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1113 // As we go, keep track of the diagonal entries with the
1114 // least and greatest magnitude. We could use the trick of
1115 // starting min with +Inf and max with -Inf, but that
1116 // doesn't work if scalar_type is a built-in integer type.
1117 // Thus, we have to start by reading the first diagonal
1118 // entry redundantly.
1119 if (numMyRows != 0) {
1120 const magnitude_type d_0_mag = KAT::abs (diag(0));
1121 minMagDiagEntryMag = d_0_mag;
1122 maxMagDiagEntryMag = d_0_mag;
1123 }
1124
1125 // Go through all the diagonal entries. Compute counts of
1126 // small-magnitude, zero, and negative-real-part entries.
1127 // Invert the diagonal entries that aren't too small. For
1128 // those too small in magnitude, replace them with
1129 // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1130 // happens to be zero).
1131 for (LO i = 0; i < numMyRows; ++i) {
1132 const IST d_i = diag(i);
1133 const magnitude_type d_i_mag = KAT::abs (d_i);
1134 // Work-around for GitHub Issue #5269.
1135 //const magnitude_type d_i_real = KAT::real (d_i);
1136 const auto d_i_real = getRealValue (d_i);
1137
1138 // We can't compare complex numbers, but we can compare their
1139 // real parts.
1140 if (d_i_real < STM::zero ()) {
1141 ++numNegDiagEntries;
1142 }
1143 if (d_i_mag < minMagDiagEntryMag) {
1144 minMagDiagEntryMag = d_i_mag;
1145 }
1146 if (d_i_mag > maxMagDiagEntryMag) {
1147 maxMagDiagEntryMag = d_i_mag;
1148 }
1149
1150 if (fixTinyDiagEntries_) {
1151 // <= not <, in case minDiagValMag is zero.
1152 if (d_i_mag <= minDiagValMag) {
1153 ++numSmallDiagEntries;
1154 if (d_i_mag == STM::zero ()) {
1155 ++numZeroDiagEntries;
1156 }
1157 diag(i) = oneOverMinDiagVal;
1158 }
1159 else {
1160 diag(i) = KAT::one () / d_i;
1161 }
1162 }
1163 else { // Don't fix zero or tiny diagonal entries.
1164 // <= not <, in case minDiagValMag is zero.
1165 if (d_i_mag <= minDiagValMag) {
1166 ++numSmallDiagEntries;
1167 if (d_i_mag == STM::zero ()) {
1168 ++numZeroDiagEntries;
1169 }
1170 }
1171 diag(i) = KAT::one () / d_i;
1172 }
1173 }
1174
1175 // Collect global data about the diagonal entries. Only do this
1176 // (which involves O(1) all-reduces) if the user asked us to do
1177 // the extra work.
1178 //
1179 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1180 // zero rows. Fixing this requires one of two options:
1181 //
1182 // 1. Use a custom reduction operation that ignores processes
1183 // with zero diagonal entries.
1184 // 2. Split the communicator, compute all-reduces using the
1185 // subcommunicator over processes that have a nonzero number
1186 // of diagonal entries, and then broadcast from one of those
1187 // processes (if there is one) to the processes in the other
1188 // subcommunicator.
1189 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1190
1191 // Compute global min and max magnitude of entries.
1192 Array<magnitude_type> localVals (2);
1193 localVals[0] = minMagDiagEntryMag;
1194 // (- (min (- x))) is the same as (max x).
1195 localVals[1] = -maxMagDiagEntryMag;
1196 Array<magnitude_type> globalVals (2);
1197 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1198 localVals.getRawPtr (),
1199 globalVals.getRawPtr ());
1200 globalMinMagDiagEntryMag_ = globalVals[0];
1201 globalMaxMagDiagEntryMag_ = -globalVals[1];
1202
1203 Array<size_t> localCounts (3);
1204 localCounts[0] = numSmallDiagEntries;
1205 localCounts[1] = numZeroDiagEntries;
1206 localCounts[2] = numNegDiagEntries;
1207 Array<size_t> globalCounts (3);
1208 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1209 localCounts.getRawPtr (),
1210 globalCounts.getRawPtr ());
1211 globalNumSmallDiagEntries_ = globalCounts[0];
1212 globalNumZeroDiagEntries_ = globalCounts[1];
1213 globalNumNegDiagEntries_ = globalCounts[2];
1214
1215 // Compute and save the difference between the computed inverse
1216 // diagonal, and the original diagonal's inverse.
1217 vector_type diff (A_->getRowMap ());
1218 diff.reciprocal (*origDiag);
1219 diff.update (-one, *Diagonal_, one);
1220 globalDiagNormDiff_ = diff.norm2 ();
1221 }
1222
1223 // Extract the diagonal entries. The CrsMatrix graph version is
1224 // faster than the row matrix version for subsequent calls to
1225 // compute(), since it caches the diagonal offsets.
1226
1227 bool debugAgainstSlowPath = false;
1228
1229 auto crsMat = Details::getCrsMatrix(A_);
1230
1231 if (crsMat.get() && crsMat->isFillComplete ()) {
1232 // The invDiagKernel object computes diagonal offsets if
1233 // necessary. The "compute" call extracts diagonal enties,
1234 // optionally applies the L1 method and replacement of small
1235 // entries, and then inverts.
1236 if (invDiagKernel_.is_null())
1237 invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1238 else
1239 invDiagKernel_->setMatrix(crsMat);
1240 invDiagKernel_->compute(*Diagonal_,
1241 DoL1Method_ && IsParallel_, L1Eta_,
1242 fixTinyDiagEntries_, minDiagValMag);
1243 savedDiagOffsets_ = true;
1244
1245 // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1246 // environment variable to control this behavior at run time.
1247#ifdef HAVE_IFPACK2_DEBUG
1248 debugAgainstSlowPath = true;
1249#endif
1250 }
1251
1252 if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1253 // We could not call the CrsMatrix version above, or want to
1254 // debug by comparing against the slow path.
1255
1256 // FIXME: The L1 method in this code path runs on host, since we
1257 // don't have device access for row matrices.
1258
1259 RCP<vector_type> Diagonal;
1260 if (debugAgainstSlowPath)
1261 Diagonal = rcp(new vector_type(A_->getRowMap ()));
1262 else
1263 Diagonal = Diagonal_;
1264
1265 A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1266
1267 // Setup for L1 Methods.
1268 // Here we add half the value of the off-processor entries in the row,
1269 // but only if diagonal isn't sufficiently large.
1270 //
1271 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1272 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1273 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1274 //
1275 if (DoL1Method_ && IsParallel_) {
1276 const row_matrix_type& A_row = *A_;
1277 auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1278 const magnitude_type two = STM::one () + STM::one ();
1279 const size_t maxLength = A_row.getLocalMaxNumRowEntries ();
1280 nonconst_local_inds_host_view_type indices("indices",maxLength);
1281 nonconst_values_host_view_type values("values",maxLength);
1282 size_t numEntries;
1283
1284 for (LO i = 0; i < numMyRows; ++i) {
1285 A_row.getLocalRowCopy (i, indices, values, numEntries);
1286 magnitude_type diagonal_boost = STM::zero ();
1287 for (size_t k = 0 ; k < numEntries; ++k) {
1288 if (indices[k] >= numMyRows) {
1289 diagonal_boost += STS::magnitude (values[k] / two);
1290 }
1291 }
1292 if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1293 diag(i, 0) += diagonal_boost;
1294 }
1295 }
1296 }
1297
1298 //
1299 // Invert the matrix's diagonal entries (in Diagonal).
1300 //
1301 if (fixTinyDiagEntries_) {
1302 // Go through all the diagonal entries. Invert those that
1303 // aren't too small in magnitude. For those that are too
1304 // small in magnitude, replace them with oneOverMinDiagVal.
1305 auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1306 Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1307 KOKKOS_LAMBDA (local_ordinal_type i) {
1308 auto d_i = localDiag(i, 0);
1309 const magnitude_type d_i_mag = KAT::magnitude (d_i);
1310 // <= not <, in case minDiagValMag is zero.
1311 if (d_i_mag <= minDiagValMag) {
1312 d_i = oneOverMinDiagVal;
1313 }
1314 else {
1315 // For Stokhos types, operator/ returns an expression
1316 // type. Explicitly convert to IST before returning.
1317 d_i = IST (KAT::one () / d_i);
1318 }
1319 localDiag(i, 0) = d_i;
1320 });
1321 }
1322 else { // don't fix tiny or zero diagonal entries
1323 Diagonal->reciprocal (*Diagonal);
1324 }
1325
1326 if (debugAgainstSlowPath) {
1327 // Validate the fast-path diagonal against the slow-path diagonal.
1328 Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1329 const magnitude_type err = Diagonal->normInf ();
1330 // The two diagonals should be exactly the same, so their
1331 // difference should be exactly zero.
1332 TEUCHOS_TEST_FOR_EXCEPTION
1333 (err > 100*STM::eps(), std::logic_error, methodName << ": "
1334 << "\"fast-path\" diagonal computation failed. "
1335 "\\|D1 - D2\\|_inf = " << err << ".");
1336 }
1337 }
1338
1339 // Count floating-point operations of computing the inverse diagonal.
1340 //
1341 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1342 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1343 ComputeFlops_ += 4.0 * numMyRows;
1344 }
1345 else {
1346 ComputeFlops_ += numMyRows;
1347 }
1348
1349 if (PrecType_ == Ifpack2::Details::MTGS ||
1350 PrecType_ == Ifpack2::Details::MTSGS ||
1351 PrecType_ == Ifpack2::Details::GS2 ||
1352 PrecType_ == Ifpack2::Details::SGS2) {
1353
1354 //KokkosKernels GaussSeidel Initialization.
1355 using scalar_view_t = typename local_matrix_device_type::values_type;
1356
1357 TEUCHOS_TEST_FOR_EXCEPTION
1358 (crsMat.is_null(), std::logic_error, methodName << ": "
1359 "Multithreaded Gauss-Seidel methods currently only work "
1360 "when the input matrix is a Tpetra::CrsMatrix.");
1361 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1362
1363 //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1364 //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1365 auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1366 scalar_view_t diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1367 KokkosSparse::Experimental::gauss_seidel_numeric(
1368 mtKernelHandle_.getRawPtr (),
1369 A_->getLocalNumRows (),
1370 A_->getLocalNumCols (),
1371 kcsr.graph.row_map,
1372 kcsr.graph.entries,
1373 kcsr.values,
1374 diagView_1d,
1375 is_matrix_structurally_symmetric_);
1376 }
1377 else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1378 if(crsMat)
1379 serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1380 else
1381 serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1382 }
1383 } // end TimeMonitor scope
1384
1385 ComputeTime_ += (timer->wallTime() - startTime);
1386 ++NumCompute_;
1387 IsComputed_ = true;
1388}
1389
1390
1391template<class MatrixType>
1392void
1394ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1395 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1396{
1397 using Teuchos::as;
1398 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1399 const double numVectors = as<double> (X.getNumVectors ());
1400 if (ZeroStartingSolution_) {
1401 // For the first Richardson sweep, if we are allowed to assume that
1402 // the initial guess is zero, then Richardson is just alpha times the RHS
1403 // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1404 Y.scale(DampingFactor_,X);
1405
1406 // Count (global) floating-point operations. Ifpack2 represents
1407 // this as a floating-point number rather than an integer, so that
1408 // overflow (for a very large number of calls, or a very large
1409 // problem) is approximate instead of catastrophic.
1410 double flopUpdate = 0.0;
1411 if (DampingFactor_ == STS::one ()) {
1412 // Y(i,j) = X(i,j): one multiply for each entry of Y.
1413 flopUpdate = numGlobalRows * numVectors;
1414 } else {
1415 // Y(i,j) = DampingFactor_ * X(i,j):
1416 // One multiplies per entry of Y.
1417 flopUpdate = numGlobalRows * numVectors;
1418 }
1419 ApplyFlops_ += flopUpdate;
1420 if (NumSweeps_ == 1) {
1421 return;
1422 }
1423 }
1424 // If we were allowed to assume that the starting guess was zero,
1425 // then we have already done the first sweep above.
1426 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1427
1428 // Allocate a multivector if the cached one isn't perfect
1429 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1430
1431 for (int j = startSweep; j < NumSweeps_; ++j) {
1432 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1433 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1434 Y.update(DampingFactor_,*cachedMV_,STS::one());
1435 }
1436
1437 // For each column of output, for each pass over the matrix:
1438 //
1439 // - One + and one * for each matrix entry
1440 // - One / and one + for each row of the matrix
1441 // - If the damping factor is not one: one * for each row of the
1442 // matrix. (It's not fair to count this if the damping factor is
1443 // one, since the implementation could skip it. Whether it does
1444 // or not is the implementation's choice.)
1445
1446 // Floating-point operations due to the damping factor, per matrix
1447 // row, per direction, per columm of output.
1448 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1449 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1450 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1451 (2.0 * numGlobalNonzeros + dampingFlops);
1452}
1453
1454
1455template<class MatrixType>
1456void
1458ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1459 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1460{
1461 using Teuchos::as;
1462 if (hasBlockCrsMatrix_) {
1463 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1464 return;
1465 }
1466
1467 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1468 const double numVectors = as<double> (X.getNumVectors ());
1469 if (ZeroStartingSolution_) {
1470 // For the first Jacobi sweep, if we are allowed to assume that
1471 // the initial guess is zero, then Jacobi is just diagonal
1472 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1473 // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1474 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1475
1476 // Count (global) floating-point operations. Ifpack2 represents
1477 // this as a floating-point number rather than an integer, so that
1478 // overflow (for a very large number of calls, or a very large
1479 // problem) is approximate instead of catastrophic.
1480 double flopUpdate = 0.0;
1481 if (DampingFactor_ == STS::one ()) {
1482 // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1483 flopUpdate = numGlobalRows * numVectors;
1484 } else {
1485 // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1486 // Two multiplies per entry of Y.
1487 flopUpdate = 2.0 * numGlobalRows * numVectors;
1488 }
1489 ApplyFlops_ += flopUpdate;
1490 if (NumSweeps_ == 1) {
1491 return;
1492 }
1493 }
1494 // If we were allowed to assume that the starting guess was zero,
1495 // then we have already done the first sweep above.
1496 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1497
1498 // Allocate a multivector if the cached one isn't perfect
1499 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1500
1501 for (int j = startSweep; j < NumSweeps_; ++j) {
1502 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1503 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1504 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1505 }
1506
1507 // For each column of output, for each pass over the matrix:
1508 //
1509 // - One + and one * for each matrix entry
1510 // - One / and one + for each row of the matrix
1511 // - If the damping factor is not one: one * for each row of the
1512 // matrix. (It's not fair to count this if the damping factor is
1513 // one, since the implementation could skip it. Whether it does
1514 // or not is the implementation's choice.)
1515
1516 // Floating-point operations due to the damping factor, per matrix
1517 // row, per direction, per columm of output.
1518 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1519 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1520 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1521 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1522}
1523
1524template<class MatrixType>
1525void
1527ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1530 node_type>& X,
1531 Tpetra::MultiVector<scalar_type,
1534 node_type>& Y) const
1535{
1536 using Tpetra::BlockMultiVector;
1537 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1539
1540 const block_crs_matrix_type* blockMatConst =
1541 dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1542 TEUCHOS_TEST_FOR_EXCEPTION
1543 (blockMatConst == nullptr, std::logic_error, "This method should "
1544 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1545 "Please report this bug to the Ifpack2 developers.");
1546 // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1547 // This is because applyBlock() is nonconst (more accurate), while
1548 // apply() is const (required by Tpetra::Operator interface, but a
1549 // lie, because it possibly allocates temporary buffers).
1550 block_crs_matrix_type* blockMat =
1551 const_cast<block_crs_matrix_type*> (blockMatConst);
1552
1553 auto meshRowMap = blockMat->getRowMap ();
1554 auto meshColMap = blockMat->getColMap ();
1555 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1556
1557 BMV X_blk (X, *meshColMap, blockSize);
1558 BMV Y_blk (Y, *meshRowMap, blockSize);
1559
1560 if (ZeroStartingSolution_) {
1561 // For the first sweep, if we are allowed to assume that the
1562 // initial guess is zero, then block Jacobi is just block diagonal
1563 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1564 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1565 if (NumSweeps_ == 1) {
1566 return;
1567 }
1568 }
1569
1570 auto pointRowMap = Y.getMap ();
1571 const size_t numVecs = X.getNumVectors ();
1572
1573 // We don't need to initialize the result MV, since the sparse
1574 // mat-vec will clobber its contents anyway.
1575 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1576
1577 // If we were allowed to assume that the starting guess was zero,
1578 // then we have already done the first sweep above.
1579 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1580
1581 for (int j = startSweep; j < NumSweeps_; ++j) {
1582 blockMat->applyBlock (Y_blk, A_times_Y);
1583 // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1584 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1585 X_blk, A_times_Y, STS::one ());
1586 }
1587}
1588
1589template<class MatrixType>
1590void
1592ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1593 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1594 Tpetra::ESweepDirection direction) const
1595{
1596 using this_type = Relaxation<MatrixType>;
1597 // The CrsMatrix version is faster, because it can access the sparse
1598 // matrix data directly, rather than by copying out each row's data
1599 // in turn. Thus, we check whether the RowMatrix is really a
1600 // CrsMatrix.
1601 //
1602 // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1603 // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1604 // will still be correct if the cast fails, but it will use an
1605 // unoptimized kernel.
1606 auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1607 auto crsMat = Details::getCrsMatrix(A_);
1608 if (blockCrsMat.get()) {
1609 const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1610 }
1611 else if (crsMat.get()) {
1612 ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1613 }
1614 else {
1615 ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1616 }
1617}
1618
1619
1620template<class MatrixType>
1621void
1623ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1624 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1625 Tpetra::ESweepDirection direction) const {
1626 using Teuchos::Array;
1627 using Teuchos::ArrayRCP;
1628 using Teuchos::ArrayView;
1629 using Teuchos::as;
1630 using Teuchos::RCP;
1631 using Teuchos::rcp;
1632 using Teuchos::rcpFromRef;
1633 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1634
1635 // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1636 // starting multivector itself. The generic RowMatrix version here
1637 // does not, so we have to zero out Y here.
1638 if (ZeroStartingSolution_) {
1639 Y.putScalar (STS::zero ());
1640 }
1641
1642 size_t NumVectors = X.getNumVectors();
1643
1644 RCP<MV> Y2;
1645 if (IsParallel_) {
1646 if (Importer_.is_null ()) { // domain and column Maps are the same.
1647 updateCachedMultiVector (Y.getMap (), NumVectors);
1648 }
1649 else {
1650 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1651 }
1652 Y2 = cachedMV_;
1653 }
1654 else {
1655 Y2 = rcpFromRef (Y);
1656 }
1657
1658 for (int j = 0; j < NumSweeps_; ++j) {
1659 // data exchange is here, once per sweep
1660 if (IsParallel_) {
1661 if (Importer_.is_null ()) {
1662 // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1663 // clear if this is correct. Reevaluate at some point.
1664
1665 *Y2 = Y; // just copy, since domain and column Maps are the same
1666 } else {
1667 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1668 }
1669 }
1670 serialGaussSeidel_->apply(*Y2, X, direction);
1671
1672 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1673 if (IsParallel_) {
1674 Tpetra::deep_copy (Y, *Y2);
1675 }
1676 }
1677
1678 // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1679 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1680 const double numVectors = as<double> (X.getNumVectors ());
1681 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1682 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1683 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1684 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1685}
1686
1687
1688template<class MatrixType>
1689void
1691ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1692 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1693 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1694 Tpetra::ESweepDirection direction) const
1695{
1696 using Teuchos::null;
1697 using Teuchos::RCP;
1698 using Teuchos::rcp;
1699 using Teuchos::rcpFromRef;
1700 using Teuchos::rcp_const_cast;
1701 typedef scalar_type Scalar;
1702 const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1703 const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1704
1705 TEUCHOS_TEST_FOR_EXCEPTION(
1706 ! A.isFillComplete (), std::runtime_error,
1707 prefix << "The matrix is not fill complete.");
1708 TEUCHOS_TEST_FOR_EXCEPTION(
1709 NumSweeps_ < 0, std::invalid_argument,
1710 prefix << "The number of sweeps must be nonnegative, "
1711 "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1712
1713 if (NumSweeps_ == 0) {
1714 return;
1715 }
1716
1717 RCP<const import_type> importer = A.getGraph ()->getImporter ();
1718
1719 RCP<const map_type> domainMap = A.getDomainMap ();
1720 RCP<const map_type> rangeMap = A.getRangeMap ();
1721 RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1722 RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1723
1724#ifdef HAVE_IFPACK2_DEBUG
1725 {
1726 // The relation 'isSameAs' is transitive. It's also a
1727 // collective, so we don't have to do a "shared" test for
1728 // exception (i.e., a global reduction on the test value).
1729 TEUCHOS_TEST_FOR_EXCEPTION(
1730 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1731 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1732 "multivector X be in the domain Map of the matrix.");
1733 TEUCHOS_TEST_FOR_EXCEPTION(
1734 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1735 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1736 "B be in the range Map of the matrix.");
1737 TEUCHOS_TEST_FOR_EXCEPTION(
1738 ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1739 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1740 "D be in the row Map of the matrix.");
1741 TEUCHOS_TEST_FOR_EXCEPTION(
1742 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1743 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1744 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1745 TEUCHOS_TEST_FOR_EXCEPTION(
1746 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1747 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1748 "the range Map of the matrix be the same.");
1749 }
1750#endif
1751
1752 // Fetch a (possibly cached) temporary column Map multivector
1753 // X_colMap, and a domain Map view X_domainMap of it. Both have
1754 // constant stride by construction. We know that the domain Map
1755 // must include the column Map, because our Gauss-Seidel kernel
1756 // requires that the row Map, domain Map, and range Map are all
1757 // the same, and that each process owns all of its own diagonal
1758 // entries of the matrix.
1759
1760 RCP<multivector_type> X_colMap;
1761 RCP<multivector_type> X_domainMap;
1762 bool copyBackOutput = false;
1763 if (importer.is_null ()) {
1764 X_colMap = Teuchos::rcpFromRef (X);
1765 X_domainMap = Teuchos::rcpFromRef (X);
1766 // Column Map and domain Map are the same, so there are no
1767 // remote entries. Thus, if we are not setting the initial
1768 // guess to zero, we don't have to worry about setting remote
1769 // entries to zero, even though we are not doing an Import in
1770 // this case.
1771 if (ZeroStartingSolution_) {
1772 X_colMap->putScalar (ZERO);
1773 }
1774 // No need to copy back to X at end.
1775 }
1776 else { // Column Map and domain Map are _not_ the same.
1777 updateCachedMultiVector(colMap, X.getNumVectors());
1778 X_colMap = cachedMV_;
1779 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1780
1781 if (ZeroStartingSolution_) {
1782 // No need for an Import, since we're filling with zeros.
1783 X_colMap->putScalar (ZERO);
1784 } else {
1785 // We could just copy X into X_domainMap. However, that
1786 // wastes a copy, because the Import also does a copy (plus
1787 // communication). Since the typical use case for
1788 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1789 // don't want to waste that copy. Thus, we do the Import
1790 // here, and skip the first Import in the first sweep.
1791 // Importing directly from X effects the copy into X_domainMap
1792 // (which is a view of X_colMap).
1793 X_colMap->doImport (X, *importer, Tpetra::INSERT);
1794 }
1795 copyBackOutput = true; // Don't forget to copy back at end.
1796 } // if column and domain Maps are (not) the same
1797
1798 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1799 if (! importer.is_null () && sweep > 0) {
1800 // We already did the first Import for the zeroth sweep above,
1801 // if it was necessary.
1802 X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1803 }
1804 // Do local Gauss-Seidel (forward, backward or symmetric)
1805 serialGaussSeidel_->apply(*X_colMap, B, direction);
1806 }
1807
1808 if (copyBackOutput) {
1809 try {
1810 deep_copy (X , *X_domainMap); // Copy result back into X.
1811 } catch (std::exception& e) {
1812 TEUCHOS_TEST_FOR_EXCEPTION(
1813 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1814 "threw an exception: " << e.what ());
1815 }
1816 }
1817
1818 // For each column of output, for each sweep over the matrix:
1819 //
1820 // - One + and one * for each matrix entry
1821 // - One / and one + for each row of the matrix
1822 // - If the damping factor is not one: one * for each row of the
1823 // matrix. (It's not fair to count this if the damping factor is
1824 // one, since the implementation could skip it. Whether it does
1825 // or not is the implementation's choice.)
1826
1827 // Floating-point operations due to the damping factor, per matrix
1828 // row, per direction, per columm of output.
1829 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1830 const double numVectors = X.getNumVectors ();
1831 const double numGlobalRows = A_->getGlobalNumRows ();
1832 const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1833 ApplyFlops_ += NumSweeps_ * numVectors *
1834 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1835}
1836
1837template<class MatrixType>
1838void
1840ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1841 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1842 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1843 Tpetra::ESweepDirection direction)
1844{
1845 using Tpetra::INSERT;
1846 using Teuchos::RCP;
1847 using Teuchos::rcp;
1848 using Teuchos::rcpFromRef;
1849
1850 //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1851 //
1852 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1853 // multiple right-hand sides, unless the input or output MultiVector
1854 // does not have constant stride. We should check for that case
1855 // here, in case it doesn't work in localGaussSeidel (which is
1856 // entirely possible).
1857 block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1858 const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1859
1860 bool performImport = false;
1861 RCP<block_multivector_type> yBlockCol;
1862 if (Importer_.is_null()) {
1863 yBlockCol = rcpFromRef(yBlock);
1864 }
1865 else {
1866 if (yBlockColumnPointMap_.is_null() ||
1867 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1868 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1869 yBlockColumnPointMap_ =
1870 rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1871 static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1872 }
1873 yBlockCol = yBlockColumnPointMap_;
1874 if (pointImporter_.is_null()) {
1875 auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1876 auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1877 pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1878 }
1879 performImport = true;
1880 }
1881
1882 multivector_type yBlock_mv;
1883 multivector_type yBlockCol_mv;
1884 RCP<const multivector_type> yBlockColPointDomain;
1885 if (performImport) { // create views (shallow copies)
1886 yBlock_mv = yBlock.getMultiVectorView();
1887 yBlockCol_mv = yBlockCol->getMultiVectorView();
1888 yBlockColPointDomain =
1889 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1890 }
1891
1892 if (ZeroStartingSolution_) {
1893 yBlockCol->putScalar(STS::zero ());
1894 }
1895 else if (performImport) {
1896 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1897 }
1898
1899 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1900 if (performImport && sweep > 0) {
1901 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1902 }
1903 serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1904 if (performImport) {
1905 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1906 }
1907 }
1908}
1909
1910template<class MatrixType>
1911void
1914 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1915 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1916 Tpetra::ESweepDirection direction) const
1917{
1918 using Teuchos::null;
1919 using Teuchos::RCP;
1920 using Teuchos::rcp;
1921 using Teuchos::rcpFromRef;
1922 using Teuchos::rcp_const_cast;
1923 using Teuchos::as;
1924
1925 typedef scalar_type Scalar;
1926
1927 const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1928 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1929
1930 auto crsMat = Details::getCrsMatrix(A_);
1931 TEUCHOS_TEST_FOR_EXCEPTION
1932 (crsMat.is_null(), std::logic_error, "Ifpack2::Relaxation::apply: "
1933 "Multithreaded Gauss-Seidel methods currently only work when the "
1934 "input matrix is a Tpetra::CrsMatrix.");
1935
1936 //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1937 TEUCHOS_TEST_FOR_EXCEPTION
1938 (! localSmoothingIndices_.is_null (), std::logic_error,
1939 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1940 "implement the case where the user supplies an iteration order. "
1941 "This error used to appear as \"MT GaussSeidel ignores the given "
1942 "order\". "
1943 "I tried to add more explanation, but I didn't implement \"MT "
1944 "GaussSeidel\" [sic]. "
1945 "You'll have to ask the person who did.");
1946
1947 TEUCHOS_TEST_FOR_EXCEPTION
1948 (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1949 "input CrsMatrix is not fill complete. Please call fillComplete "
1950 "on the matrix, then do setup again, before calling apply(). "
1951 "\"Do setup\" means that if only the matrix's values have changed "
1952 "since last setup, you need only call compute(). If the matrix's "
1953 "structure may also have changed, you must first call initialize(), "
1954 "then call compute(). If you have not set up this preconditioner "
1955 "for this matrix before, you must first call initialize(), then "
1956 "call compute().");
1957 TEUCHOS_TEST_FOR_EXCEPTION
1958 (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1959 << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1960 "developers.");
1961 if (NumSweeps_ == 0) {
1962 return;
1963 }
1964
1965 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1966 TEUCHOS_TEST_FOR_EXCEPTION(
1967 ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1968 "This method's implementation currently requires that the matrix's row, "
1969 "domain, and range Maps be the same. This cannot be the case, because "
1970 "the matrix has a nontrivial Export object.");
1971
1972 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1973 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1974 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1975 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1976
1977#ifdef HAVE_IFPACK2_DEBUG
1978 {
1979 // The relation 'isSameAs' is transitive. It's also a
1980 // collective, so we don't have to do a "shared" test for
1981 // exception (i.e., a global reduction on the test value).
1982 TEUCHOS_TEST_FOR_EXCEPTION(
1983 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1984 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1985 "multivector X be in the domain Map of the matrix.");
1986 TEUCHOS_TEST_FOR_EXCEPTION(
1987 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1988 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1989 "B be in the range Map of the matrix.");
1990 TEUCHOS_TEST_FOR_EXCEPTION(
1991 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1992 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1993 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1994 TEUCHOS_TEST_FOR_EXCEPTION(
1995 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1996 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1997 "the range Map of the matrix be the same.");
1998 }
1999#else
2000 // Forestall any compiler warnings for unused variables.
2001 (void) rangeMap;
2002 (void) rowMap;
2003#endif // HAVE_IFPACK2_DEBUG
2004
2005 // Fetch a (possibly cached) temporary column Map multivector
2006 // X_colMap, and a domain Map view X_domainMap of it. Both have
2007 // constant stride by construction. We know that the domain Map
2008 // must include the column Map, because our Gauss-Seidel kernel
2009 // requires that the row Map, domain Map, and range Map are all
2010 // the same, and that each process owns all of its own diagonal
2011 // entries of the matrix.
2012
2013 RCP<multivector_type> X_colMap;
2014 RCP<multivector_type> X_domainMap;
2015 bool copyBackOutput = false;
2016 if (importer.is_null ()) {
2017 if (X.isConstantStride ()) {
2018 X_colMap = rcpFromRef (X);
2019 X_domainMap = rcpFromRef (X);
2020
2021 // Column Map and domain Map are the same, so there are no
2022 // remote entries. Thus, if we are not setting the initial
2023 // guess to zero, we don't have to worry about setting remote
2024 // entries to zero, even though we are not doing an Import in
2025 // this case.
2026 if (ZeroStartingSolution_) {
2027 X_colMap->putScalar (ZERO);
2028 }
2029 // No need to copy back to X at end.
2030 }
2031 else {
2032 // We must copy X into a constant stride multivector.
2033 // Just use the cached column Map multivector for that.
2034 // force=true means fill with zeros, so no need to fill
2035 // remote entries (not in domain Map) with zeros.
2036 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2037 X_colMap = cachedMV_;
2038 // X_domainMap is always a domain Map view of the column Map
2039 // multivector. In this case, the domain and column Maps are
2040 // the same, so X_domainMap _is_ X_colMap.
2041 X_domainMap = X_colMap;
2042 if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2043 try {
2044 deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2045 } catch (std::exception& e) {
2046 std::ostringstream os;
2047 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2048 "deep_copy(*X_domainMap, X) threw an exception: "
2049 << e.what () << ".";
2050 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2051 }
2052 }
2053 copyBackOutput = true; // Don't forget to copy back at end.
2054 /*
2055 TPETRA_EFFICIENCY_WARNING(
2056 ! X.isConstantStride (),
2057 std::runtime_error,
2058 "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2059 "kernel requires that X and B both have constant stride. Since X "
2060 "does not have constant stride, we had to make a copy. This is a "
2061 "limitation of the current implementation and not your fault, but we "
2062 "still report it as an efficiency warning for your information.");
2063 */
2064 }
2065 }
2066 else { // Column Map and domain Map are _not_ the same.
2067 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2068 X_colMap = cachedMV_;
2069
2070 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2071
2072 if (ZeroStartingSolution_) {
2073 // No need for an Import, since we're filling with zeros.
2074 X_colMap->putScalar (ZERO);
2075 } else {
2076 // We could just copy X into X_domainMap. However, that
2077 // wastes a copy, because the Import also does a copy (plus
2078 // communication). Since the typical use case for
2079 // Gauss-Seidel is a small number of sweeps (2 is typical), we
2080 // don't want to waste that copy. Thus, we do the Import
2081 // here, and skip the first Import in the first sweep.
2082 // Importing directly from X effects the copy into X_domainMap
2083 // (which is a view of X_colMap).
2084 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2085 }
2086 copyBackOutput = true; // Don't forget to copy back at end.
2087 } // if column and domain Maps are (not) the same
2088
2089 // The Gauss-Seidel / SOR kernel expects multivectors of constant
2090 // stride. X_colMap is by construction, but B might not be. If
2091 // it's not, we have to make a copy.
2092 RCP<const multivector_type> B_in;
2093 if (B.isConstantStride ()) {
2094 B_in = rcpFromRef (B);
2095 }
2096 else {
2097 // Range Map and row Map are the same in this case, so we can
2098 // use the cached row Map multivector to store a constant stride
2099 // copy of B.
2100 RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2101 try {
2102 deep_copy (*B_in_nonconst, B);
2103 } catch (std::exception& e) {
2104 std::ostringstream os;
2105 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2106 "deep_copy(*B_in_nonconst, B) threw an exception: "
2107 << e.what () << ".";
2108 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2109 }
2110 B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2111
2112 /*
2113 TPETRA_EFFICIENCY_WARNING(
2114 ! B.isConstantStride (),
2115 std::runtime_error,
2116 "MTGaussSeidel: The current implementation requires that B have "
2117 "constant stride. Since B does not have constant stride, we had to "
2118 "copy it into a separate constant-stride multivector. This is a "
2119 "limitation of the current implementation and not your fault, but we "
2120 "still report it as an efficiency warning for your information.");
2121 */
2122 }
2123
2124 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2125
2126 bool update_y_vector = true;
2127 //false as it was done up already, and we dont want to zero it in each sweep.
2128 bool zero_x_vector = false;
2129
2130 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2131 if (! importer.is_null () && sweep > 0) {
2132 // We already did the first Import for the zeroth sweep above,
2133 // if it was necessary.
2134 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2135 }
2136
2137 if (direction == Tpetra::Symmetric) {
2138 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2139 (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2140 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2141 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2142 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2143 zero_x_vector, update_y_vector, DampingFactor_, 1);
2144 }
2145 else if (direction == Tpetra::Forward) {
2146 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2147 (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2148 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2149 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2150 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2151 zero_x_vector, update_y_vector, DampingFactor_, 1);
2152 }
2153 else if (direction == Tpetra::Backward) {
2154 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2155 (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2156 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2157 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2158 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2159 zero_x_vector, update_y_vector, DampingFactor_, 1);
2160 }
2161 else {
2162 TEUCHOS_TEST_FOR_EXCEPTION(
2163 true, std::invalid_argument,
2164 prefix << "The 'direction' enum does not have any of its valid "
2165 "values: Forward, Backward, or Symmetric.");
2166 }
2167 update_y_vector = false;
2168 }
2169
2170 if (copyBackOutput) {
2171 try {
2172 deep_copy (X , *X_domainMap); // Copy result back into X.
2173 } catch (std::exception& e) {
2174 TEUCHOS_TEST_FOR_EXCEPTION(
2175 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2176 "threw an exception: " << e.what ());
2177 }
2178 }
2179
2180 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2181 const double numVectors = as<double> (X.getNumVectors ());
2182 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2183 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2184 double ApplyFlops = NumSweeps_ * numVectors *
2185 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2186 if (direction == Tpetra::Symmetric)
2187 ApplyFlops *= 2.0;
2188 ApplyFlops_ += ApplyFlops;
2189}
2190
2191template<class MatrixType>
2193{
2194 std::ostringstream os;
2195
2196 // Output is a valid YAML dictionary in flow style. If you don't
2197 // like everything on a single line, you should call describe()
2198 // instead.
2199 os << "\"Ifpack2::Relaxation\": {";
2200
2201 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2202 << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2203
2204 // It's useful to print this instance's relaxation method (Jacobi,
2205 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2206 // than that, call describe() instead.
2207 os << "Type: ";
2208 if (PrecType_ == Ifpack2::Details::JACOBI) {
2209 os << "Jacobi";
2210 } else if (PrecType_ == Ifpack2::Details::GS) {
2211 os << "Gauss-Seidel";
2212 } else if (PrecType_ == Ifpack2::Details::SGS) {
2213 os << "Symmetric Gauss-Seidel";
2214 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2215 os << "MT Gauss-Seidel";
2216 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2217 os << "MT Symmetric Gauss-Seidel";
2218 } else if (PrecType_ == Ifpack2::Details::GS2) {
2219 os << "Two-stage Gauss-Seidel";
2220 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2221 os << "Two-stage Symmetric Gauss-Seidel";
2222 }
2223 else {
2224 os << "INVALID";
2225 }
2226 if(hasBlockCrsMatrix_)
2227 os<<", BlockCrs";
2228
2229 os << ", " << "sweeps: " << NumSweeps_ << ", "
2230 << "damping factor: " << DampingFactor_ << ", ";
2231
2232 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2233 os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2234 os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2235 os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2236 os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2237 switch(mtColoringAlgorithm_)
2238 {
2239 case KokkosGraph::COLORING_DEFAULT:
2240 os << "DEFAULT"; break;
2241 case KokkosGraph::COLORING_SERIAL:
2242 os << "SERIAL"; break;
2243 case KokkosGraph::COLORING_VB:
2244 os << "VB"; break;
2245 case KokkosGraph::COLORING_VBBIT:
2246 os << "VBBIT"; break;
2247 case KokkosGraph::COLORING_VBCS:
2248 os << "VBCS"; break;
2249 case KokkosGraph::COLORING_VBD:
2250 os << "VBD"; break;
2251 case KokkosGraph::COLORING_VBDBIT:
2252 os << "VBDBIT"; break;
2253 case KokkosGraph::COLORING_EB:
2254 os << "EB"; break;
2255 default:
2256 os << "*Invalid*";
2257 }
2258 os << ", ";
2259 }
2260
2261 if (PrecType_ == Ifpack2::Details::GS2 ||
2262 PrecType_ == Ifpack2::Details::SGS2) {
2263 os << "outer sweeps: " << NumOuterSweeps_ << ", "
2264 << "inner sweeps: " << NumInnerSweeps_ << ", "
2265 << "inner damping factor: " << InnerDampingFactor_ << ", ";
2266 }
2267
2268 if (DoL1Method_) {
2269 os << "use l1: " << DoL1Method_ << ", "
2270 << "l1 eta: " << L1Eta_ << ", ";
2271 }
2272
2273 if (A_.is_null ()) {
2274 os << "Matrix: null";
2275 }
2276 else {
2277 os << "Global matrix dimensions: ["
2278 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2279 << ", Global nnz: " << A_->getGlobalNumEntries();
2280 }
2281
2282 os << "}";
2283 return os.str ();
2284}
2285
2286
2287template<class MatrixType>
2288void
2290describe (Teuchos::FancyOStream &out,
2291 const Teuchos::EVerbosityLevel verbLevel) const
2292{
2293 using Teuchos::OSTab;
2294 using Teuchos::TypeNameTraits;
2295 using Teuchos::VERB_DEFAULT;
2296 using Teuchos::VERB_NONE;
2297 using Teuchos::VERB_LOW;
2298 using Teuchos::VERB_MEDIUM;
2299 using Teuchos::VERB_HIGH;
2300 using Teuchos::VERB_EXTREME;
2301 using std::endl;
2302
2303 const Teuchos::EVerbosityLevel vl =
2304 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2305
2306 const int myRank = this->getComm ()->getRank ();
2307
2308 // none: print nothing
2309 // low: print O(1) info from Proc 0
2310 // medium:
2311 // high:
2312 // extreme:
2313
2314 if (vl != VERB_NONE && myRank == 0) {
2315 // Describable interface asks each implementation to start with a tab.
2316 OSTab tab1 (out);
2317 // Output is valid YAML; hence the quotes, to protect the colons.
2318 out << "\"Ifpack2::Relaxation\":" << endl;
2319 OSTab tab2 (out);
2320 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2321 << endl;
2322 if (this->getObjectLabel () != "") {
2323 out << "Label: " << this->getObjectLabel () << endl;
2324 }
2325 out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2326 << "Computed: " << (isComputed () ? "true" : "false") << endl
2327 << "Parameters: " << endl;
2328 {
2329 OSTab tab3 (out);
2330 out << "\"relaxation: type\": ";
2331 if (PrecType_ == Ifpack2::Details::JACOBI) {
2332 out << "Jacobi";
2333 } else if (PrecType_ == Ifpack2::Details::GS) {
2334 out << "Gauss-Seidel";
2335 } else if (PrecType_ == Ifpack2::Details::SGS) {
2336 out << "Symmetric Gauss-Seidel";
2337 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2338 out << "MT Gauss-Seidel";
2339 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2340 out << "MT Symmetric Gauss-Seidel";
2341 } else if (PrecType_ == Ifpack2::Details::GS2) {
2342 out << "Two-stage Gauss-Seidel";
2343 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2344 out << "Two-stage Symmetric Gauss-Seidel";
2345 } else {
2346 out << "INVALID";
2347 }
2348 // We quote these parameter names because they contain colons.
2349 // YAML uses the colon to distinguish key from value.
2350 out << endl
2351 << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2352 << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2353 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2354 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2355 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2356 << "\"relaxation: use l1\": " << DoL1Method_ << endl
2357 << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2358 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2359 out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2360 out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2361 out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2362 out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2363 switch(mtColoringAlgorithm_)
2364 {
2365 case KokkosGraph::COLORING_DEFAULT:
2366 out << "DEFAULT"; break;
2367 case KokkosGraph::COLORING_SERIAL:
2368 out << "SERIAL"; break;
2369 case KokkosGraph::COLORING_VB:
2370 out << "VB"; break;
2371 case KokkosGraph::COLORING_VBBIT:
2372 out << "VBBIT"; break;
2373 case KokkosGraph::COLORING_VBCS:
2374 out << "VBCS"; break;
2375 case KokkosGraph::COLORING_VBD:
2376 out << "VBD"; break;
2377 case KokkosGraph::COLORING_VBDBIT:
2378 out << "VBDBIT"; break;
2379 case KokkosGraph::COLORING_EB:
2380 out << "EB"; break;
2381 default:
2382 out << "*Invalid*";
2383 }
2384 out << endl;
2385 }
2386 if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2387 out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2388 out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2389 out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2390 }
2391 }
2392 out << "Computed quantities:" << endl;
2393 {
2394 OSTab tab3 (out);
2395 out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2396 << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2397 }
2398 if (checkDiagEntries_ && isComputed ()) {
2399 out << "Properties of input diagonal entries:" << endl;
2400 {
2401 OSTab tab3 (out);
2402 out << "Magnitude of minimum-magnitude diagonal entry: "
2403 << globalMinMagDiagEntryMag_ << endl
2404 << "Magnitude of maximum-magnitude diagonal entry: "
2405 << globalMaxMagDiagEntryMag_ << endl
2406 << "Number of diagonal entries with small magnitude: "
2407 << globalNumSmallDiagEntries_ << endl
2408 << "Number of zero diagonal entries: "
2409 << globalNumZeroDiagEntries_ << endl
2410 << "Number of diagonal entries with negative real part: "
2411 << globalNumNegDiagEntries_ << endl
2412 << "Abs 2-norm diff between computed and actual inverse "
2413 << "diagonal: " << globalDiagNormDiff_ << endl;
2414 }
2415 }
2416 if (isComputed ()) {
2417 out << "Saved diagonal offsets: "
2418 << (savedDiagOffsets_ ? "true" : "false") << endl;
2419 }
2420 out << "Call counts and total times (in seconds): " << endl;
2421 {
2422 OSTab tab3 (out);
2423 out << "initialize: " << endl;
2424 {
2425 OSTab tab4 (out);
2426 out << "Call count: " << NumInitialize_ << endl;
2427 out << "Total time: " << InitializeTime_ << endl;
2428 }
2429 out << "compute: " << endl;
2430 {
2431 OSTab tab4 (out);
2432 out << "Call count: " << NumCompute_ << endl;
2433 out << "Total time: " << ComputeTime_ << endl;
2434 }
2435 out << "apply: " << endl;
2436 {
2437 OSTab tab4 (out);
2438 out << "Call count: " << NumApply_ << endl;
2439 out << "Total time: " << ApplyTime_ << endl;
2440 }
2441 }
2442 }
2443}
2444
2445
2446} // namespace Ifpack2
2447
2448#define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2449 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2450
2451#endif // IFPACK2_RELAXATION_DEF_HPP
File for utility functions.
Compute scaled damped residual for Chebyshev.
Definition Ifpack2_Details_InverseDiagonalKernel_decl.hpp:77
Diagonal preconditioner.
Definition Ifpack2_Diagonal_decl.hpp:80
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition Ifpack2_Relaxation_decl.hpp:248
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition Ifpack2_Relaxation_def.hpp:214
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:260
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition Ifpack2_Relaxation_def.hpp:467
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:263
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition Ifpack2_Relaxation_def.hpp:477
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_Relaxation_def.hpp:497
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:556
int getNumCompute() const
Total number of calls to compute().
Definition Ifpack2_Relaxation_def.hpp:532
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:488
void compute()
Compute the preconditioner ("numeric setup");.
Definition Ifpack2_Relaxation_def.hpp:1020
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition Ifpack2_Relaxation_def.hpp:722
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:550
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition Ifpack2_Relaxation_def.hpp:2290
std::string description() const
A simple one-line description of this object.
Definition Ifpack2_Relaxation_def.hpp:2192
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition Ifpack2_Relaxation_def.hpp:226
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:544
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:193
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:254
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:588
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:257
bool isComputed() const
Return true if compute() has been called.
Definition Ifpack2_Relaxation_decl.hpp:426
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:701
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Relaxation_decl.hpp:273
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Relaxation_decl.hpp:269
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_Relaxation_def.hpp:575
int getNumApply() const
Total number of calls to apply().
Definition Ifpack2_Relaxation_def.hpp:538
int getNumInitialize() const
Total number of calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:526
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:562
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition Ifpack2_Relaxation_def.hpp:520
bool isInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition Ifpack2_Relaxation_decl.hpp:418
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_Relaxation_def.hpp:510
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:568
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:74
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition Ifpack2_Parameters.cpp:51