Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_RILUK_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ***********************************************************************
38//@HEADER
39*/
40
41#ifndef IFPACK2_CRSRILUK_DEF_HPP
42#define IFPACK2_CRSRILUK_DEF_HPP
43
44#include "Ifpack2_LocalFilter.hpp"
45#include "Tpetra_CrsMatrix.hpp"
46#include "Teuchos_StandardParameterEntryValidators.hpp"
47#include "Ifpack2_LocalSparseTriangularSolver.hpp"
48#include "Ifpack2_Details_getParamTryingTypes.hpp"
49#include "Ifpack2_Details_getCrsMatrix.hpp"
50#include "Kokkos_Sort.hpp"
51#include "KokkosSparse_Utils.hpp"
52#include "KokkosKernels_Sorting.hpp"
53
54namespace Ifpack2 {
55
56namespace Details {
57struct IlukImplType {
58 enum Enum {
59 Serial,
60 KSPILUK
61 };
62
63 static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
64 type_strs.resize(2);
65 type_strs[0] = "Serial";
66 type_strs[1] = "KSPILUK";
67 type_enums.resize(2);
68 type_enums[0] = Serial;
69 type_enums[1] = KSPILUK;
70 }
71};
72}
73
74template<class MatrixType>
75RILUK<MatrixType>::RILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
76 : A_ (Matrix_in),
77 LevelOfFill_ (0),
78 Overalloc_ (2.),
79 isAllocated_ (false),
80 isInitialized_ (false),
81 isComputed_ (false),
82 numInitialize_ (0),
83 numCompute_ (0),
84 numApply_ (0),
85 initializeTime_ (0.0),
86 computeTime_ (0.0),
87 applyTime_ (0.0),
88 RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
89 Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
90 Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
92{
93 allocateSolvers();
94}
95
96
97template<class MatrixType>
98RILUK<MatrixType>::RILUK (const Teuchos::RCP<const crs_matrix_type>& Matrix_in)
99 : A_ (Matrix_in),
100 LevelOfFill_ (0),
101 Overalloc_ (2.),
102 isAllocated_ (false),
103 isInitialized_ (false),
104 isComputed_ (false),
105 numInitialize_ (0),
106 numCompute_ (0),
107 numApply_ (0),
108 initializeTime_ (0.0),
109 computeTime_ (0.0),
110 applyTime_ (0.0),
111 RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
112 Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
113 Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
115{
116 allocateSolvers();
117}
118
119
120template<class MatrixType>
122{
123 if (Teuchos::nonnull (KernelHandle_))
124 {
125 KernelHandle_->destroy_spiluk_handle();
126 }
127}
128
129template<class MatrixType>
130void RILUK<MatrixType>::allocateSolvers ()
131{
132 L_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
133 L_solver_->setObjectLabel("lower");
134 U_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
135 U_solver_->setObjectLabel("upper");
136}
137
138template<class MatrixType>
139void
140RILUK<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
141{
142 // It's legal for A to be null; in that case, you may not call
143 // initialize() until calling setMatrix() with a nonnull input.
144 // Regardless, setting the matrix invalidates any previous
145 // factorization.
146 if (A.getRawPtr () != A_.getRawPtr ()) {
147 isAllocated_ = false;
148 isInitialized_ = false;
149 isComputed_ = false;
150 A_local_ = Teuchos::null;
151 Graph_ = Teuchos::null;
152
153 // The sparse triangular solvers get a triangular factor as their
154 // input matrix. The triangular factors L_ and U_ are getting
155 // reset, so we reset the solvers' matrices to null. Do that
156 // before setting L_ and U_ to null, so that latter step actually
157 // frees the factors.
158 if (! L_solver_.is_null ()) {
159 L_solver_->setMatrix (Teuchos::null);
160 }
161 if (! U_solver_.is_null ()) {
162 U_solver_->setMatrix (Teuchos::null);
163 }
164
165 L_ = Teuchos::null;
166 U_ = Teuchos::null;
167 D_ = Teuchos::null;
168 A_ = A;
169 }
170}
171
172
173
174template<class MatrixType>
177{
178 TEUCHOS_TEST_FOR_EXCEPTION(
179 L_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
180 "is null. Please call initialize() (and preferably also compute()) "
181 "before calling this method. If the input matrix has not yet been set, "
182 "you must first call setMatrix() with a nonnull input matrix before you "
183 "may call initialize() or compute().");
184 return *L_;
185}
186
187
188template<class MatrixType>
189const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
194{
195 TEUCHOS_TEST_FOR_EXCEPTION(
196 D_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
197 "(of diagonal entries) is null. Please call initialize() (and "
198 "preferably also compute()) before calling this method. If the input "
199 "matrix has not yet been set, you must first call setMatrix() with a "
200 "nonnull input matrix before you may call initialize() or compute().");
201 return *D_;
202}
203
204
205template<class MatrixType>
208{
209 TEUCHOS_TEST_FOR_EXCEPTION(
210 U_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
211 "is null. Please call initialize() (and preferably also compute()) "
212 "before calling this method. If the input matrix has not yet been set, "
213 "you must first call setMatrix() with a nonnull input matrix before you "
214 "may call initialize() or compute().");
215 return *U_;
216}
217
218
219template<class MatrixType>
221 TEUCHOS_TEST_FOR_EXCEPTION(
222 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getNodeSmootherComplexity: "
223 "The input matrix A is null. Please call setMatrix() with a nonnull "
224 "input matrix, then call compute(), before calling this method.");
225 // RILUK methods cost roughly one apply + the nnz in the upper+lower triangles
226 if(!L_.is_null() && !U_.is_null())
227 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
228 else
229 return 0;
230}
231
232
233template<class MatrixType>
234Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
238 TEUCHOS_TEST_FOR_EXCEPTION(
239 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
240 "The matrix is null. Please call setMatrix() with a nonnull input "
241 "before calling this method.");
242
243 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
244 TEUCHOS_TEST_FOR_EXCEPTION(
245 Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
246 "The computed graph is null. Please call initialize() before calling "
247 "this method.");
248 return Graph_->getL_Graph ()->getDomainMap ();
249}
250
251
252template<class MatrixType>
253Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
257 TEUCHOS_TEST_FOR_EXCEPTION(
258 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
259 "The matrix is null. Please call setMatrix() with a nonnull input "
260 "before calling this method.");
261
262 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
263 TEUCHOS_TEST_FOR_EXCEPTION(
264 Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
265 "The computed graph is null. Please call initialize() before calling "
266 "this method.");
267 return Graph_->getL_Graph ()->getRangeMap ();
268}
269
270
271template<class MatrixType>
272void RILUK<MatrixType>::allocate_L_and_U ()
273{
274 using Teuchos::null;
275 using Teuchos::rcp;
276
277 if (! isAllocated_) {
278 // Deallocate any existing storage. This avoids storing 2x
279 // memory, since RCP op= does not deallocate until after the
280 // assignment.
281 L_ = null;
282 U_ = null;
283 D_ = null;
284
285 // Allocate Matrix using ILUK graphs
286 L_ = rcp (new crs_matrix_type (Graph_->getL_Graph ()));
287 U_ = rcp (new crs_matrix_type (Graph_->getU_Graph ()));
288 L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
289 U_->setAllToScalar (STS::zero ());
290
291 // FIXME (mfh 24 Jan 2014) This assumes domain == range Map for L and U.
292 L_->fillComplete ();
293 U_->fillComplete ();
294 D_ = rcp (new vec_type (Graph_->getL_Graph ()->getRowMap ()));
295 }
296 isAllocated_ = true;
297}
298
299
300template<class MatrixType>
301void
303setParameters (const Teuchos::ParameterList& params)
304{
305 using Teuchos::RCP;
306 using Teuchos::ParameterList;
307 using Teuchos::Array;
308 using Details::getParamTryingTypes;
309 const char prefix[] = "Ifpack2::RILUK: ";
310
311 // Default values of the various parameters.
312 int fillLevel = 0;
313 magnitude_type absThresh = STM::zero ();
314 magnitude_type relThresh = STM::one ();
315 magnitude_type relaxValue = STM::zero ();
316 double overalloc = 2.;
317
318 // "fact: iluk level-of-fill" parsing is more complicated, because
319 // we want to allow as many types as make sense. int is the native
320 // type, but we also want to accept double (for backwards
321 // compatibilty with ILUT). You can't cast arbitrary magnitude_type
322 // (e.g., Sacado::MP::Vector) to int, so we use float instead, to
323 // get coverage of the most common magnitude_type cases. Weirdly,
324 // there's an Ifpack2 test that sets the fill level as a
325 // global_ordinal_type.
326 {
327 const std::string paramName ("fact: iluk level-of-fill");
328 getParamTryingTypes<int, int, global_ordinal_type, double, float>
329 (fillLevel, params, paramName, prefix);
330 }
331 // For the other parameters, we prefer magnitude_type, but allow
332 // double for backwards compatibility.
333 {
334 const std::string paramName ("fact: absolute threshold");
335 getParamTryingTypes<magnitude_type, magnitude_type, double>
336 (absThresh, params, paramName, prefix);
337 }
338 {
339 const std::string paramName ("fact: relative threshold");
340 getParamTryingTypes<magnitude_type, magnitude_type, double>
341 (relThresh, params, paramName, prefix);
342 }
344 const std::string paramName ("fact: relax value");
345 getParamTryingTypes<magnitude_type, magnitude_type, double>
346 (relaxValue, params, paramName, prefix);
347 }
348 {
349 const std::string paramName ("fact: iluk overalloc");
350 getParamTryingTypes<double, double>
351 (overalloc, params, paramName, prefix);
352 }
354 // Parsing implementation type
355 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
356 do {
357 static const char typeName[] = "fact: type";
358
359 if ( ! params.isType<std::string>(typeName)) break;
360
361 // Map std::string <-> IlukImplType::Enum.
362 Array<std::string> ilukimplTypeStrs;
363 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
364 Details::IlukImplType::loadPLTypeOption (ilukimplTypeStrs, ilukimplTypeEnums);
365 Teuchos::StringToIntegralParameterEntryValidator<Details::IlukImplType::Enum>
366 s2i(ilukimplTypeStrs (), ilukimplTypeEnums (), typeName, false);
367
368 ilukimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
369 } while (0);
370
371 if (ilukimplType == Details::IlukImplType::KSPILUK) {
372 this->isKokkosKernelsSpiluk_ = true;
373 }
374 else {
375 this->isKokkosKernelsSpiluk_ = false;
376 }
377
378 // Forward to trisolvers.
379 L_solver_->setParameters(params);
380 U_solver_->setParameters(params);
381
382 // "Commit" the values only after validating all of them. This
383 // ensures that there are no side effects if this routine throws an
384 // exception.
385
386 LevelOfFill_ = fillLevel;
387 Overalloc_ = overalloc;
388
389 // mfh 28 Nov 2012: The previous code would not assign Athresh_,
390 // Rthresh_, or RelaxValue_, if the read-in value was -1. I don't
391 // know if keeping this behavior is correct, but I'll keep it just
392 // so as not to change previous behavior.
393
394 if (absThresh != -STM::one ()) {
395 Athresh_ = absThresh;
396 }
397 if (relThresh != -STM::one ()) {
398 Rthresh_ = relThresh;
399 }
400 if (relaxValue != -STM::one ()) {
401 RelaxValue_ = relaxValue;
402 }
403}
405
406template<class MatrixType>
407Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
409 return Teuchos::rcp_implicit_cast<const row_matrix_type> (A_);
410}
411
412
413template<class MatrixType>
414Teuchos::RCP<const typename RILUK<MatrixType>::crs_matrix_type>
416 return Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
417}
418
419
420template<class MatrixType>
421Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
422RILUK<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A)
423{
424 using Teuchos::RCP;
425 using Teuchos::rcp;
426 using Teuchos::rcp_dynamic_cast;
427 using Teuchos::rcp_implicit_cast;
428
429 // If A_'s communicator only has one process, or if its column and
430 // row Maps are the same, then it is already local, so use it
431 // directly.
432 if (A->getRowMap ()->getComm ()->getSize () == 1 ||
433 A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
434 return A;
435 }
436
437 // If A_ is already a LocalFilter, then use it directly. This
438 // should be the case if RILUK is being used through
439 // AdditiveSchwarz, for example.
440 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
441 rcp_dynamic_cast<const LocalFilter<row_matrix_type> > (A);
442 if (! A_lf_r.is_null ()) {
443 return rcp_implicit_cast<const row_matrix_type> (A_lf_r);
444 }
445 else {
446 // A_'s communicator has more than one process, its row Map and
447 // its column Map differ, and A_ is not a LocalFilter. Thus, we
448 // have to wrap it in a LocalFilter.
449 return rcp (new LocalFilter<row_matrix_type> (A));
450 }
451}
452
454template<class MatrixType>
456{
457 using Teuchos::RCP;
458 using Teuchos::rcp;
459 using Teuchos::rcp_const_cast;
460 using Teuchos::rcp_dynamic_cast;
461 using Teuchos::rcp_implicit_cast;
462 using Teuchos::Array;
463 using Teuchos::ArrayView;
464 typedef Tpetra::CrsGraph<local_ordinal_type,
466 node_type> crs_graph_type;
467 const char prefix[] = "Ifpack2::RILUK::initialize: ";
468
469 TEUCHOS_TEST_FOR_EXCEPTION
470 (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
471 "call setMatrix() with a nonnull input before calling this method.");
472 TEUCHOS_TEST_FOR_EXCEPTION
473 (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
474 "fill complete. You may not invoke initialize() or compute() with this "
475 "matrix until the matrix is fill complete. If your matrix is a "
476 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
477 "range Maps, if appropriate) before calling this method.");
478
479 Teuchos::Time timer ("RILUK::initialize");
480 double startTime = timer.wallTime();
481 { // Start timing
482 Teuchos::TimeMonitor timeMon (timer);
483
484 // Calling initialize() means that the user asserts that the graph
485 // of the sparse matrix may have changed. We must not just reuse
486 // the previous graph in that case.
487 //
488 // Regarding setting isAllocated_ to false: Eventually, we may want
489 // some kind of clever memory reuse strategy, but it's always
490 // correct just to blow everything away and start over.
491 isInitialized_ = false;
492 isAllocated_ = false;
493 isComputed_ = false;
494 Graph_ = Teuchos::null;
495
496 A_local_ = makeLocalFilter (A_);
497 TEUCHOS_TEST_FOR_EXCEPTION(
498 A_local_.is_null (), std::logic_error, "Ifpack2::RILUK::initialize: "
499 "makeLocalFilter returned null; it failed to compute A_local. "
500 "Please report this bug to the Ifpack2 developers.");
501
502 // FIXME (mfh 24 Jan 2014, 26 Mar 2014) It would be more efficient
503 // to rewrite RILUK so that it works with any RowMatrix input, not
504 // just CrsMatrix. (That would require rewriting IlukGraph to
505 // handle a Tpetra::RowGraph.) However, to make it work for now,
506 // we just copy the input matrix if it's not a CrsMatrix.
507
508 if (this->isKokkosKernelsSpiluk_) {
509 this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
510 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
511 A_local_->getLocalNumRows(),
512 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
513 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
514 }
515
516 {
517 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
518 if(A_local_crs.is_null()) {
519 local_ordinal_type numRows = A_local_->getLocalNumRows();
520 Array<size_t> entriesPerRow(numRows);
521 for(local_ordinal_type i = 0; i < numRows; i++) {
522 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
523 }
524 RCP<crs_matrix_type> A_local_crs_nc =
525 rcp (new crs_matrix_type (A_local_->getRowMap (),
526 A_local_->getColMap (),
527 entriesPerRow()));
528 // copy entries into A_local_crs
529 nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries());
530 nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries());
531 for(local_ordinal_type i = 0; i < numRows; i++) {
532 size_t numEntries = 0;
533 A_local_->getLocalRowCopy(i, indices, values, numEntries);
534 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
535 }
536 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
537 A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
538 }
539 Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (A_local_crs->getCrsGraph (),
540 LevelOfFill_, 0, Overalloc_));
541 }
542
543 // This calls spiluk_symbolic
544 if (this->isKokkosKernelsSpiluk_) Graph_->initialize (KernelHandle_);
545 else Graph_->initialize ();
546
547 allocate_L_and_U ();
548 checkOrderingConsistency (*A_local_);
549 L_solver_->setMatrix (L_);
550 L_solver_->initialize ();
551 //NOTE (Nov-09-2022):
552 //For Cuda >= 11.3 (using cusparseSpSV), skip trisolve computes here.
553 //Instead, call trisolve computes within RILUK compute
554#if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
555 L_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute
556#endif
557 U_solver_->setMatrix (U_);
558 U_solver_->initialize ();
559#if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
560 U_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute
561#endif
563 // Do not call initAllValues. compute() always calls initAllValues to
564 // fill L and U with possibly new numbers. initialize() is concerned
565 // only with the nonzero pattern.
566 } // Stop timing
567
568 isInitialized_ = true;
569 ++numInitialize_;
570 initializeTime_ += (timer.wallTime() - startTime);
571}
572
573template<class MatrixType>
574void
577{
578 // First check that the local row map ordering is the same as the local portion of the column map.
579 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
580 // implicitly assume that this is the case.
581 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
582 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
583 bool gidsAreConsistentlyOrdered=true;
584 global_ordinal_type indexOfInconsistentGID=0;
585 for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
586 if (rowGIDs[i] != colGIDs[i]) {
587 gidsAreConsistentlyOrdered=false;
588 indexOfInconsistentGID=i;
589 break;
590 }
591 }
592 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
593 "The ordering of the local GIDs in the row and column maps is not the same"
594 << std::endl << "at index " << indexOfInconsistentGID
595 << ". Consistency is required, as all calculations are done with"
596 << std::endl << "local indexing.");
597}
598
599template<class MatrixType>
600void
603{
604 using Teuchos::ArrayRCP;
605 using Teuchos::Comm;
606 using Teuchos::ptr;
607 using Teuchos::RCP;
608 using Teuchos::REDUCE_SUM;
609 using Teuchos::reduceAll;
610 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
611
612 size_t NumIn = 0, NumL = 0, NumU = 0;
613 bool DiagFound = false;
614 size_t NumNonzeroDiags = 0;
615 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
616
617 // Allocate temporary space for extracting the strictly
618 // lower and upper parts of the matrix A.
619 nonconst_local_inds_host_view_type InI("InI",MaxNumEntries);
620 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
621 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
622 nonconst_values_host_view_type InV("InV",MaxNumEntries);
623 Teuchos::Array<scalar_type> LV(MaxNumEntries);
624 Teuchos::Array<scalar_type> UV(MaxNumEntries);
625
626 // Check if values should be inserted or replaced
627 const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
628
629 L_->resumeFill ();
630 U_->resumeFill ();
631 if (ReplaceValues) {
632 L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
633 U_->setAllToScalar (STS::zero ());
634 }
635
636 D_->putScalar (STS::zero ()); // Set diagonal values to zero
637 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
638
639 RCP<const map_type> rowMap = L_->getRowMap ();
640
641 // First we copy the user's matrix into L and U, regardless of fill level.
642 // It is important to note that L and U are populated using local indices.
643 // This means that if the row map GIDs are not monotonically increasing
644 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
645 // matrix is not the one that you would get if you based L (U) on GIDs.
646 // This is ok, as the *order* of the GIDs in the rowmap is a better
647 // expression of the user's intent than the GIDs themselves.
648
649 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
650 for (size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
651 local_ordinal_type local_row = myRow;
652
653 //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
654 // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
655 A.getLocalRowCopy (local_row, InI, InV, NumIn); // Get Values and Indices
656
657 // Split into L and U (we don't assume that indices are ordered).
658
659 NumL = 0;
660 NumU = 0;
661 DiagFound = false;
662
663 for (size_t j = 0; j < NumIn; ++j) {
664 const local_ordinal_type k = InI[j];
665
666 if (k == local_row) {
667 DiagFound = true;
668 // Store perturbed diagonal in Tpetra::Vector D_
669 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
670 }
671 else if (k < 0) { // Out of range
672 TEUCHOS_TEST_FOR_EXCEPTION(
673 true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
674 "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
675 "probably an artifact of the undocumented assumptions of the "
676 "original implementation (likely copied and pasted from Ifpack). "
677 "Nevertheless, the code I found here insisted on this being an error "
678 "state, so I will throw an exception here.");
679 }
680 else if (k < local_row) {
681 LI[NumL] = k;
682 LV[NumL] = InV[j];
683 NumL++;
684 }
685 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
686 UI[NumU] = k;
687 UV[NumU] = InV[j];
688 NumU++;
689 }
690 }
691
692 // Check in things for this row of L and U
693
694 if (DiagFound) {
695 ++NumNonzeroDiags;
696 } else {
697 DV(local_row) = Athresh_;
698 }
699
700 if (NumL) {
701 if (ReplaceValues) {
702 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
703 } else {
704 //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
705 //FIXME in this row in the column locations corresponding to UI.
706 L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
707 }
708 }
709
710 if (NumU) {
711 if (ReplaceValues) {
712 U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
713 } else {
714 //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
715 //FIXME in this row in the column locations corresponding to UI.
716 U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
717 }
718 }
719 }
720
721 // At this point L and U have the values of A in the structure of L
722 // and U, and diagonal vector D
723
724 isInitialized_ = true;
725}
726
727
728template<class MatrixType>
730{
731 using Teuchos::RCP;
732 using Teuchos::rcp;
733 using Teuchos::rcp_const_cast;
734 using Teuchos::rcp_dynamic_cast;
735 using Teuchos::Array;
736 using Teuchos::ArrayView;
737 const char prefix[] = "Ifpack2::RILUK::compute: ";
738
739 // initialize() checks this too, but it's easier for users if the
740 // error shows them the name of the method that they actually
741 // called, rather than the name of some internally called method.
742 TEUCHOS_TEST_FOR_EXCEPTION
743 (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
744 "call setMatrix() with a nonnull input before calling this method.");
745 TEUCHOS_TEST_FOR_EXCEPTION
746 (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
747 "fill complete. You may not invoke initialize() or compute() with this "
748 "matrix until the matrix is fill complete. If your matrix is a "
749 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
750 "range Maps, if appropriate) before calling this method.");
751
752 if (! isInitialized ()) {
753 initialize (); // Don't count this in the compute() time
754 }
755
756 Teuchos::Time timer ("RILUK::compute");
757
758 // Start timing
759 Teuchos::TimeMonitor timeMon (timer);
760 double startTime = timer.wallTime();
761
762 isComputed_ = false;
763
764 if (!this->isKokkosKernelsSpiluk_) {
765 // Fill L and U with numbers. This supports nonzero pattern reuse by calling
766 // initialize() once and then compute() multiple times.
767 initAllValues (*A_local_);
768
769 // MinMachNum should be officially defined, for now pick something a little
770 // bigger than IEEE underflow value
771
772 const scalar_type MinDiagonalValue = STS::rmin ();
773 const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
774
775 size_t NumIn, NumL, NumU;
776
777 // Get Maximum Row length
778 const size_t MaxNumEntries =
779 L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
780
781 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries); // Allocate temp space
782 Teuchos::Array<scalar_type> InV(MaxNumEntries);
783 size_t num_cols = U_->getColMap()->getLocalNumElements();
784 Teuchos::Array<int> colflag(num_cols);
785
786 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
787
788 // Now start the factorization.
789
790 for (size_t j = 0; j < num_cols; ++j) {
791 colflag[j] = -1;
792 }
793 using IST = typename row_matrix_type::impl_scalar_type;
794 for (size_t i = 0; i < L_->getLocalNumRows (); ++i) {
795 local_ordinal_type local_row = i;
796 // Need some integer workspace and pointers
797 size_t NumUU;
798 local_inds_host_view_type UUI;
799 values_host_view_type UUV;
800
801 // Fill InV, InI with current row of L, D and U combined
802
803 NumIn = MaxNumEntries;
804 nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries);
805 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()),MaxNumEntries);
806
807 L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
808
809 InV[NumL] = DV(i); // Put in diagonal
810 InI[NumL] = local_row;
811
812 nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
813 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data())+NumL+1,MaxNumEntries-NumL-1);
814
815 U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
816 NumIn = NumL+NumU+1;
817
818 // Set column flags
819 for (size_t j = 0; j < NumIn; ++j) {
820 colflag[InI[j]] = j;
821 }
822
823 scalar_type diagmod = STS::zero (); // Off-diagonal accumulator
824
825 for (size_t jj = 0; jj < NumL; ++jj) {
826 local_ordinal_type j = InI[jj];
827 IST multiplier = InV[jj]; // current_mults++;
828
829 InV[jj] *= static_cast<scalar_type>(DV(j));
830
831 U_->getLocalRowView(j, UUI, UUV); // View of row above
832 NumUU = UUI.size();
833
834 if (RelaxValue_ == STM::zero ()) {
835 for (size_t k = 0; k < NumUU; ++k) {
836 const int kk = colflag[UUI[k]];
837 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
838 // colflag above using size_t (which is generally unsigned),
839 // but now we're querying it using int (which is signed).
840 if (kk > -1) {
841 InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
842 }
843 }
844
845 }
846 else {
847 for (size_t k = 0; k < NumUU; ++k) {
848 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
849 // colflag above using size_t (which is generally unsigned),
850 // but now we're querying it using int (which is signed).
851 const int kk = colflag[UUI[k]];
852 if (kk > -1) {
853 InV[kk] -= static_cast<scalar_type>(multiplier*UUV[k]);
854 }
855 else {
856 diagmod -= static_cast<scalar_type>(multiplier*UUV[k]);
857 }
858 }
859 }
860 }
861
862 if (NumL) {
863 // Replace current row of L
864 L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
865 }
866
867 DV(i) = InV[NumL]; // Extract Diagonal value
868
869 if (RelaxValue_ != STM::zero ()) {
870 DV(i) += RelaxValue_*diagmod; // Add off diagonal modifications
871 }
872
873 if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
874 if (STS::real (DV(i)) < STM::zero ()) {
875 DV(i) = -MinDiagonalValue;
876 }
877 else {
878 DV(i) = MinDiagonalValue;
879 }
880 }
881 else {
882 DV(i) = static_cast<impl_scalar_type>(STS::one ()) / DV(i); // Invert diagonal value
883 }
884
885 for (size_t j = 0; j < NumU; ++j) {
886 InV[NumL+1+j] *= static_cast<scalar_type>(DV(i)); // Scale U by inverse of diagonal
887 }
888
889 if (NumU) {
890 // Replace current row of L and U
891 U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
892 }
893
894 // Reset column flags
895 for (size_t j = 0; j < NumIn; ++j) {
896 colflag[InI[j]] = -1;
897 }
898 }
899
900 // The domain of L and the range of U are exactly their own row maps
901 // (there is no communication). The domain of U and the range of L
902 // must be the same as those of the original matrix, However if the
903 // original matrix is a VbrMatrix, these two latter maps are
904 // translation from a block map to a point map.
905 // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is
906 // always one-to-one?
907 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
908 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
909
910 // If L_solver_ or U_solver store modified factors internally, we need to reset those
911 L_solver_->setMatrix (L_);
912 L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
913 U_solver_->setMatrix (U_);
914 U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
915 }
916 else {
917 {//Make sure values in A is picked up even in case of pattern reuse
918 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
919 if(A_local_crs.is_null()) {
920 local_ordinal_type numRows = A_local_->getLocalNumRows();
921 Array<size_t> entriesPerRow(numRows);
922 for(local_ordinal_type i = 0; i < numRows; i++) {
923 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
924 }
925 RCP<crs_matrix_type> A_local_crs_nc =
926 rcp (new crs_matrix_type (A_local_->getRowMap (),
927 A_local_->getColMap (),
928 entriesPerRow()));
929 // copy entries into A_local_crs
930 nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries());
931 nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries());
932 for(local_ordinal_type i = 0; i < numRows; i++) {
933 size_t numEntries = 0;
934 A_local_->getLocalRowCopy(i, indices, values, numEntries);
935 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
936 }
937 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
938 A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
939 }
940 auto lclMtx = A_local_crs->getLocalMatrixDevice();
941 A_local_rowmap_ = lclMtx.graph.row_map;
942 A_local_entries_ = lclMtx.graph.entries;
943 A_local_values_ = lclMtx.values;
944 }
945
946 L_->resumeFill ();
947 U_->resumeFill ();
948
949 if (L_->isStaticGraph () || L_->isLocallyIndexed ()) {
950 L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
951 U_->setAllToScalar (STS::zero ());
952 }
953
954 using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type;
955
956 {
957 auto lclL = L_->getLocalMatrixDevice();
958 row_map_type L_rowmap = lclL.graph.row_map;
959 auto L_entries = lclL.graph.entries;
960 auto L_values = lclL.values;
961
962 auto lclU = U_->getLocalMatrixDevice();
963 row_map_type U_rowmap = lclU.graph.row_map;
964 auto U_entries = lclU.graph.entries;
965 auto U_values = lclU.values;
966
967 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
968 A_local_rowmap_, A_local_entries_, A_local_values_,
969 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
970 }
971
972 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
973 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
974
975 L_solver_->setMatrix (L_);
976 L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
977 U_solver_->setMatrix (U_);
978 U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
979 }
980
981 isComputed_ = true;
982 ++numCompute_;
983 computeTime_ += (timer.wallTime() - startTime);
984}
985
986
987template<class MatrixType>
988void
990apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
991 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
992 Teuchos::ETransp mode,
993 scalar_type alpha,
994 scalar_type beta) const
995{
996 using Teuchos::RCP;
997 using Teuchos::rcpFromRef;
998
999 TEUCHOS_TEST_FOR_EXCEPTION(
1000 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::apply: The matrix is "
1001 "null. Please call setMatrix() with a nonnull input, then initialize() "
1002 "and compute(), before calling this method.");
1003 TEUCHOS_TEST_FOR_EXCEPTION(
1004 ! isComputed (), std::runtime_error,
1005 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1006 "you must call compute() before calling this method.");
1007 TEUCHOS_TEST_FOR_EXCEPTION(
1008 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1009 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1010 "X.getNumVectors() = " << X.getNumVectors ()
1011 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
1012 TEUCHOS_TEST_FOR_EXCEPTION(
1013 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1014 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1015 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1016 "fixed. There is a FIXME in this file about this very issue.");
1017#ifdef HAVE_IFPACK2_DEBUG
1018 {
1019 const magnitude_type D_nrm1 = D_->norm1 ();
1020 TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1021 Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
1022 X.norm1 (norms ());
1023 bool good = true;
1024 for (size_t j = 0; j < X.getNumVectors (); ++j) {
1025 if (STM::isnaninf (norms[j])) {
1026 good = false;
1027 break;
1028 }
1029 }
1030 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1031 }
1032#endif // HAVE_IFPACK2_DEBUG
1033
1034 const scalar_type one = STS::one ();
1035 const scalar_type zero = STS::zero ();
1036
1037 Teuchos::Time timer ("RILUK::apply");
1038 double startTime = timer.wallTime();
1039 { // Start timing
1040 Teuchos::TimeMonitor timeMon (timer);
1041 if (alpha == one && beta == zero) {
1042 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1043#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1044 //NOTE (Nov-15-2022):
1045 //This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1046 //since cusparseSpSV_solve() does not support in-place computation
1047 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1048
1049 // Start by solving L Y_tmp = X for Y_tmp.
1050 L_solver_->apply (X, Y_tmp, mode);
1051
1052 if (!this->isKokkosKernelsSpiluk_) {
1053 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1054 // write "solve D Y = Y for Y."
1055 Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1056 }
1057
1058 U_solver_->apply (Y_tmp, Y, mode); // Solve U Y = Y_tmp.
1059#else
1060 // Start by solving L Y = X for Y.
1061 L_solver_->apply (X, Y, mode);
1062
1063 if (!this->isKokkosKernelsSpiluk_) {
1064 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1065 // write "solve D Y = Y for Y."
1066 Y.elementWiseMultiply (one, *D_, Y, zero);
1067 }
1068
1069 U_solver_->apply (Y, Y, mode); // Solve U Y = Y.
1070#endif
1071 }
1072 else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1073#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1074 //NOTE (Nov-15-2022):
1075 //This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1076 //since cusparseSpSV_solve() does not support in-place computation
1077 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1078
1079 // Start by solving U^P Y_tmp = X for Y_tmp.
1080 U_solver_->apply (X, Y_tmp, mode);
1081
1082 if (!this->isKokkosKernelsSpiluk_) {
1083 // Solve D^P Y = Y.
1084 //
1085 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1086 // need to do an elementwise multiply with the conjugate of
1087 // D_, not just with D_ itself.
1088 Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1089 }
1090
1091 L_solver_->apply (Y_tmp, Y, mode); // Solve L^P Y = Y_tmp.
1092#else
1093 // Start by solving U^P Y = X for Y.
1094 U_solver_->apply (X, Y, mode);
1095
1096 if (!this->isKokkosKernelsSpiluk_) {
1097 // Solve D^P Y = Y.
1098 //
1099 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1100 // need to do an elementwise multiply with the conjugate of
1101 // D_, not just with D_ itself.
1102 Y.elementWiseMultiply (one, *D_, Y, zero);
1103 }
1104
1105 L_solver_->apply (Y, Y, mode); // Solve L^P Y = Y.
1106#endif
1107 }
1108 }
1109 else { // alpha != 1 or beta != 0
1110 if (alpha == zero) {
1111 // The special case for beta == 0 ensures that if Y contains Inf
1112 // or NaN values, we replace them with 0 (following BLAS
1113 // convention), rather than multiplying them by 0 to get NaN.
1114 if (beta == zero) {
1115 Y.putScalar (zero);
1116 } else {
1117 Y.scale (beta);
1118 }
1119 } else { // alpha != zero
1120 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1121 apply (X, Y_tmp, mode);
1122 Y.update (alpha, Y_tmp, beta);
1123 }
1124 }
1125 }//end timing
1126
1127#ifdef HAVE_IFPACK2_DEBUG
1128 {
1129 Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
1130 Y.norm1 (norms ());
1131 bool good = true;
1132 for (size_t j = 0; j < Y.getNumVectors (); ++j) {
1133 if (STM::isnaninf (norms[j])) {
1134 good = false;
1135 break;
1136 }
1137 }
1138 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1139 }
1140#endif // HAVE_IFPACK2_DEBUG
1141
1142 ++numApply_;
1143 applyTime_ += (timer.wallTime() - startTime);
1144}
1145
1146
1147//VINH comment out since multiply() is not needed anywhere
1148//template<class MatrixType>
1149//void RILUK<MatrixType>::
1150//multiply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1151// Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1152// const Teuchos::ETransp mode) const
1153//{
1154// const scalar_type zero = STS::zero ();
1155// const scalar_type one = STS::one ();
1156//
1157// if (mode != Teuchos::NO_TRANS) {
1158// U_->apply (X, Y, mode); //
1159// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1160//
1161// // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we need
1162// // to do an elementwise multiply with the conjugate of D_, not
1163// // just with D_ itself.
1164// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1165//
1166// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y
1167// L_->apply (Y_tmp, Y, mode);
1168// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1169// }
1170// else {
1171// L_->apply (X, Y, mode);
1172// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1173// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1174// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y1
1175// U_->apply (Y_tmp, Y, mode);
1176// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1177// }
1178//}
1179
1180template<class MatrixType>
1182{
1183 std::ostringstream os;
1184
1185 // Output is a valid YAML dictionary in flow style. If you don't
1186 // like everything on a single line, you should call describe()
1187 // instead.
1188 os << "\"Ifpack2::RILUK\": {";
1189 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1190 << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1191
1192 os << "Level-of-fill: " << getLevelOfFill() << ", ";
1193
1194 if (A_.is_null ()) {
1195 os << "Matrix: null";
1196 }
1197 else {
1198 os << "Global matrix dimensions: ["
1199 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
1200 << ", Global nnz: " << A_->getGlobalNumEntries();
1201 }
1202
1203 if (! L_solver_.is_null ()) os << ", " << L_solver_->description ();
1204 if (! U_solver_.is_null ()) os << ", " << U_solver_->description ();
1205
1206 os << "}";
1207 return os.str ();
1208}
1209
1210} // namespace Ifpack2
1211
1212#define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1213 template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1214
1215#endif
Access only local rows and columns of a sparse matrix.
Definition Ifpack2_LocalFilter_decl.hpp:163
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:88
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition Ifpack2_RILUK_decl.hpp:254
crs_matrix_type::impl_scalar_type impl_scalar_type
Scalar type stored in Kokkos::Views (CrsMatrix and MultiVector).
Definition Ifpack2_RILUK_decl.hpp:293
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:455
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition Ifpack2_RILUK_decl.hpp:290
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_RILUK_def.hpp:121
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_RILUK_def.hpp:990
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_RILUK_decl.hpp:269
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_RILUK_def.hpp:256
Teuchos::RCP< const row_matrix_type > A_
The (original) input matrix for which to compute ILU(k).
Definition Ifpack2_RILUK_decl.hpp:592
Teuchos::RCP< crs_matrix_type > U_
The U (upper triangular) factor of ILU(k).
Definition Ifpack2_RILUK_decl.hpp:611
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:207
std::string description() const
A one-line description of this object.
Definition Ifpack2_RILUK_def.hpp:1181
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_RILUK_def.hpp:140
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:257
bool isComputed() const
Whether compute() has been called on this object.
Definition Ifpack2_RILUK_decl.hpp:373
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:193
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:266
Teuchos::RCP< const row_matrix_type > A_local_
The matrix whos numbers are used to to compute ILU(k). The graph may be computed using a crs_matrix_t...
Definition Ifpack2_RILUK_decl.hpp:601
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition Ifpack2_RILUK_def.hpp:408
Teuchos::RCP< LocalSparseTriangularSolver< row_matrix_type > > L_solver_
Sparse triangular solver for L.
Definition Ifpack2_RILUK_decl.hpp:609
Teuchos::RCP< Ifpack2::IlukGraph< Tpetra::CrsGraph< local_ordinal_type, global_ordinal_type, node_type >, kk_handle_type > > Graph_
The ILU(k) graph.
Definition Ifpack2_RILUK_decl.hpp:597
int getLevelOfFill() const
Get level of fill (the "k" in ILU(k)).
Definition Ifpack2_RILUK_decl.hpp:535
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition Ifpack2_RILUK_def.hpp:415
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_RILUK_def.hpp:237
void setParameters(const Teuchos::ParameterList &params)
Definition Ifpack2_RILUK_def.hpp:303
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:260
Teuchos::RCP< crs_matrix_type > L_
The L (lower triangular) factor of ILU(k).
Definition Ifpack2_RILUK_decl.hpp:607
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:263
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:176
Teuchos::RCP< LocalSparseTriangularSolver< row_matrix_type > > U_solver_
Sparse triangular solver for U.
Definition Ifpack2_RILUK_decl.hpp:613
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_RILUK_def.hpp:220
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:729
Teuchos::RCP< vec_type > D_
The diagonal entries of the ILU(k) factorization.
Definition Ifpack2_RILUK_decl.hpp:616
bool isKokkosKernelsSpiluk_
Optional KokkosKernels implementation.
Definition Ifpack2_RILUK_decl.hpp:638
bool isInitialized() const
Whether initialize() has been called on this object.
Definition Ifpack2_RILUK_decl.hpp:369
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:74