Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_AdditiveSchwarzFilter_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
44#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
45
46#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
47#include "KokkosKernels_Sorting.hpp"
48#include "KokkosSparse_SortCrs.hpp"
49#include "Kokkos_Bitset.hpp"
50
51namespace Ifpack2
52{
53namespace Details
54{
55
56template<class MatrixType>
57AdditiveSchwarzFilter<MatrixType>::
58AdditiveSchwarzFilter(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
59 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
60 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
61 bool filterSingletons)
62{
63 setup(A_unfiltered, perm, reverseperm, filterSingletons);
64}
65
66template<class MatrixType>
67void AdditiveSchwarzFilter<MatrixType>::
68setup(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
69 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
70 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
71 bool filterSingletons)
72{
73 using Teuchos::RCP;
74 using Teuchos::rcp;
75 using Teuchos::rcp_dynamic_cast;
76 //Check that A is an instance of allowed types, either:
77 // - Tpetra::CrsMatrix
78 // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_)
79 TEUCHOS_TEST_FOR_EXCEPTION(
80 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
81 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
82 std::invalid_argument,
83 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
84 A_unfiltered_ = A_unfiltered;
85 local_matrix_type A_local, A_halo;
86 bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
87 if(haveHalo)
88 {
89 auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
90 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
91 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
92 }
93 else
94 {
95 auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
96 A_local = A_crs->getLocalMatrixDevice();
97 //leave A_halo empty in this case
98 }
99 //Check that perm and reversePerm are the same length and match the number of local rows in A
100 TEUCHOS_TEST_FOR_EXCEPTION(
101 perm.size() != reverseperm.size(),
102 std::invalid_argument,
103 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
104 TEUCHOS_TEST_FOR_EXCEPTION(
105 (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(),
106 std::invalid_argument,
107 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
108 //First, compute the permutation tables (that exclude singletons, if requested)
109 FilterSingletons_ = filterSingletons;
110 local_ordinal_type numLocalRows;
111 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
112 if(FilterSingletons_)
113 {
114 //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size)
115 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows);
116 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows);
117 auto singletonsDevice = singletons_.view_device();
118 singletons_.modify_device();
119 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
120 singletonDiagonals_.modify_device();
121 local_ordinal_type numSingletons;
122 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
123 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
124 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass)
125 {
126 bool isSingleton = true;
127 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
128 if(i < A_local.numRows())
129 {
130 //row i is in original local matrix
131 size_type rowBegin = A_local.graph.row_map(i);
132 size_type rowEnd = A_local.graph.row_map(i + 1);
133 for(size_type j = rowBegin; j < rowEnd; j++)
134 {
135 local_ordinal_type entry = A_local.graph.entries(j);
136 if(entry < totalLocalRows && entry != i)
137 {
138 isSingleton = false;
139 break;
140 }
141 if(finalPass && entry == i)
142 {
143 //note: using a sum to compute the diagonal value, in case entries are not compressed.
144 singletonValue += A_local.values(j);
145 }
146 }
147 }
148 else
149 {
150 //row i is in halo
151 local_ordinal_type row = i - A_local.numRows();
152 size_type rowBegin = A_halo.graph.row_map(row);
153 size_type rowEnd = A_halo.graph.row_map(row + 1);
154 for(size_type j = rowBegin; j < rowEnd; j++)
155 {
156 local_ordinal_type entry = A_halo.graph.entries(j);
157 if(entry < totalLocalRows && entry != i)
158 {
159 isSingleton = false;
160 break;
161 }
162 if(finalPass && entry == i)
163 {
164 singletonValue += A_halo.values(j);
165 }
166 }
167 }
168 if(isSingleton)
169 {
170 if(finalPass)
171 {
172 isSingletonBitset.set(i);
173 singletonsDevice(lnumSingletons) = i;
174 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
175 }
176 lnumSingletons++;
177 }
178 }, numSingletons);
179 numSingletons_ = numSingletons;
180 //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix.
181 numLocalRows = totalLocalRows - numSingletons_;
182 //Using the list of singletons, create the reduced permutations.
183 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows);
184 perm_.modify_device();
185 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows);
186 reverseperm_.modify_device();
187 auto permView = perm_.view_device();
188 auto reversepermView = reverseperm_.view_device();
189 //Get the original inverse permutation on device
190 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows);
191 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
192 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
193 //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation.
194 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
195 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass)
196 {
197 local_ordinal_type origRow = origpermView(i);
198 if(!isSingletonBitset.test(origRow))
199 {
200 if(finalPass)
201 {
202 //mark the mapping in both directions between origRow and lindex (a row in filtered A)
203 reversepermView(lindex) = origRow;
204 permView(origRow) = lindex;
205 }
206 lindex++;
207 }
208 else
209 {
210 //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A.
211 if(finalPass)
212 permView(origRow) = local_ordinal_type(-1);
213 }
214 });
215 perm_.sync_host();
216 reverseperm_.sync_host();
217 }
218 else
219 {
220 //We will keep all the local rows, so use the permutation as is
221 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size());
222 perm_.modify_host();
223 auto permHost = perm_.view_host();
224 for(size_t i = 0; i < (size_t) perm.size(); i++)
225 permHost(i) = perm[i];
226 perm_.sync_device();
227 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size());
228 reverseperm_.modify_host();
229 auto reversepermHost = reverseperm_.view_host();
230 for(size_t i = 0; i < (size_t) reverseperm.size(); i++)
231 reversepermHost(i) = reverseperm[i];
232 reverseperm_.sync_device();
233 numSingletons_ = 0;
234 numLocalRows = totalLocalRows;
235 }
236 auto permView = perm_.view_device();
237 auto reversepermView = reverseperm_.view_device();
238 //Fill the local matrix of A_ (filtered and permuted)
239 //First, construct the rowmap by counting the entries in each row
240 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1);
241 Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
242 KOKKOS_LAMBDA(local_ordinal_type i)
243 {
244 if(i == numLocalRows)
245 {
246 localRowptrs(i) = 0;
247 return;
248 }
249 //Count the entries that will be in filtered row i.
250 //This means entries which correspond to local, non-singleton rows/columns.
251 local_ordinal_type numEntries = 0;
252 local_ordinal_type origRow = reversepermView(i);
253 if(origRow < A_local.numRows())
254 {
255 //row i is in original local matrix
256 size_type rowBegin = A_local.graph.row_map(origRow);
257 size_type rowEnd = A_local.graph.row_map(origRow + 1);
258 for(size_type j = rowBegin; j < rowEnd; j++)
259 {
260 local_ordinal_type entry = A_local.graph.entries(j);
261 if(entry < totalLocalRows && permView(entry) != -1)
262 numEntries++;
263 }
264 }
265 else
266 {
267 //row i is in halo
268 local_ordinal_type row = origRow - A_local.numRows();
269 size_type rowBegin = A_halo.graph.row_map(row);
270 size_type rowEnd = A_halo.graph.row_map(row + 1);
271 for(size_type j = rowBegin; j < rowEnd; j++)
272 {
273 local_ordinal_type entry = A_halo.graph.entries(j);
274 if(entry < totalLocalRows && permView(entry) != -1)
275 numEntries++;
276 }
277 }
278 localRowptrs(i) = numEntries;
279 });
280 //Prefix sum to finish computing the rowptrs
281 size_type numLocalEntries;
282 Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
283 KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass)
284 {
285 size_type numEnt = localRowptrs(i);
286 if(finalPass)
287 localRowptrs(i) = lnumEntries;
288 lnumEntries += numEnt;
289 }, numLocalEntries);
290 //Allocate and fill local entries and values
291 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries);
292 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries);
293 //Create the local matrix with the uninitialized entries/values, then fill it.
294 local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
295 fillLocalMatrix(localMatrix);
296 //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map)
297#ifdef HAVE_IFPACK2_DEBUG
298 TEUCHOS_TEST_FOR_EXCEPTION(
299 ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: "
300 "A's Map pairs are not fitted to each other on Process "
301 << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
302 "communicator. "
303 "This means that LocalFilter does not currently know how to work with A. "
304 "This will change soon. Please see discussion of Bug 5992.");
305#endif // HAVE_IFPACK2_DEBUG
306
307 // Build the local communicator (containing this process only).
308 RCP<const Teuchos::Comm<int> > localComm;
309#ifdef HAVE_MPI
310 localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
311#else
312 localComm = rcp (new Teuchos::SerialComm<int> ());
313#endif // HAVE_MPI
314 //All 4 maps are the same for a local square operator.
315 localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
316 //Create the inner filtered matrix.
317 auto crsParams = rcp(new Teuchos::ParameterList);
318 crsParams->template set<bool>("sorted", true);
319 //NOTE: this is the fastest way to construct A_ - it's created as fillComplete,
320 //and no communication needs to be done since localMap_ uses a local comm.
321 //It does need to copy the whole local matrix to host when DualViews are constructed
322 //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix.
323 //It also needs to compute local constants (maxNumRowEntries) but this should be a
324 //cheap operation relative to what this constructor already did.
325 A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
326}
327
328template<class MatrixType>
329AdditiveSchwarzFilter<MatrixType>::~AdditiveSchwarzFilter () {}
330
331template<class MatrixType>
332void AdditiveSchwarzFilter<MatrixType>::updateMatrixValues()
333{
334 //Get the local matrix back from A_
335 auto localMatrix = A_->getLocalMatrixDevice();
336 //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_.
337 fillLocalMatrix(localMatrix);
338 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
339}
340
341template<class MatrixType>
342Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::crs_matrix_type>
343AdditiveSchwarzFilter<MatrixType>::getFilteredMatrix() const
344{
345 return A_;
346}
347
348template<class MatrixType>
349void AdditiveSchwarzFilter<MatrixType>::fillLocalMatrix(typename AdditiveSchwarzFilter<MatrixType>::local_matrix_type localMatrix)
350{
351 auto localRowptrs = localMatrix.graph.row_map;
352 auto localEntries = localMatrix.graph.entries;
353 auto localValues = localMatrix.values;
354 auto permView = perm_.view_device();
355 auto reversepermView = reverseperm_.view_device();
356 local_matrix_type A_local, A_halo;
357 bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
358 if(haveHalo)
359 {
360 auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
361 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
362 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
363 }
364 else
365 {
366 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
367 A_local = A_crs->getLocalMatrixDevice();
368 //leave A_halo empty in this case
369 }
370 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
371 //Fill entries and values
372 Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
373 KOKKOS_LAMBDA(local_ordinal_type i)
374 {
375 //Count the entries that will be in filtered row i.
376 //This means entries which correspond to local, non-singleton rows/columns.
377 size_type outRowStart = localRowptrs(i);
378 local_ordinal_type insertPos = 0;
379 local_ordinal_type origRow = reversepermView(i);
380 if(origRow < A_local.numRows())
381 {
382 //row i is in original local matrix
383 size_type rowBegin = A_local.graph.row_map(origRow);
384 size_type rowEnd = A_local.graph.row_map(origRow + 1);
385 for(size_type j = rowBegin; j < rowEnd; j++)
386 {
387 local_ordinal_type entry = A_local.graph.entries(j);
388 if(entry < totalLocalRows)
389 {
390 auto newCol = permView(entry);
391 if(newCol != -1)
392 {
393 localEntries(outRowStart + insertPos) = newCol;
394 localValues(outRowStart + insertPos) = A_local.values(j);
395 insertPos++;
396 }
397 }
398 }
399 }
400 else
401 {
402 //row i is in halo
403 local_ordinal_type row = origRow - A_local.numRows();
404 size_type rowBegin = A_halo.graph.row_map(row);
405 size_type rowEnd = A_halo.graph.row_map(row + 1);
406 for(size_type j = rowBegin; j < rowEnd; j++)
407 {
408 local_ordinal_type entry = A_halo.graph.entries(j);
409 if(entry < totalLocalRows)
410 {
411 auto newCol = permView(entry);
412 if(newCol != -1)
413 {
414 localEntries(outRowStart + insertPos) = newCol;
415 localValues(outRowStart + insertPos) = A_halo.values(j);
416 insertPos++;
417 }
418 }
419 }
420 }
421 });
422 //Sort the matrix, since the reordering can shuffle the entries into any order.
423 KokkosSparse::sort_crs_matrix(localMatrix);
424}
425
426template<class MatrixType>
427Teuchos::RCP<const Teuchos::Comm<int> > AdditiveSchwarzFilter<MatrixType>::getComm() const
428{
429 return localMap_->getComm();
430}
431
432template<class MatrixType>
433Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
434AdditiveSchwarzFilter<MatrixType>::getRowMap() const
435{
436 return localMap_;
437}
438
439template<class MatrixType>
440Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
441AdditiveSchwarzFilter<MatrixType>::getColMap() const
442{
443 return localMap_;
444}
445
446template<class MatrixType>
447Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
448AdditiveSchwarzFilter<MatrixType>::getDomainMap() const
449{
450 return localMap_;
451}
452
453template<class MatrixType>
454Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
455AdditiveSchwarzFilter<MatrixType>::getRangeMap() const
456{
457 return localMap_;
458}
459
460template<class MatrixType>
461Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
462 typename MatrixType::global_ordinal_type,
463 typename MatrixType::node_type> >
464AdditiveSchwarzFilter<MatrixType>::getGraph() const
465{
466 //NOTE BMK 6-22: this is to maintain compatibilty with LocalFilter.
467 //Situations like overlapping AdditiveSchwarz + BlockRelaxation
468 //require the importer of the original distributed graph, even though the
469 //BlockRelaxation is preconditioning a local matrix (A_).
470 return A_unfiltered_->getGraph();
471}
472
473template<class MatrixType>
474typename MatrixType::local_ordinal_type AdditiveSchwarzFilter<MatrixType>::getBlockSize() const
475{
476 return A_->getBlockSize();
477}
478
479template<class MatrixType>
480global_size_t AdditiveSchwarzFilter<MatrixType>::getGlobalNumRows() const
481{
482 return A_->getGlobalNumRows();
483}
484
485template<class MatrixType>
486global_size_t AdditiveSchwarzFilter<MatrixType>::getGlobalNumCols() const
487{
488 return A_->getGlobalNumCols();
489}
490
491template<class MatrixType>
492size_t AdditiveSchwarzFilter<MatrixType>::getLocalNumRows() const
493{
494 return A_->getLocalNumRows();
495}
496
497template<class MatrixType>
498size_t AdditiveSchwarzFilter<MatrixType>::getLocalNumCols() const
499{
500 return A_->getLocalNumCols();
501}
502
503template<class MatrixType>
504typename MatrixType::global_ordinal_type AdditiveSchwarzFilter<MatrixType>::getIndexBase() const
505{
506 return A_->getIndexBase();
507}
508
509template<class MatrixType>
510global_size_t AdditiveSchwarzFilter<MatrixType>::getGlobalNumEntries() const
511{
512 return getLocalNumEntries();
513}
514
515template<class MatrixType>
516size_t AdditiveSchwarzFilter<MatrixType>::getLocalNumEntries() const
517{
518 return A_->getLocalNumEntries();
519}
520
521template<class MatrixType>
522size_t AdditiveSchwarzFilter<MatrixType>::
523getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
524{
525 return A_->getNumEntriesInGlobalRow(globalRow);
526}
527
528template<class MatrixType>
529size_t AdditiveSchwarzFilter<MatrixType>::
530getNumEntriesInLocalRow (local_ordinal_type localRow) const
531{
532 return A_->getNumEntriesInLocalRow(localRow);
533}
534
535template<class MatrixType>
536size_t AdditiveSchwarzFilter<MatrixType>::getGlobalMaxNumRowEntries() const
537{
538 //Use A_unfiltered_ to get a valid upper bound for this.
539 //This lets us support this function without computing global constants on A_.
540 return A_unfiltered_->getGlobalMaxNumRowEntries();
541}
542
543template<class MatrixType>
544size_t AdditiveSchwarzFilter<MatrixType>::getLocalMaxNumRowEntries() const
545{
546 //Use A_unfiltered_ to get a valid upper bound for this
547 return A_unfiltered_->getLocalMaxNumRowEntries();
548}
549
550
551template<class MatrixType>
552bool AdditiveSchwarzFilter<MatrixType>::hasColMap() const
553{
554 return true;
555}
556
557
558template<class MatrixType>
559bool AdditiveSchwarzFilter<MatrixType>::isLocallyIndexed() const
560{
561 return true;
562}
563
564
565template<class MatrixType>
566bool AdditiveSchwarzFilter<MatrixType>::isGloballyIndexed() const
567{
568 return false;
569}
570
571
572template<class MatrixType>
573bool AdditiveSchwarzFilter<MatrixType>::isFillComplete() const
574{
575 return true;
576}
577
578
579template<class MatrixType>
580void AdditiveSchwarzFilter<MatrixType>::
581 getGlobalRowCopy (global_ordinal_type globalRow,
582 nonconst_global_inds_host_view_type &globalInd,
583 nonconst_values_host_view_type &val,
584 size_t& numEntries) const
585{
586 throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
587}
588
589template<class MatrixType>
590void AdditiveSchwarzFilter<MatrixType>::
591getLocalRowCopy (local_ordinal_type LocalRow,
592 nonconst_local_inds_host_view_type &Indices,
593 nonconst_values_host_view_type &Values,
594 size_t& NumEntries) const
595
596{
597 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
598}
599
600template<class MatrixType>
601void AdditiveSchwarzFilter<MatrixType>::getGlobalRowView(global_ordinal_type /* GlobalRow */,
602 global_inds_host_view_type &/*indices*/,
603 values_host_view_type &/*values*/) const
604{
605 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
606}
607
608template<class MatrixType>
609void AdditiveSchwarzFilter<MatrixType>::getLocalRowView(local_ordinal_type LocalRow,
610 local_inds_host_view_type & indices,
611 values_host_view_type & values) const
612{
613 A_->getLocalRowView(LocalRow, indices, values);
614}
615
616template<class MatrixType>
617void AdditiveSchwarzFilter<MatrixType>::
618getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag) const
619{
620 // This is somewhat dubious as to how the maps match.
621 A_->getLocalDiagCopy(diag);
622}
623
624template<class MatrixType>
625void AdditiveSchwarzFilter<MatrixType>::leftScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
626{
627 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
628}
629
630template<class MatrixType>
631void AdditiveSchwarzFilter<MatrixType>::rightScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
632{
633 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
634}
635
636template<class MatrixType>
637void AdditiveSchwarzFilter<MatrixType>::
638apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
639 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
640 Teuchos::ETransp mode,
641 scalar_type alpha,
642 scalar_type beta) const
643{
644 A_->apply(X, Y, mode, alpha, beta);
645}
646
647template<class MatrixType>
648void AdditiveSchwarzFilter<MatrixType>::
649CreateReducedProblem(
650 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
651 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
652 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB) const
653{
654 //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews,
655 //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
656 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
657 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
658 local_ordinal_type numVecs = OverlappingB.getNumVectors();
659 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
660 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
661 auto singletons = singletons_.view_device();
662 auto singletonDiags = singletonDiagonals_.view_device();
663 auto revperm = reverseperm_.view_device();
664 //First, solve the singletons.
665 {
666 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
667 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
668 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
669 {
670 local_ordinal_type row = singletons(singletonIndex);
671 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
672 });
673 }
674 //Next, permute OverlappingB to ReducedReorderedB.
675 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
676 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
677 {
678 reducedB(row, col) = b(revperm(row), col);
679 });
680 //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction)
681 //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity.
682 //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB.
683 if(numSingletons_)
684 {
685 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false);
686 A_unfiltered_->apply(OverlappingY, singletonUpdates);
687 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
688 // auto revperm = reverseperm_.view_device();
689 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
690 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
691 {
692 local_ordinal_type origRow = revperm(row);
693 reducedB(row, col) -= updatesView(origRow, col);
694 });
695 }
696}
697
698template<class MatrixType>
699void AdditiveSchwarzFilter<MatrixType>::UpdateLHS(
700 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
701 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY) const
702{
703 //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews,
704 //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
705 TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
706 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
707 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
708 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
709 auto revperm = reverseperm_.view_device();
710 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
711 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
712 {
713 y(revperm(row), col) = reducedY(row, col);
714 });
715}
716
717template<class MatrixType>
718bool AdditiveSchwarzFilter<MatrixType>::hasTransposeApply() const
719{
720 return true;
721}
722
723
724template<class MatrixType>
725bool AdditiveSchwarzFilter<MatrixType>::supportsRowViews() const
726{
727 return true;
728}
729
730template<class MatrixType>
731typename AdditiveSchwarzFilter<MatrixType>::mag_type AdditiveSchwarzFilter<MatrixType>::getFrobeniusNorm() const
732{
733 // Reordering doesn't change the Frobenius norm.
734 return A_->getFrobeniusNorm ();
735}
736
737template<class MatrixType>
738bool
739AdditiveSchwarzFilter<MatrixType>::
740mapPairsAreFitted (const row_matrix_type& A)
741{
742 const map_type& rangeMap = * (A.getRangeMap ());
743 const map_type& rowMap = * (A.getRowMap ());
744 const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
745
746 const map_type& domainMap = * (A.getDomainMap ());
747 const map_type& columnMap = * (A.getColMap ());
748 const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
749
750 //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
751 //This means that it can return different values on different ranks. This can cause MPI to hang,
752 //even though it's supposed to terminate globally when any single rank does.
753 //
754 //This function doesn't need to be fast since it's debug-only code.
755 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
756 int globalSuccess;
757
758 Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
759
760 return globalSuccess == 1;
761}
762
763
764template<class MatrixType>
765bool
766AdditiveSchwarzFilter<MatrixType>::
767mapPairIsFitted (const map_type& map1, const map_type& map2)
768{
769 return map1.isLocallyFitted (map2);
770}
771
772
773}} // namespace Ifpack2::Details
774
775#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
776 template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
777
778#endif
779
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:74