MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SaPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SAPFACTORY_KOKKOS_DEF_HPP
48
49#ifdef out
50#include "KokkosKernels_Handle.hpp"
51#include "KokkosSparse_spgemm.hpp"
52#include "KokkosSparse_spmv.hpp"
53#endif
55
56#include <Xpetra_Matrix.hpp>
57#include <Xpetra_IteratorOps.hpp>
58
60#include "MueLu_Level.hpp"
61#include "MueLu_MasterList.hpp"
62#include "MueLu_Monitor.hpp"
63#include "MueLu_PerfUtils.hpp"
64#include "MueLu_TentativePFactory.hpp"
65#include "MueLu_Utilities.hpp"
66
67#include <sstream>
68
69namespace MueLu {
70
71 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
73 RCP<ParameterList> validParamList = rcp(new ParameterList());
74
75#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
76 SET_VALID_ENTRY("sa: damping factor");
77 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
78 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
79 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
80 SET_VALID_ENTRY("sa: enforce constraints");
81 SET_VALID_ENTRY("tentative: calculate qr");
82 SET_VALID_ENTRY("sa: max eigenvalue");
83#undef SET_VALID_ENTRY
84
85 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
86 validParamList->set< RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
87
88 // Make sure we don't recursively validate options for the matrixmatrix kernels
89 ParameterList norecurse;
90 norecurse.disableRecursiveValidation();
91 validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
92
93
94 return validParamList;
95 }
96
97 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
99 Input(fineLevel, "A");
100
101 // Get default tentative prolongator factory
102 // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
103 RCP<const FactoryBase> initialPFact = GetFactory("P");
104 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
105 coarseLevel.DeclareInput("P", initialPFact.get(), this);
106 }
107
108 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
112
113 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
115 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
116
117 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
118
119 // Get default tentative prolongator factory
120 // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
121 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
122 RCP<const FactoryBase> initialPFact = GetFactory("P");
123 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
124 const ParameterList& pL = GetParameterList();
125
126 // Level Get
127 RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
128 RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());
129 RCP<Matrix> finalP;
130 // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
131 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
132 if (Ptent == Teuchos::null) {
133 finalP = Teuchos::null;
134 Set(coarseLevel, "P", finalP);
135 return;
136 }
137
138 if(restrictionMode_) {
139 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
140
141 A = Utilities::Transpose(*A, true); // build transpose of A explicitly
142 }
143
144 //Build final prolongator
145
146 // Reuse pattern if available
147 RCP<ParameterList> APparams;
148 if(pL.isSublist("matrixmatrix: kernel params"))
149 APparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
150 else
151 APparams= rcp(new ParameterList);
152 if (coarseLevel.IsAvailable("AP reuse data", this)) {
153 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
154
155 APparams = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", this);
156
157 if (APparams->isParameter("graph"))
158 finalP = APparams->get< RCP<Matrix> >("graph");
159 }
160 // By default, we don't need global constants for SaP
161 APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false));
162 APparams->set("compute global constants",APparams->get("compute global constants",false));
163
164 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
165 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
166 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
167 const bool useAbsValueRowSum = pL.get<bool> ("sa: use rowsumabs diagonal scaling");
168 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
169 const bool enforceConstraints= pL.get<bool>("sa: enforce constraints");
170 const SC userDefinedMaxEigen = as<SC>(pL.get<double>("sa: max eigenvalue"));
171
172 // Sanity checking
173 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints,Exceptions::RuntimeError,
174 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
175
176 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
177
178 SC lambdaMax;
179 RCP<Vector> invDiag;
180 if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
181 {
182 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
183 lambdaMax = A->GetMaxEigenvalueEstimate();
184 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
185 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
186 ( (useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
187 Magnitude stopTol = 1e-4;
188 invDiag = Utilities::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, Teuchos::ScalarTraits<SC>::zero(), useAbsValueRowSum);
189 if (useAbsValueRowSum)
190 lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
191 else
192 lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol);
193 A->SetMaxEigenvalueEstimate(lambdaMax);
194 } else {
195 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
196 }
197 }
198 else {
199 lambdaMax = userDefinedMaxEigen;
200 A->SetMaxEigenvalueEstimate(lambdaMax);
201 }
202 GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
203
204 {
205 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
206 {
207 SubFactoryMonitor m3(*this, "Diagonal Extraction", coarseLevel);
208 if (useAbsValueRowSum)
209 GetOStream(Runtime0) << "Using rowSumAbs diagonal" << std::endl;
210 if (invDiag == Teuchos::null)
211 invDiag = Utilities::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, Teuchos::ScalarTraits<SC>::zero(), useAbsValueRowSum);
212 }
213 SC omega = dampingFactor / lambdaMax;
214 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
215
216 {
217 SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
218 // finalP = Ptent + (I - \omega D^{-1}A) Ptent
219 finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
220 if (enforceConstraints) SatisfyPConstraints( A, finalP);
221 }
222 }
223
224 } else {
225 finalP = Ptent;
226 }
227
228 // Level Set
229 if (!restrictionMode_) {
230 // prolongation factory is in prolongation mode
231 if(!finalP.is_null()) {std::ostringstream oss; oss << "P_" << coarseLevel.GetLevelID(); finalP->setObjectLabel(oss.str());}
232 Set(coarseLevel, "P", finalP);
233
234 // NOTE: EXPERIMENTAL
235 if (Ptent->IsView("stridedMaps"))
236 finalP->CreateView("stridedMaps", Ptent);
237
238 } else {
239 // prolongation factory is in restriction mode
240 RCP<Matrix> R = Utilities::Transpose(*finalP, true);
241 Set(coarseLevel, "R", R);
242 if(!R.is_null()) {std::ostringstream oss; oss << "R_" << coarseLevel.GetLevelID(); R->setObjectLabel(oss.str());}
243
244 // NOTE: EXPERIMENTAL
245 if (Ptent->IsView("stridedMaps"))
246 R->CreateView("stridedMaps", Ptent, true);
247 }
248
249 if (IsPrint(Statistics2)) {
250 RCP<ParameterList> params = rcp(new ParameterList());
251 params->set("printLoadBalancingInfo", true);
252 params->set("printCommInfo", true);
253 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
254 }
255
256 } //Build()
257
258 // Analyze the grid transfer produced by smoothed aggregation and make
259 // modifications if it does not look right. In particular, if there are
260 // negative entries or entries larger than 1, modify P's rows.
261 //
262 // Note: this kind of evaluation probably only makes sense if not doing QR
263 // when constructing tentative P.
264 //
265 // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
266 // these entries to the constraint value and modify the rest of the row
267 // so that the row sum remains the same as before by adding an equal
268 // amount to each remaining entry. However, if the original row sum value
269 // violates the constraints, we set the row sum back to 1 (the row sum of
270 // tentative P). After doing the modification to a row, we need to check
271 // again the entire row to make sure that the modified row does not violate
272 // the constraints.
273
274template<typename local_matrix_type>
276
277 using Scalar= typename local_matrix_type::non_const_value_type;
278 using SC=Scalar;
279 using LO=typename local_matrix_type::non_const_ordinal_type;
280 using Device= typename local_matrix_type::device_type;
281 const Scalar zero = Kokkos::ArithTraits<SC>::zero();
282 const Scalar one = Kokkos::ArithTraits<SC>::one();
284 local_matrix_type localP;
285 Kokkos::View<SC**, Device> ConstraintViolationSum;
286 Kokkos::View<SC**, Device> Rsum;
287 Kokkos::View<size_t**, Device> nPositive;
288
289 constraintKernel(LO nPDEs_,local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
290 {
291 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
292 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
293 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
294 }
295
296 KOKKOS_INLINE_FUNCTION
297 void operator() (const size_t rowIdx) const {
298
299 auto rowPtr = localP.graph.row_map;
300 auto values = localP.values;
301
302 bool checkRow = true;
303
304 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
305
306
307 while (checkRow) {
308
309 // check constraints and compute the row sum
310
311 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
312 Rsum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
313 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(zero)) {
314
315 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
316 values(entryIdx) = zero;
317 }
318 else {
319 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(zero))
320 nPositive(rowIdx, entryIdx%nPDEs) = nPositive(rowIdx, entryIdx%nPDEs) + 1;
321
322 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
323 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += (values(entryIdx) - one);
324 values(entryIdx) = one;
325 }
326 }
327 }
328
329 checkRow = false;
330
331 // take into account any row sum that violates the contraints
332
333 for (size_t k=0; k < (size_t) nPDEs; k++) {
334
335 if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(zero)) {
336 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
337 }
338 else if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
339 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k)+ (one - Rsum(rowIdx, k)); // rstumin
340 }
341 }
342
343 // check if row need modification
344 for (size_t k=0; k < (size_t) nPDEs; k++) {
345 if (Kokkos::ArithTraits<SC>::magnitude(ConstraintViolationSum(rowIdx, k)) != Kokkos::ArithTraits<SC>::magnitude(zero))
346 checkRow = true;
347 }
348 // modify row
349 if (checkRow) {
350 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
351 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(zero)) {
352 values(entryIdx) = values(entryIdx) +
353 (ConstraintViolationSum(rowIdx, entryIdx%nPDEs)/ (Scalar (nPositive(rowIdx, entryIdx%nPDEs)) != zero ? Scalar (nPositive(rowIdx, entryIdx%nPDEs)) : one));
354 }
355 }
356 for (size_t k=0; k < (size_t) nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
357 }
358 for (size_t k=0; k < (size_t) nPDEs; k++) Rsum(rowIdx, k) = zero;
359 for (size_t k=0; k < (size_t) nPDEs; k++) nPositive(rowIdx, k) = 0;
360 } // while (checkRow) ...
361
362 }
363};
364
365template<typename local_matrix_type>
367
368 using Scalar= typename local_matrix_type::non_const_value_type;
369 using SC=Scalar;
370 using LO=typename local_matrix_type::non_const_ordinal_type;
371 using Device= typename local_matrix_type::device_type;
372 using KAT = Kokkos::ArithTraits<SC>;
373 const Scalar zero = Kokkos::ArithTraits<SC>::zero();
374 const Scalar one = Kokkos::ArithTraits<SC>::one();
376 local_matrix_type localP;
377 Kokkos::View<SC**, Device> origSorted;
378 Kokkos::View<SC**, Device> fixedSorted;
379 Kokkos::View<LO**, Device> inds;
380
381 optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
382 {
383 origSorted = Kokkos::View<SC**, Device>("origSorted" , localP_.numRows(), maxRowEntries_);
384 fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
385 inds = Kokkos::View<LO**, Device>("inds" , localP_.numRows(), maxRowEntries_);
386 }
387
388 KOKKOS_INLINE_FUNCTION
389 void operator() (const size_t rowIdx) const {
390
391 auto rowPtr = localP.graph.row_map;
392 auto values = localP.values;
393
394 auto nnz = rowPtr(rowIdx+1) - rowPtr(rowIdx);
395
396 if (nnz != 0) {
397
398 Scalar rsumTarget = zero;
399 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
400
401 {
402 SC aBigNumber;
403 SC rowSumDeviation, temp, delta;
404 SC closestToLeftBoundDist, closestToRghtBoundDist;
405 LO closestToLeftBound, closestToRghtBound;
406 bool flipped;
407
408 SC leftBound = zero;
409 SC rghtBound = one;
410 if ((KAT::real(rsumTarget) >= KAT::real(leftBound*(static_cast<SC>(nnz)))) &&
411 (KAT::real(rsumTarget) <= KAT::real(rghtBound*(static_cast<SC>(nnz))))){ // has Feasible solution
412
413 flipped = false;
414 // compute aBigNumber to handle some corner cases where we need
415 // something large so that an if statement will be false
416 aBigNumber = KAT::zero();
417 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
418 if ( KAT::magnitude( values(entryIdx) ) > KAT::magnitude(aBigNumber))
419 aBigNumber = KAT::magnitude( values(entryIdx) );
420 }
421 aBigNumber = aBigNumber+ (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound))*(100*one);
422
423 LO ind = 0;
424 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
425 origSorted(rowIdx, ind) = values(entryIdx);
426 inds(rowIdx, ind) = ind;
427 ind++;
428 }
429
430 auto sortVals = Kokkos::subview( origSorted, rowIdx, Kokkos::ALL() );
431 auto sortInds = Kokkos::subview( inds, rowIdx, Kokkos::make_pair(0,ind));
432 // Need permutation indices to sort row values from smallest to largest,
433 // and unsort once row constraints are applied.
434 // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
435 for (LO i = 1; i < static_cast<LO>(nnz); ++i){
436 ind = sortInds(i);
437 LO j = i;
438
439 if ( KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0))) ){
440 for ( ; j > 0; --j) sortInds(j) = sortInds(j - 1);
441
442 sortInds(0) = ind;
443 } else {
444 for ( ; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j-1))); --j) sortInds(j) = sortInds(j-1);
445
446 sortInds(j) = ind;
447 }
448 }
449
450
451 for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); //values is no longer used
452 // find entry in origSorted just to the right of the leftBound
453 closestToLeftBound = 0;
454 while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
455
456 // find entry in origSorted just to the right of the rghtBound
457 closestToRghtBound = closestToLeftBound;
458 while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
459
460 // compute distance between closestToLeftBound and the left bound and the
461 // distance between closestToRghtBound and the right bound.
462
463 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
464 if (closestToRghtBound == static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
465 else closestToRghtBoundDist= origSorted(rowIdx, closestToRghtBound) - rghtBound;
466
467 // compute how far the rowSum is off from the target row sum taking into account
468 // numbers that have been shifted to satisfy bound constraint
469
470 rowSumDeviation = leftBound*(static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz-closestToRghtBound))*rghtBound - rsumTarget;
471 for (LO i=closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
472
473 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
474 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
475 // Later we will flip the data back to its original form.
476 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
477 flipped = true;
478 temp = leftBound; leftBound = -rghtBound; rghtBound = temp;
479
480 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
481
482 //TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
483 if ((nnz%2) == 1) origSorted(rowIdx, (nnz/2) ) = -origSorted(rowIdx, (nnz/2) );
484 for (LO i=0; i < static_cast<LO>(nnz/2); i++) {
485 temp=origSorted(rowIdx, i);
486 origSorted(rowIdx, i) = -origSorted(rowIdx, nnz-1-i);
487 origSorted(rowIdx, nnz-i-1) = -temp;
488 }
489
490 /* reverse bounds */
491
492 LO itemp = closestToLeftBound;
493 closestToLeftBound = nnz-closestToRghtBound;
494 closestToRghtBound = nnz-itemp;
495 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
496 if (closestToRghtBound == static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
497 else closestToRghtBoundDist= origSorted(rowIdx, closestToRghtBound) - rghtBound;
498
499 rowSumDeviation = -rowSumDeviation;
500 }
501
502 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
503
504 for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
505 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
506 for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
507
508 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one*1.e-10)*rsumTarget))){ // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
509 if (closestToRghtBound != closestToLeftBound)
510 delta = rowSumDeviation/ static_cast<SC>(closestToRghtBound - closestToLeftBound);
511 else delta = aBigNumber;
512
513 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
514 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
515 rowSumDeviation = zero;
516 for (LO i = closestToLeftBound; i < closestToRghtBound ; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
517 }
518 else {
519 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
520 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
521 closestToLeftBound++;
522 if (closestToLeftBound < static_cast<LO>(nnz)) closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
523 else closestToLeftBoundDist = aBigNumber;
524 }
525 }
526 else {
527 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
528 rowSumDeviation = 0;
529 for (LO i = closestToLeftBound; i < closestToRghtBound ; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
530 }
531 else {
532 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
533 // if (closestToRghtBound < nEntries) {
534 fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
535 closestToRghtBound++;
536 // }
537 if (closestToRghtBound >= static_cast<LO>(nnz)) closestToRghtBoundDist = aBigNumber;
538 else closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
539 }
540 }
541 }
542
543 auto rowStart = rowPtr(rowIdx);
544 if (flipped) {
545 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
546
547 if ((nnz%2) == 1) fixedSorted(rowIdx, (nnz/2) ) = -fixedSorted(rowIdx, (nnz/2) );
548 for (LO i=0; i < static_cast<LO>(nnz/2); i++) {
549 temp=fixedSorted(rowIdx, i);
550 fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz-1-i);
551 fixedSorted(rowIdx, nnz-i-1) = -temp;
552 }
553 }
554 // unsort and update row values with new values
555 for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
556
557 } else { // row does not have feasible solution to match constraint
558 // just set all entries to the same value giving a row sum of 1
559 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one/( static_cast<SC>(nnz) );
560 }
561 }
562
563 }
564
565 }
566};
567
568
569 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
571
572 using Device = typename Matrix::local_matrix_type::device_type;
573 LO nPDEs = A->GetFixedBlockSize();
574
575 using local_mat_type = typename Matrix::local_matrix_type;
576 constraintKernel<local_mat_type> myKernel(nPDEs,P->getLocalMatrixDevice() );
577 Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements() ),
578 myKernel );
579
580 } //SatsifyPConstraints()
581
582 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
584
585 using Device = typename Matrix::local_matrix_type::device_type;
586 LO nPDEs = 1;//A->GetFixedBlockSize();
587
588 using local_mat_type = typename Matrix::local_matrix_type;
589 optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs,P->getLocalMaxNumRowEntries(),P->getLocalMatrixDevice() );
590 Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows() ),
591 myKernel );
592
593 } //SatsifyPConstraints()
594
595} //namespace MueLu
596
597#endif // MUELU_SAPFACTORY_KOKKOS_DEF_HPP
598
599//TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
const RCP< const FactoryBase > GetFactory(const std::string &varName) const
Default implementation of FactoryAcceptor::GetFactory().
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput().
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
virtual const Teuchos::ParameterList & GetParameterList() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, DefaultLocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Statistics0
Print statistics that do not involve significant additional computation.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO