MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
48
49#include <stdlib.h>
50
51#include <Kokkos_Core.hpp>
52
53#include <KokkosBatched_LU_Decl.hpp>
54#include <KokkosBatched_LU_Serial_Impl.hpp>
55#include <KokkosBatched_Trsm_Decl.hpp>
56#include <KokkosBatched_Trsm_Serial_Impl.hpp>
57#include <KokkosBatched_Util.hpp>
58#include <KokkosSparse_CrsMatrix.hpp>
59
60#include <Xpetra_CrsMatrixWrap.hpp>
61#include <Xpetra_ImportFactory.hpp>
62#include <Xpetra_Matrix.hpp>
63#include <Xpetra_MultiVectorFactory.hpp>
64#include <Xpetra_VectorFactory.hpp>
65
67#include "MueLu_MasterList.hpp"
68#include "MueLu_Monitor.hpp"
70
71namespace MueLu {
72
73template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
74 class DeviceType>
75RCP<const ParameterList>
77 Tpetra::KokkosCompat::KokkosDeviceWrapperNode<
78 DeviceType>>::GetValidParameterList() const {
79 RCP<ParameterList> validParamList = rcp(new ParameterList());
80
81 std::string name = "semicoarsen: coarsen rate";
82 validParamList->setEntry(name, MasterList::getEntry(name));
83 validParamList->set<RCP<const FactoryBase>>(
84 "A", Teuchos::null, "Generating factory of the matrix A");
85 validParamList->set<RCP<const FactoryBase>>(
86 "Nullspace", Teuchos::null, "Generating factory of the nullspace");
87 validParamList->set<RCP<const FactoryBase>>(
88 "Coordinates", Teuchos::null, "Generating factory for coordinates");
89
90 validParamList->set<RCP<const FactoryBase>>(
91 "LineDetection_VertLineIds", Teuchos::null,
92 "Generating factory for LineDetection vertical line ids");
93 validParamList->set<RCP<const FactoryBase>>(
94 "LineDetection_Layers", Teuchos::null,
95 "Generating factory for LineDetection layer ids");
96 validParamList->set<RCP<const FactoryBase>>(
97 "CoarseNumZLayers", Teuchos::null,
98 "Generating factory for number of coarse z-layers");
99
100 return validParamList;
101}
102
103template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
104 class DeviceType>
107 Tpetra::KokkosCompat::KokkosDeviceWrapperNode<DeviceType>>::
108 DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
109 Input(fineLevel, "A");
110 Input(fineLevel, "Nullspace");
111
112 Input(fineLevel, "LineDetection_VertLineIds");
113 Input(fineLevel, "LineDetection_Layers");
114 Input(fineLevel, "CoarseNumZLayers");
115
116 // check whether fine level coordinate information is available.
117 // If yes, request the fine level coordinates and generate coarse coordinates
118 // during the Build call
119 if (fineLevel.GetLevelID() == 0) {
120 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
121 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
123 }
124 } else if (bTransferCoordinates_ == true) {
125 // on coarser levels we check the default factory providing "Coordinates"
126 // or the factory declared to provide "Coordinates"
127 // first, check which factory is providing coordinate information
128 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
129 if (myCoordsFact == Teuchos::null) {
130 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
131 }
132 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
133 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
134 }
135 }
136}
137
138template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
139 class DeviceType>
141 Tpetra::KokkosCompat::KokkosDeviceWrapperNode<
142 DeviceType>>::Build(Level &fineLevel,
143 Level &coarseLevel)
144 const {
145 return BuildP(fineLevel, coarseLevel);
146}
147
148template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
149 class DeviceType>
151 Tpetra::KokkosCompat::KokkosDeviceWrapperNode<
152 DeviceType>>::BuildP(Level &fineLevel,
153 Level &coarseLevel)
154 const {
155 FactoryMonitor m(*this, "Build", coarseLevel);
156
157 // obtain general variables
158 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
159 RCP<MultiVector> fineNullspace =
160 Get<RCP<MultiVector>>(fineLevel, "Nullspace");
161
162 // get user-provided coarsening rate parameter (constant over all levels)
163 const ParameterList &pL = GetParameterList();
164 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
165 TEUCHOS_TEST_FOR_EXCEPTION(
166 CoarsenRate < 2, Exceptions::RuntimeError,
167 "semicoarsen: coarsen rate must be greater than 1");
168
169 // collect general input data
170 LO BlkSize = A->GetFixedBlockSize();
171 RCP<const Map> rowMap = A->getRowMap();
172 LO Ndofs = rowMap->getLocalNumElements();
173 LO Nnodes = Ndofs / BlkSize;
174
175 // collect line detection information generated by the LineDetectionFactory
176 // instance
177 LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
178 Teuchos::ArrayRCP<LO> TVertLineId =
179 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
180 Teuchos::ArrayRCP<LO> TLayerId =
181 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
182
183 // compute number of coarse layers
184 TEUCHOS_TEST_FOR_EXCEPTION(FineNumZLayers < 2, Exceptions::RuntimeError,
185 "Cannot coarsen further");
186 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
187 LO CoarseNumZLayers =
188 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
189 if (CoarseNumZLayers < 1)
190 CoarseNumZLayers = 1;
191
192 // generate transfer operator with semicoarsening
193 RCP<Matrix> P;
194 RCP<MultiVector> coarseNullspace;
195 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
196 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
197 coarseNullspace);
198
199 // Store number of coarse z-layers on the coarse level container
200 // This information is used by the LineDetectionAlgorithm
201 // TODO get rid of the NoFactory
202 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
204
205 // store semicoarsening transfer on coarse level
206 Set(coarseLevel, "P", P);
207 Set(coarseLevel, "Nullspace", coarseNullspace);
208
209 // transfer coordinates
211 SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
212 typedef Xpetra::MultiVector<
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
214 xdMV;
215 RCP<xdMV> fineCoords = Teuchos::null;
216 if (fineLevel.GetLevelID() == 0 &&
217 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
218 fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
219 } else {
220 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
221 if (myCoordsFact == Teuchos::null) {
222 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
223 }
224 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
225 fineCoords =
226 fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
227 }
228 }
229
230 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
232 "No Coordinates found provided by the user.");
233
234 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
236 "Three coordinates arrays must be supplied if "
237 "line detection orientation not given.");
238 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
239 fineCoords->getDataNonConst(0);
240 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
241 fineCoords->getDataNonConst(1);
242 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
243 fineCoords->getDataNonConst(2);
244
245 // determine the maximum and minimum z coordinate value on the current
246 // processor.
247 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
248 -Teuchos::ScalarTraits<
249 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
250 Teuchos::ScalarTraits<
251 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
252 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
253 Teuchos::ScalarTraits<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
255 Teuchos::ScalarTraits<
256 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
257 for (auto it = z.begin(); it != z.end(); ++it) {
258 if (*it > zval_max)
259 zval_max = *it;
260 if (*it < zval_min)
261 zval_min = *it;
262 }
263
264 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
265
266 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
267 myZLayerCoords = Teuchos::arcp<
268 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
269 myCoarseZLayers);
270 if (myCoarseZLayers == 1) {
271 myZLayerCoords[0] = zval_min;
272 } else {
273 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
274 (zval_max - zval_min) / (myCoarseZLayers - 1);
275 for (LO k = 0; k < myCoarseZLayers; ++k) {
276 myZLayerCoords[k] = k * dz;
277 }
278 }
279
280 // Note, that the coarse level node coordinates have to be in vertical
281 // ordering according to the numbering of the vertical lines
282
283 // number of vertical lines on current node:
284 LO numVertLines = Nnodes / FineNumZLayers;
285 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
286
287 RCP<const Map> coarseCoordMap = MapFactory::Build(
288 fineCoords->getMap()->lib(),
289 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
290 Teuchos::as<size_t>(numLocalCoarseNodes),
291 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
292 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
293 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
294 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
295 coarseCoords->putScalar(-1.0);
296 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
297 coarseCoords->getDataNonConst(0);
298 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
299 coarseCoords->getDataNonConst(1);
300 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
301 coarseCoords->getDataNonConst(2);
302
303 // loop over all vert line indices (stop as soon as possible)
304 LO cntCoarseNodes = 0;
305 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
306 // vertical line id in *vt
307 LO curVertLineId = TVertLineId[vt];
308
309 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
310 cy[curVertLineId * myCoarseZLayers] == -1.0) {
311 // loop over all local myCoarseZLayers
312 for (LO n = 0; n < myCoarseZLayers; ++n) {
313 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
314 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
315 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
316 }
317 cntCoarseNodes += myCoarseZLayers;
318 }
319
320 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
322 "number of coarse nodes is inconsistent.");
323 if (cntCoarseNodes == numLocalCoarseNodes)
324 break;
325 }
326
327 // set coarse level coordinates
328 Set(coarseLevel, "Coordinates", coarseCoords);
329 } /* end bool bTransferCoordinates */
330}
331
332template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
333 class DeviceType>
336 Tpetra::KokkosCompat::KokkosDeviceWrapperNode<DeviceType>>::
337 BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
338 const LO DofsPerNode, const LO NFLayers,
339 const LO NCLayers, const ArrayRCP<LO> LayerId,
340 const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
341 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
342 RCP<MultiVector> &coarseNullspace) const {
343 SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
344 using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
345 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
346 using LOView1D = Kokkos::View<LO *, DeviceType>;
347 using LOView2D = Kokkos::View<LO **, DeviceType>;
348
349 // Construct a map from fine level column to layer ids (including ghost nodes)
350 // Note: this is needed to sum all couplings within a layer
351 const auto FCol2LayerVector =
352 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
353 const auto localTemp =
354 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
355 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
356 if (importer == Teuchos::null)
357 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
358 {
359 // Fill local temp with layer ids and fill ghost nodes
360 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
361 for (int row = 0; row < NFRows; row++)
362 localTempHost(row, 0) = LayerId[row / DofsPerNode];
363 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
364 Kokkos::deep_copy(localTempView, localTempHost);
365 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
366 }
367 const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
368 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
369
370 // Construct a map from fine level column to local dof per node id (including
371 // ghost nodes) Note: this is needed to sum all couplings within a layer
372 const auto FCol2DofVector =
373 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
374 {
375 // Fill local temp with local dof per node ids and fill ghost nodes
376 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
377 for (int row = 0; row < NFRows; row++)
378 localTempHost(row, 0) = row % DofsPerNode;
379 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
380 Kokkos::deep_copy(localTempView, localTempHost);
381 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
382 }
383 const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
384 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
385
386 // Compute NVertLines
387 // TODO: Read this from line detection factory
388 int NVertLines = -1;
389 if (NFNodes != 0)
390 NVertLines = VertLineId[0];
391 for (int node = 1; node < NFNodes; ++node)
392 if (VertLineId[node] > NVertLines)
393 NVertLines = VertLineId[node];
394 NVertLines++;
395
396 // Construct a map from Line, Layer ids to fine level node
397 LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
398 typename LOView2D::HostMirror LineLayer2NodeHost =
399 Kokkos::create_mirror_view(LineLayer2Node);
400 for (int node = 0; node < NFNodes; ++node)
401 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
402 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
403
404 // Construct a map from coarse layer id to fine layer id
405 LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
406 typename LOView1D::HostMirror CLayer2FLayerHost =
407 Kokkos::create_mirror_view(CLayer2FLayer);
408 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
409 const LO FirstStride =
410 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
411 const coordT RestStride =
412 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
413 const LO NCpts =
414 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
415 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError,
416 "sizes do not match.");
417 coordT stride = (coordT)FirstStride;
418 for (int clayer = 0; clayer < NCLayers; ++clayer) {
419 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
420 stride += RestStride;
421 }
422 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
423
424 // Compute start layer and stencil sizes for layer interpolation at each
425 // coarse layer
426 int MaxStencilSize = 1;
427 LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
428 LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
429 typename LOView1D::HostMirror CLayer2StartLayerHost =
430 Kokkos::create_mirror_view(CLayer2StartLayer);
431 typename LOView1D::HostMirror CLayer2StencilSizeHost =
432 Kokkos::create_mirror_view(CLayer2StencilSize);
433 for (int clayer = 0; clayer < NCLayers; ++clayer) {
434 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
435 const int stencilSize = (clayer < NCLayers - 1)
436 ? CLayer2FLayerHost(clayer + 1) - startLayer
437 : NFLayers - startLayer;
438
439 if (MaxStencilSize < stencilSize)
440 MaxStencilSize = stencilSize;
441 CLayer2StartLayerHost(clayer) = startLayer;
442 CLayer2StencilSizeHost(clayer) = stencilSize;
443 }
444 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
445 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
446
447 // Allocate storage for the coarse layer interpolation matrices on all
448 // vertical lines Note: Contributions to each matrix are collapsed to vertical
449 // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
450 // Here we store the full matrix to be compatible with kokkos kernels batch LU
451 // and tringular solve.
452 int Nmax = MaxStencilSize * DofsPerNode;
453 Kokkos::View<impl_SC ***, DeviceType> BandMat(
454 "BandMat", NVertLines, Nmax, Nmax);
455 Kokkos::View<impl_SC ***, DeviceType> BandSol(
456 "BandSol", NVertLines, Nmax, DofsPerNode);
457
458 // Precompute number of nonzeros in prolongation matrix and allocate P views
459 // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
460 // interpolation stencil (StencilSize*DofsPerNode)
461 int NnzP = 0;
462 for (int clayer = 0; clayer < NCLayers; ++clayer)
463 NnzP += CLayer2StencilSizeHost(clayer);
464 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
465 Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
466 Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
467
468 // Precompute Pptr
469 // Note: Each coarse layer stencil dof contributes DofsPerNode to the
470 // corresponding row in P
471 Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
472 typename Kokkos::View<size_t *, DeviceType>::HostMirror PptrHost =
473 Kokkos::create_mirror_view(Pptr);
474 Kokkos::deep_copy(PptrHost, 0);
475 for (int line = 0; line < NVertLines; ++line) {
476 for (int clayer = 0; clayer < NCLayers; ++clayer) {
477 const int stencilSize = CLayer2StencilSizeHost(clayer);
478 const int startLayer = CLayer2StartLayerHost(clayer);
479 for (int snode = 0; snode < stencilSize; ++snode) {
480 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
481 const int layer = startLayer + snode;
482 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
483 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
484 PptrHost(AmatRow + 1) += DofsPerNode;
485 }
486 }
487 }
488 }
489 for (int i = 2; i < NFRows + 1; ++i)
490 PptrHost(i) += PptrHost(i - 1);
491 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
493 "Number of nonzeros in P does not match");
494 Kokkos::deep_copy(Pptr, PptrHost);
495
496 // Precompute Pptr offsets
497 // Note: These are used to determine the nonzero index in Pvals and Pcols
498 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
499 "layerBuckets", NFLayers);
500 Kokkos::deep_copy(layerBuckets, 0);
501 LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
502 MaxStencilSize);
503 typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
504 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
505 for (int clayer = 0; clayer < NCLayers; ++clayer) {
506 const int stencilSize = CLayer2StencilSizeHost(clayer);
507 const int startLayer = CLayer2StartLayerHost(clayer);
508 for (int snode = 0; snode < stencilSize; ++snode) {
509 const int layer = startLayer + snode;
510 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
511 layerBuckets(layer)++;
512 }
513 }
514 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
515
516 { // Fill P - fill and solve each block tridiagonal system and fill P views
517 SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
518
519 const auto localAmat = Amat->getLocalMatrixDevice();
520 const auto zero = impl_ATS::zero();
521 const auto one = impl_ATS::one();
522
523 using range_policy = Kokkos::RangePolicy<execution_space>;
524 Kokkos::parallel_for(
525 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
526 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
527 for (int clayer = 0; clayer < NCLayers; ++clayer) {
528
529 // Initialize BandSol
530 auto bandSol =
531 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
532 for (int row = 0; row < Nmax; ++row)
533 for (int dof = 0; dof < DofsPerNode; ++dof)
534 bandSol(row, dof) = zero;
535
536 // Initialize BandMat (set unused row diagonal to 1.0)
537 const int stencilSize = CLayer2StencilSize(clayer);
538 const int N = stencilSize * DofsPerNode;
539 auto bandMat =
540 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
541 for (int row = 0; row < Nmax; ++row)
542 for (int col = 0; col < Nmax; ++col)
543 bandMat(row, col) =
544 (row == col && row >= N) ? one : zero;
545
546 // Loop over layers in stencil and fill banded matrix and rhs
547 const int flayer = CLayer2FLayer(clayer);
548 const int startLayer = CLayer2StartLayer(clayer);
549 for (int snode = 0; snode < stencilSize; ++snode) {
550
551 const int layer = startLayer + snode;
552 if (layer == flayer) { // If layer in stencil is a coarse layer
553 for (int dof = 0; dof < DofsPerNode; ++dof) {
554 const int row = snode * DofsPerNode + dof;
555 bandMat(row, row) = one;
556 bandSol(row, dof) = one;
557 }
558 } else { // Not a coarse layer
559 const int AmatBlkRow = LineLayer2Node(line, layer);
560 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
561
562 // Get Amat row info
563 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
564 const auto localAmatRow = localAmat.rowConst(AmatRow);
565 const int AmatRowLeng = localAmatRow.length;
566
567 const int row = snode * DofsPerNode + dofi;
568 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
569 const int col = snode * DofsPerNode + dofj;
570
571 // Sum values along row which correspond to stencil
572 // layer/dof and fill bandMat
573 auto val = zero;
574 for (int i = 0; i < AmatRowLeng; ++i) {
575 const int colidx = localAmatRow.colidx(i);
576 if (FCol2Layer(colidx) == layer &&
577 FCol2Dof(colidx) == dofj)
578 val += localAmatRow.value(i);
579 }
580 bandMat(row, col) = val;
581
582 if (snode > 0) {
583 // Sum values along row which correspond to stencil
584 // layer/dof below and fill bandMat
585 val = zero;
586 for (int i = 0; i < AmatRowLeng; ++i) {
587 const int colidx = localAmatRow.colidx(i);
588 if (FCol2Layer(colidx) == layer - 1 &&
589 FCol2Dof(colidx) == dofj)
590 val += localAmatRow.value(i);
591 }
592 bandMat(row, col - DofsPerNode) = val;
593 }
594
595 if (snode < stencilSize - 1) {
596 // Sum values along row which correspond to stencil
597 // layer/dof above and fill bandMat
598 val = zero;
599 for (int i = 0; i < AmatRowLeng; ++i) {
600 const int colidx = localAmatRow.colidx(i);
601 if (FCol2Layer(colidx) == layer + 1 &&
602 FCol2Dof(colidx) == dofj)
603 val += localAmatRow.value(i);
604 }
605 bandMat(row, col + DofsPerNode) = val;
606 }
607 }
608 }
609 }
610 }
611
612 // Batch LU and triangular solves
613 namespace KB = KokkosBatched;
614 using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
615 lu_type::invoke(bandMat);
616 using trsv_l_type =
617 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
618 KB::Trans::NoTranspose, KB::Diag::Unit,
619 KB::Algo::Trsm::Unblocked>;
620 trsv_l_type::invoke(one, bandMat, bandSol);
621 using trsv_u_type = typename KB::SerialTrsm<
622 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
623 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
624 trsv_u_type::invoke(one, bandMat, bandSol);
625
626 // Fill prolongation views with solution
627 for (int snode = 0; snode < stencilSize; ++snode) {
628 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
629 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
630 const int layer = startLayer + snode;
631 const int AmatBlkRow = LineLayer2Node(line, layer);
632 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
633
634 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
635 const int pptr =
636 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
637
638 const int col =
639 line * NCLayers + clayer; // coarse node (block row) index
640 Pcols(pptr) = col * DofsPerNode + dofj;
641 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
642 }
643 }
644 }
645 }
646 });
647 } // Fill P
648
649 // Build P
650 RCP<const Map> rowMap = Amat->getRowMap();
651 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
652 Xpetra::global_size_t itemp = GNdofs / NFLayers;
653 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
654 RCP<const Map> coarseMap = StridedMapFactory::Build(
655 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
656 stridingInfo_, rowMap->getComm(), -1, 0);
657 P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
658 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
659 PCrs->setAllValues(Pptr, Pcols, Pvals);
660 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
661
662 // set StridingInformation of P
663 if (Amat->IsView("stridedMaps") == true)
664 P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
665 else
666 P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
667
668 // Construct coarse nullspace and inject fine nullspace
669 coarseNullspace =
670 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
671 const int numVectors = fineNullspace->getNumVectors();
672 const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
673 const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
674 using range_policy = Kokkos::RangePolicy<execution_space>;
675 Kokkos::parallel_for(
676 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
677 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
678 for (int clayer = 0; clayer < NCLayers; ++clayer) {
679 const int layer = CLayer2FLayer(clayer);
680 const int AmatBlkRow =
681 LineLayer2Node(line, layer); // fine node (block row) index
682 const int col =
683 line * NCLayers + clayer; // coarse node (block row) index
684 for (int k = 0; k < numVectors; ++k) {
685 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
686 const int fRow = AmatBlkRow * DofsPerNode + dofi;
687 const int cRow = col * DofsPerNode + dofi;
688 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
689 }
690 }
691 }
692 });
693}
694
695} // namespace MueLu
696
697#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
698#endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
const RCP< const FactoryBase > GetFactory(const std::string &varName) const
Default implementation of FactoryAcceptor::GetFactory().
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput().
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
void BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes, const LO DofsPerNode, const LO NFLayers, const LO NCLayers, const ArrayRCP< LO > LayerId, const ArrayRCP< LO > VertLineId, const RCP< Matrix > &Amat, const RCP< MultiVector > fineNullspace, RCP< Matrix > &P, RCP< MultiVector > &coarseNullspace) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Prolongator factory performing semi-coarsening.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.