125 RCP<Teuchos::FancyOStream> out;
126 if(
const char* dbg = std::getenv(
"MUELU_GEOMETRICINTERPOLATIONPFACTORY_DEBUG")) {
127 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
128 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
130 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
133 *out <<
"Starting GeometricInterpolationPFactory_kokkos::BuildP." << std::endl;
137 const bool buildCoarseCoordinates = pL.get<
bool>(
"interp: build coarse coordinates");
138 const int interpolationOrder =
Get<int>(fineLevel,
"structuredInterpolationOrder");
139 const int numDimensions =
Get<int>(fineLevel,
"numDimensions");
143 RCP<const CrsGraph> prolongatorGraph =
Get<RCP<CrsGraph> >(fineLevel,
"prolongatorGraph");
144 RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
149 if(buildCoarseCoordinates || (interpolationOrder == 1)) {
157 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoordinates->getMap()->lib(),
158 Teuchos::OrdinalTraits<GO>::invalid(),
159 geoData->getNumCoarseNodes(),
160 fineCoordinates->getMap()->getIndexBase(),
161 fineCoordinates->getMap()->getComm());
162 coarseCoordinates = Xpetra::MultiVectorFactory<real_type,LO,GO,Node>::
163 Build(coarseCoordsMap, fineCoordinates->getNumVectors());
168 fineCoordinates-> getDeviceLocalView(Xpetra::Access::ReadWrite),
169 coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll));
170 Kokkos::parallel_for(
"GeometricInterpolation: build coarse coordinates",
171 Kokkos::RangePolicy<execution_space>(0, geoData->getNumCoarseNodes()),
172 myCoarseCoordinatesBuilder);
174 Set(coarseLevel,
"Coordinates", coarseCoordinates);
177 *out <<
"Fine and coarse coordinates have been loaded from the fine level and set on the coarse level." << std::endl;
179 if(interpolationOrder == 0) {
183 }
else if(interpolationOrder == 1) {
186 RCP<realvaluedmultivector_type> ghostCoordinates
187 = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(prolongatorGraph->getColMap(),
188 fineCoordinates->getNumVectors());
189 RCP<const Import> ghostImporter = ImportFactory::Build(coarseCoordinates->getMap(),
190 prolongatorGraph->getColMap());
191 ghostCoordinates->doImport(*coarseCoordinates, *ghostImporter, Xpetra::INSERT);
194 BuildLinearP(A, prolongatorGraph, fineCoordinates, ghostCoordinates, numDimensions, P);
197 *out <<
"The prolongator matrix has been built." << std::endl;
203 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(P->getDomainMap(),
204 fineNullspace->getNumVectors());
205 P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(),
206 Teuchos::ScalarTraits<SC>::zero());
207 Set(coarseLevel,
"Nullspace", coarseNullspace);
210 *out <<
"The coarse nullspace is constructed and set on the coarse level." << std::endl;
212 Array<LO> lNodesPerDir =
Get<Array<LO> >(fineLevel,
"lCoarseNodesPerDim");
213 Set(coarseLevel,
"numDimensions", numDimensions);
214 Set(coarseLevel,
"lNodesPerDim", lNodesPerDir);
215 Set(coarseLevel,
"P", P);
217 *out <<
"GeometricInterpolationPFactory_kokkos::BuildP has completed." << std::endl;
223 BuildConstantP(RCP<Matrix>& P, RCP<const CrsGraph>& prolongatorGraph, RCP<Matrix>& A)
const {
226 RCP<Teuchos::FancyOStream> out;
227 if(
const char* dbg = std::getenv(
"MUELU_GEOMETRICINTERPOLATIONPFACTORY_DEBUG")) {
228 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
229 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
231 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
234 *out <<
"BuildConstantP" << std::endl;
236 std::vector<size_t> strideInfo(1);
237 strideInfo[0] = A->GetFixedBlockSize();
238 RCP<const StridedMap> stridedDomainMap =
239 StridedMapFactory::Build(prolongatorGraph->getDomainMap(), strideInfo);
241 *out <<
"Call prolongator constructor" << std::endl;
242 using helpers=Xpetra::Helpers<Scalar,LocalOrdinal,GlobalOrdinal,Node>;
243 if(helpers::isTpetraBlockCrs(A)) {
244 LO NSDim = A->GetStorageBlockSize();
248 RCP<const Map> BlockMap = prolongatorGraph->getDomainMap();
249 Teuchos::ArrayView<const GO> block_dofs = BlockMap->getLocalElementList();
250 Teuchos::Array<GO> point_dofs(block_dofs.size()*NSDim);
251 for(LO i=0, ct=0; i<block_dofs.size(); i++) {
252 for(LO j=0; j<NSDim; j++) {
253 point_dofs[ct] = block_dofs[i]*NSDim + j;
258 RCP<const Map> PointMap = MapFactory::Build(BlockMap->lib(),
259 BlockMap->getGlobalNumElements() *NSDim,
261 BlockMap->getIndexBase(),
262 BlockMap->getComm());
263 strideInfo[0] = A->GetFixedBlockSize();
264 RCP<const StridedMap> stridedPointMap = StridedMapFactory::Build(PointMap, strideInfo);
266 RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(prolongatorGraph, PointMap, A->getRangeMap(),NSDim);
267 RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
268 if(P_tpetra.is_null())
throw std::runtime_error(
"BuildConstantP: Matrix factory did not return a Tpetra::BlockCrsMatrix");
269 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
271 const LO stride = strideInfo[0]*strideInfo[0];
272 const LO in_stride = strideInfo[0];
273 typename CrsMatrix::local_graph_type localGraph = prolongatorGraph->getLocalGraphDevice();
274 auto rowptr = localGraph.row_map;
275 auto indices = localGraph.entries;
276 auto values = P_tpetra->getTpetra_BlockCrsMatrix()->getValuesDeviceNonConst();
278 using ISC =
typename Tpetra::BlockCrsMatrix<SC,LO,GO,NO>::impl_scalar_type;
279 ISC one = Teuchos::ScalarTraits<ISC>::one();
281 const Kokkos::TeamPolicy<execution_space> policy(prolongatorGraph->getLocalNumRows(), 1);
283 Kokkos::parallel_for(
"MueLu:GeoInterpFact::BuildConstantP::fill", policy,
284 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type &thread) {
285 auto row = thread.league_rank();
286 for(LO j = (LO)rowptr[row]; j<(LO) rowptr[row+1]; j++) {
287 LO block_offset = j*stride;
288 for(LO k=0; k<in_stride; k++)
289 values[block_offset + k*(in_stride+1) ] = one;
294 if (A->IsView(
"stridedMaps") ==
true) {
295 P->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), stridedPointMap);
298 P->CreateView(
"stridedMaps", P->getRangeMap(), PointMap);
303 RCP<ParameterList> dummyList = rcp(
new ParameterList());
304 P = rcp(
new CrsMatrixWrap(prolongatorGraph, dummyList));
305 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
306 PCrs->setAllToScalar(1.0);
307 PCrs->fillComplete();
310 if (A->IsView(
"stridedMaps") ==
true) {
311 P->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), stridedDomainMap);
313 P->CreateView(
"stridedMaps", P->getRangeMap(), stridedDomainMap);
321 BuildLinearP(RCP<Matrix>& A, RCP<const CrsGraph>& prolongatorGraph,
322 RCP<realvaluedmultivector_type>& fineCoordinates,
323 RCP<realvaluedmultivector_type>& ghostCoordinates,
324 const int numDimensions, RCP<Matrix>& P)
const {
327 RCP<Teuchos::FancyOStream> out;
328 if(
const char* dbg = std::getenv(
"MUELU_GEOMETRICINTERPOLATIONPFACTORY_DEBUG")) {
329 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
330 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
332 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
335 *out <<
"Entering BuildLinearP" << std::endl;
338 const LO numFineNodes = fineCoordinates->getLocalLength();
339 const LO numGhostNodes = ghostCoordinates->getLocalLength();
340 Array<ArrayRCP<const real_type> > fineCoords(3);
341 Array<ArrayRCP<const real_type> > ghostCoords(3);
342 const real_type realZero = Teuchos::as<real_type>(0.0);
343 ArrayRCP<real_type> fineZero(numFineNodes, realZero);
344 ArrayRCP<real_type> ghostZero(numGhostNodes, realZero);
345 for(
int dim = 0; dim < 3; ++dim) {
346 if(dim < numDimensions) {
347 fineCoords[dim] = fineCoordinates->getData(dim);
348 ghostCoords[dim] = ghostCoordinates->getData(dim);
350 fineCoords[dim] = fineZero;
351 ghostCoords[dim] = ghostZero;
355 *out <<
"Coordinates extracted from the multivectors!" << std::endl;
358 const int numInterpolationPoints = 1 << numDimensions;
359 const int dofsPerNode = A->GetFixedBlockSize();
361 std::vector<size_t> strideInfo(1);
362 strideInfo[0] = dofsPerNode;
363 RCP<const StridedMap> stridedDomainMap =
364 StridedMapFactory::Build(prolongatorGraph->getDomainMap(), strideInfo);
366 *out <<
"The maps of P have been computed" << std::endl;
368 RCP<ParameterList> dummyList = rcp(
new ParameterList());
369 P = rcp(
new CrsMatrixWrap(prolongatorGraph, dummyList));
370 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
373 LO interpolationNodeIdx = 0, rowIdx = 0;
374 ArrayView<const LO> colIndices;
376 Array<Array<real_type> > coords(numInterpolationPoints + 1);
377 Array<real_type> stencil(numInterpolationPoints);
378 for(LO nodeIdx = 0; nodeIdx < numFineNodes; ++nodeIdx) {
379 if(PCrs->getNumEntriesInLocalRow(nodeIdx*dofsPerNode) == 1) {
382 for(LO dof = 0; dof < dofsPerNode; ++dof) {
383 rowIdx = nodeIdx*dofsPerNode + dof;
384 prolongatorGraph->getLocalRowView(rowIdx, colIndices);
385 PCrs->replaceLocalValues(rowIdx, colIndices, values());
391 for(
int dim = 0; dim < 3; ++dim) {
392 coords[0][dim] = fineCoords[dim][nodeIdx];
394 prolongatorGraph->getLocalRowView(nodeIdx*dofsPerNode, colIndices);
395 for(
int interpolationIdx=0; interpolationIdx < numInterpolationPoints; ++interpolationIdx) {
396 coords[interpolationIdx + 1].resize(3);
397 interpolationNodeIdx = colIndices[interpolationIdx] / dofsPerNode;
398 for(
int dim = 0; dim < 3; ++dim) {
399 coords[interpolationIdx + 1][dim] = ghostCoords[dim][interpolationNodeIdx];
403 values.resize(numInterpolationPoints);
404 for(LO valueIdx = 0; valueIdx < numInterpolationPoints; ++valueIdx) {
405 values[valueIdx] = Teuchos::as<SC>(stencil[valueIdx]);
409 for(LO dof = 0; dof < dofsPerNode; ++dof) {
410 rowIdx = nodeIdx*dofsPerNode + dof;
411 prolongatorGraph->getLocalRowView(rowIdx, colIndices);
412 PCrs->replaceLocalValues(rowIdx, colIndices, values());
417 *out <<
"The calculation of the interpolation stencils has completed." << std::endl;
419 PCrs->fillComplete();
421 *out <<
"All values in P have been set and expertStaticFillComplete has been performed." << std::endl;
424 if (A->IsView(
"stridedMaps") ==
true) {
425 P->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), stridedDomainMap);
427 P->CreateView(
"stridedMaps", P->getRangeMap(), stridedDomainMap);
436 const Array<Array<real_type> > coord,
437 Array<real_type>& stencil)
const {
454 Teuchos::SerialDenseMatrix<LO,real_type> Jacobian(numDimensions, numDimensions);
455 Teuchos::SerialDenseVector<LO,real_type> residual(numDimensions);
456 Teuchos::SerialDenseVector<LO,real_type> solutionDirection(numDimensions);
457 Teuchos::SerialDenseVector<LO,real_type> paramCoords(numDimensions);
458 Teuchos::SerialDenseSolver<LO,real_type> problem;
459 int iter = 0, max_iter = 5;
460 real_type functions[4][8], norm_ref = 1.0, norm2 = 1.0, tol = 1.0e-5;
461 paramCoords.size(numDimensions);
463 while( (iter < max_iter) && (norm2 > tol*norm_ref) ) {
466 solutionDirection.size(numDimensions);
467 residual.size(numDimensions);
472 for(LO i = 0; i < numDimensions; ++i) {
473 residual(i) = coord[0][i];
474 for(LO k = 0; k < numInterpolationPoints; ++k) {
475 residual(i) -= functions[0][k]*coord[k+1][i];
478 norm_ref += residual(i)*residual(i);
479 if(i == numDimensions - 1) {
480 norm_ref = std::sqrt(norm_ref);
484 for(LO j = 0; j < numDimensions; ++j) {
485 for(LO k = 0; k < numInterpolationPoints; ++k) {
486 Jacobian(i,j) += functions[j+1][k]*coord[k+1][i];
492 problem.setMatrix(Teuchos::rcp(&Jacobian,
false));
493 problem.setVectors(Teuchos::rcp(&solutionDirection,
false), Teuchos::rcp(&residual,
false));
494 if(problem.shouldEquilibrate()) {problem.factorWithEquilibration(
true);}
497 for(LO i = 0; i < numDimensions; ++i) {
498 paramCoords(i) = paramCoords(i) + solutionDirection(i);
503 for(LO i = 0; i < numDimensions; ++i) {
505 for(LO k = 0; k < numInterpolationPoints; ++k) {
506 tmp -= functions[0][k]*coord[k+1][i];
511 norm2 = std::sqrt(norm2);
515 for(LO i = 0; i < numInterpolationPoints; ++i) {
516 stencil[i] = functions[0][i];
524 const Teuchos::SerialDenseVector<LO, real_type> parametricCoordinates,
526 real_type xi = 0.0, eta = 0.0, zeta = 0.0, denominator = 0.0;
527 if(numDimensions == 1) {
528 xi = parametricCoordinates[0];
530 }
else if(numDimensions == 2) {
531 xi = parametricCoordinates[0];
532 eta = parametricCoordinates[1];
534 }
else if(numDimensions == 3) {
535 xi = parametricCoordinates[0];
536 eta = parametricCoordinates[1];
537 zeta = parametricCoordinates[2];
541 functions[0][0] = (1.0 - xi)*(1.0 - eta)*(1.0 - zeta) / denominator;
542 functions[0][1] = (1.0 + xi)*(1.0 - eta)*(1.0 - zeta) / denominator;
543 functions[0][2] = (1.0 - xi)*(1.0 + eta)*(1.0 - zeta) / denominator;
544 functions[0][3] = (1.0 + xi)*(1.0 + eta)*(1.0 - zeta) / denominator;
545 functions[0][4] = (1.0 - xi)*(1.0 - eta)*(1.0 + zeta) / denominator;
546 functions[0][5] = (1.0 + xi)*(1.0 - eta)*(1.0 + zeta) / denominator;
547 functions[0][6] = (1.0 - xi)*(1.0 + eta)*(1.0 + zeta) / denominator;
548 functions[0][7] = (1.0 + xi)*(1.0 + eta)*(1.0 + zeta) / denominator;
550 functions[1][0] = -(1.0 - eta)*(1.0 - zeta) / denominator;
551 functions[1][1] = (1.0 - eta)*(1.0 - zeta) / denominator;
552 functions[1][2] = -(1.0 + eta)*(1.0 - zeta) / denominator;
553 functions[1][3] = (1.0 + eta)*(1.0 - zeta) / denominator;
554 functions[1][4] = -(1.0 - eta)*(1.0 + zeta) / denominator;
555 functions[1][5] = (1.0 - eta)*(1.0 + zeta) / denominator;
556 functions[1][6] = -(1.0 + eta)*(1.0 + zeta) / denominator;
557 functions[1][7] = (1.0 + eta)*(1.0 + zeta) / denominator;
559 functions[2][0] = -(1.0 - xi)*(1.0 - zeta) / denominator;
560 functions[2][1] = -(1.0 + xi)*(1.0 - zeta) / denominator;
561 functions[2][2] = (1.0 - xi)*(1.0 - zeta) / denominator;
562 functions[2][3] = (1.0 + xi)*(1.0 - zeta) / denominator;
563 functions[2][4] = -(1.0 - xi)*(1.0 + zeta) / denominator;
564 functions[2][5] = -(1.0 + xi)*(1.0 + zeta) / denominator;
565 functions[2][6] = (1.0 - xi)*(1.0 + zeta) / denominator;
566 functions[2][7] = (1.0 + xi)*(1.0 + zeta) / denominator;
568 functions[3][0] = -(1.0 - xi)*(1.0 - eta) / denominator;
569 functions[3][1] = -(1.0 + xi)*(1.0 - eta) / denominator;
570 functions[3][2] = -(1.0 - xi)*(1.0 + eta) / denominator;
571 functions[3][3] = -(1.0 + xi)*(1.0 + eta) / denominator;
572 functions[3][4] = (1.0 - xi)*(1.0 - eta) / denominator;
573 functions[3][5] = (1.0 + xi)*(1.0 - eta) / denominator;
574 functions[3][6] = (1.0 - xi)*(1.0 + eta) / denominator;
575 functions[3][7] = (1.0 + xi)*(1.0 + eta) / denominator;