MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
47#define MUELU_BLACKBOXPFACTORY_DEF_HPP
48
49#include <stdlib.h>
50#include <iomanip>
51
52
53// #include <Teuchos_LAPACK.hpp>
54#include <Teuchos_SerialDenseMatrix.hpp>
55#include <Teuchos_SerialDenseVector.hpp>
56#include <Teuchos_SerialDenseSolver.hpp>
57
58#include <Xpetra_CrsMatrixUtils.hpp>
59#include <Xpetra_CrsMatrixWrap.hpp>
60#include <Xpetra_ImportFactory.hpp>
61#include <Xpetra_Matrix.hpp>
62#include <Xpetra_MapFactory.hpp>
63#include <Xpetra_MultiVectorFactory.hpp>
64#include <Xpetra_VectorFactory.hpp>
65
66#include <Xpetra_IO.hpp>
67
70
71#include "MueLu_MasterList.hpp"
72#include "MueLu_Monitor.hpp"
73
74namespace MueLu {
75
76 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
78 RCP<ParameterList> validParamList = rcp(new ParameterList());
79
80 // Coarsen can come in two forms, either a single char that will be interpreted as an integer
81 // which is used as the coarsening rate in every spatial dimentions,
82 // or it can be a longer string that will then be interpreted as an array of integers.
83 // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
84 // is the default setting!
85 validParamList->set<std::string > ("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
86 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
87 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
88 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
89 validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
90 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
91 validParamList->set<std::string> ("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
92 validParamList->set<std::string> ("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
93
94 return validParamList;
95 }
96
97 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
99 Level& /* coarseLevel */)
100 const {
101 Input(fineLevel, "A");
102 Input(fineLevel, "Nullspace");
103 Input(fineLevel, "Coordinates");
104 // Request the global number of nodes per dimensions
105 if(fineLevel.GetLevelID() == 0) {
106 if(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
107 fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
108 } else {
109 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
111 "gNodesPerDim was not provided by the user on level0!");
112 }
113 } else {
114 Input(fineLevel, "gNodesPerDim");
115 }
116
117 // Request the local number of nodes per dimensions
118 if(fineLevel.GetLevelID() == 0) {
119 if(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
120 fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
121 } else {
122 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
124 "lNodesPerDim was not provided by the user on level0!");
125 }
126 } else {
127 Input(fineLevel, "lNodesPerDim");
128 }
129 }
130
131 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
133 Level& coarseLevel) const{
134 return BuildP(fineLevel, coarseLevel);
135
136 }
137
138 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140 Level& coarseLevel)const{
141 FactoryMonitor m(*this, "Build", coarseLevel);
142
143 // Get parameter list
144 const ParameterList& pL = GetParameterList();
145
146 // obtain general variables
147 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
148 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
149 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coordinates =
151 LO numDimensions = coordinates->getNumVectors();
152 LO BlkSize = A->GetFixedBlockSize();
153
154 // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
155 Array<GO> gFineNodesPerDir(3);
156 Array<LO> lFineNodesPerDir(3);
157 // Get the number of points in each direction
158 if(fineLevel.GetLevelID() == 0) {
159 gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
160 lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
161 } else {
162 // Loading global number of nodes per diretions
163 gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
164
165 // Loading local number of nodes per diretions
166 lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
167 }
168 for(LO i = 0; i < 3; ++i) {
169 if(gFineNodesPerDir[i] == 0) {
170 GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
171 << std::endl;
172 gFineNodesPerDir[i] = 1;
173 }
174 if(lFineNodesPerDir[i] == 0) {
175 GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
176 << std::endl;
177 lFineNodesPerDir[i] = 1;
178 }
179 }
180 GO gNumFineNodes = gFineNodesPerDir[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
181 LO lNumFineNodes = lFineNodesPerDir[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0];
182
183 // Get the coarsening rate
184 std::string coarsenRate = pL.get<std::string>("Coarsen");
185 Array<LO> coarseRate(3);
186 {
187 Teuchos::Array<LO> crates;
188 try {
189 crates = Teuchos::fromStringToArray<LO>(coarsenRate);
190 } catch(const Teuchos::InvalidArrayStringRepresentation& e) {
191 GetOStream(Errors,-1) << " *** Coarsen must be a string convertible into an array! *** "
192 << std::endl;
193 throw e;
194 }
195 TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
197 "Coarsen must have at least as many components as the number of"
198 " spatial dimensions in the problem.");
199 for(LO i = 0; i < 3; ++i) {
200 if(i < numDimensions) {
201 if(crates.size() == 1) {
202 coarseRate[i] = crates[0];
203 } else if(i < crates.size()) {
204 coarseRate[i] = crates[i];
205 } else {
206 GetOStream(Errors,-1) << " *** Coarsen must be at least as long as the number of"
207 " spatial dimensions! *** " << std::endl;
208 throw Exceptions::RuntimeError(" *** Coarsen must be at least as long as the number of"
209 " spatial dimensions! *** \n");
210 }
211 } else {
212 coarseRate[i] = 1;
213 }
214 }
215 } // End of scope for crates
216
217 // Get the stencil type used for discretization
218 const std::string stencilType = pL.get<std::string>("stencil type");
219 if(stencilType != "full" && stencilType != "reduced") {
220 GetOStream(Errors,-1) << " *** stencil type must be set to: full or reduced, any other value "
221 "is trated as an error! *** " << std::endl;
222 throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
223 }
224
225 // Get the strategy for PDE systems
226 const std::string blockStrategy = pL.get<std::string>("block strategy");
227 if(blockStrategy != "coupled" && blockStrategy != "uncoupled") {
228 GetOStream(Errors,-1) << " *** block strategy must be set to: coupled or uncoupled, any other "
229 "value is trated as an error! *** " << std::endl;
230 throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
231 }
232
233 GO gNumCoarseNodes = 0;
234 LO lNumCoarseNodes = 0;
235 Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
236 Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
237 Array<bool> ghostInterface(6);
238 Array<int> boundaryFlags(3);
239 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes(numDimensions);
240 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
241 for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim)();}
242
243 // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
244 RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
245
246 GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize,// inputs
247 gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
248 lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
249 gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
250 ghostedCoarseNodes);
251
252 // Create the MultiVector of coarse coordinates
253 Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
254 RCP<const Map> coarseCoordsMap = MapFactory::Build (lib,
255 gNumCoarseNodes,
256 coarseNodesGIDs.view(0, lNumCoarseNodes),
257 coordinates->getMap()->getIndexBase(),
258 coordinates->getMap()->getComm());
259 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseCoords(numDimensions);
260 for(LO dim = 0; dim < numDimensions; ++dim) {
261 coarseCoords[dim] = coarseNodes[dim]();
262 }
263 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coarseCoordinates =
264 Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(coarseCoordsMap, coarseCoords(),
265 numDimensions);
266
267 // Now create a new matrix: Aghost that contains all the data
268 // locally needed to compute the local part of the prolongator.
269 // Here assuming that all the coarse nodes o and fine nodes +
270 // are local then all the data associated with the coarse
271 // nodes O and the fine nodes * needs to be imported.
272 //
273 // *--*--*--*--*--*--*--*
274 // | | | | | | | |
275 // o--+--+--o--+--+--O--*
276 // | | | | | | | |
277 // +--+--+--+--+--+--*--*
278 // | | | | | | | |
279 // +--+--+--+--+--+--*--*
280 // | | | | | | | |
281 // o--+--+--o--+--+--O--*
282 // | | | | | | | |
283 // +--+--+--+--+--+--*--*
284 // | | | | | | | |
285 // *--*--*--*--*--*--*--*
286 // | | | | | | | |
287 // O--*--*--O--*--*--O--*
288 //
289 // Creating that local matrix is easy enough using proper range
290 // and domain maps to import data from A. Note that with this
291 // approach we reorder the local entries using the domain map and
292 // can subsequently compute the prolongator without reordering.
293 // As usual we need to be careful about any coarsening rate
294 // change at the boundary!
295
296 // The ingredients needed are an importer, a range map and a domain map
297 Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
298 nodeSteps[0] = 1;
299 nodeSteps[1] = gFineNodesPerDir[0];
300 nodeSteps[2] = gFineNodesPerDir[0]*gFineNodesPerDir[1];
301 Array<LO> glFineNodesPerDir(3);
302 GO startingGID = A->getRowMap()->getMinGlobalIndex();
303 for(LO dim = 0; dim < 3; ++dim) {
304 LO numCoarseNodes = 0;
305 if(dim < numDimensions) {
306 startingGID -= myOffset[dim]*nodeSteps[dim];
307 numCoarseNodes = lCoarseNodesPerDir[dim];
308 if(ghostInterface[2*dim]) {++numCoarseNodes;}
309 if(ghostInterface[2*dim+1]) {++numCoarseNodes;}
310 if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
311 glFineNodesPerDir[dim] = (numCoarseNodes - 2)*coarseRate[dim] + endRate[dim] + 1;
312 } else {
313 glFineNodesPerDir[dim] = (numCoarseNodes - 1)*coarseRate[dim] + 1;
314 }
315 } else {
316 glFineNodesPerDir[dim] = 1;
317 }
318 }
319 ghostRowGIDs.resize(glFineNodesPerDir[0]*glFineNodesPerDir[1]*glFineNodesPerDir[2]*BlkSize);
320 for(LO k = 0; k < glFineNodesPerDir[2]; ++k) {
321 for(LO j = 0; j < glFineNodesPerDir[1]; ++j) {
322 for(LO i = 0; i < glFineNodesPerDir[0]; ++i) {
323 for(LO l = 0; l < BlkSize; ++l) {
324 ghostRowGIDs[(k*glFineNodesPerDir[1]*glFineNodesPerDir[0]
325 + j*glFineNodesPerDir[0] + i)*BlkSize + l] = startingGID
326 + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
327 }
328 }
329 }
330 }
331
332 // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
333 Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
334 Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
335 GO colMinGID = 0;
336 Array<LO> colRange(numDimensions);
337 dimStride[0] = 1;
338 for(int dim = 1; dim < numDimensions; ++dim) {
339 dimStride[dim] = dimStride[dim - 1]*gFineNodesPerDir[dim - 1];
340 }
341 {
342 GO tmp = startingGID;
343 for(int dim = numDimensions; dim > 0; --dim) {
344 startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
345 tmp = tmp % dimStride[dim - 1];
346
347 if(startingGlobalIndices[dim - 1] > 0) {
348 startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
349 }
350 if(startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]){
351 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
352 + glFineNodesPerDir[dim - 1];
353 } else {
354 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
355 + glFineNodesPerDir[dim - 1] - 1;
356 }
357 colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
358 colMinGID += startingColIndices[dim - 1]*dimStride[dim - 1];
359 }
360 }
361 ghostColGIDs.resize(colRange[0]*colRange[1]*colRange[2]*BlkSize);
362 for(LO k = 0; k < colRange[2]; ++k) {
363 for(LO j = 0; j < colRange[1]; ++j) {
364 for(LO i = 0; i < colRange[0]; ++i) {
365 for(LO l = 0; l < BlkSize; ++l) {
366 ghostColGIDs[(k*colRange[1]*colRange[0] + j*colRange[0] + i)*BlkSize + l] = colMinGID
367 + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
368 }
369 }
370 }
371 }
372
373 RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
374 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
375 ghostRowGIDs(),
376 A->getRowMap()->getIndexBase(),
377 A->getRowMap()->getComm());
378 RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
379 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
380 ghostColGIDs(),
381 A->getRowMap()->getIndexBase(),
382 A->getRowMap()->getComm());
383 RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO,GO,NO>::Build(A->getRowMap(),
384 ghostedRowMap);
385 RCP<const Matrix> Aghost = Xpetra::MatrixFactory<SC,LO,GO,NO>::Build(A, *ghostImporter,
386 ghostedRowMap,
387 ghostedRowMap);
388
389 // Create the maps and data structures for the projection matrix
390 RCP<const Map> rowMapP = A->getDomainMap();
391
392 RCP<const Map> domainMapP;
393
394 RCP<const Map> colMapP;
395
396 // At this point we need to create the column map which is a delicate operation.
397 // The entries in that map need to be ordered as follows:
398 // 1) first owned entries ordered by LID
399 // 2) second order the remaining entries by PID
400 // 3) entries with the same remote PID are ordered by GID.
401 // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
402 // is the number of remote nodes. The sorting can be limited to remote nodes
403 // as the owned ones are alreaded ordered by LID!
404
405 LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
406 {
407 std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
408 for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
409 colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
410 if(ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
411 colMapOrdering[ind].PID = -1;
412 } else {
413 colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
414 }
415 colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
416 colMapOrdering[ind].lexiInd = ind;
417 }
418 std::sort(colMapOrdering.begin(), colMapOrdering.end(),
419 [](NodeID a, NodeID b)->bool{
420 return ( (a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)) );
421 });
422
423 colGIDs.resize(BlkSize*lNumGhostedNodes);
424 for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
425 // Save the permutation calculated to go from Lexicographic indexing to column map indexing
426 ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
427 for(LO dof = 0; dof < BlkSize; ++dof) {
428 colGIDs[BlkSize*ind + dof] = BlkSize*colMapOrdering[ind].GID + dof;
429 }
430 }
431 domainMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
432 BlkSize*gNumCoarseNodes,
433 colGIDs.view(0,BlkSize*lNumCoarseNodes),
434 rowMapP->getIndexBase(),
435 rowMapP->getComm());
436 colMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
437 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
438 colGIDs.view(0, colGIDs.size()),
439 rowMapP->getIndexBase(),
440 rowMapP->getComm());
441 } // End of scope for colMapOrdering and colGIDs
442
443 std::vector<size_t> strideInfo(1);
444 strideInfo[0] = BlkSize;
445 RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO,GO,NO>::Build(domainMapP,
446 strideInfo);
447
448 GO gnnzP = 0;
449 LO lnnzP = 0;
450 // coarse points have one nnz per row
451 gnnzP += gNumCoarseNodes;
452 lnnzP += lNumCoarseNodes;
453 // add all other points multiplying by 2^numDimensions
454 gnnzP += (gNumFineNodes - gNumCoarseNodes)*std::pow(2, numDimensions);
455 lnnzP += (lNumFineNodes - lNumCoarseNodes)*std::pow(2, numDimensions);
456 // finally multiply by the number of dofs per node
457 gnnzP = gnnzP*BlkSize;
458 lnnzP = lnnzP*BlkSize;
459
460 // Create the matrix itself using the above maps
461 RCP<Matrix> P;
462 P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
463 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
464
465 ArrayRCP<size_t> iaP;
466 ArrayRCP<LO> jaP;
467 ArrayRCP<SC> valP;
468
469 PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
470
471 ArrayView<size_t> ia = iaP();
472 ArrayView<LO> ja = jaP();
473 ArrayView<SC> val = valP();
474 ia[0] = 0;
475
476
477 LO numCoarseElements = 1;
478 Array<LO> lCoarseElementsPerDir(3);
479 for(LO dim = 0; dim < numDimensions; ++dim) {
480 lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
481 if(ghostInterface[2*dim]) {++lCoarseElementsPerDir[dim];}
482 if(!ghostInterface[2*dim+1]) {--lCoarseElementsPerDir[dim];}
483 numCoarseElements = numCoarseElements*lCoarseElementsPerDir[dim];
484 }
485
486 for(LO dim = numDimensions; dim < 3; ++dim) {
487 lCoarseElementsPerDir[dim] = 1;
488 }
489
490 // Loop over the coarse elements
491 Array<int> elementFlags(3);
492 Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
493 Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
494 const int numCoarseNodesInElement = std::pow(2, numDimensions);
495 const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
496 const int numRowsPerPoint = BlkSize;
497 for(elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
498 for(elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
499 for(elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
500 elementFlags[0] = 0; elementFlags[1] = 0; elementFlags[2] = 0;
501 for(int dim = 0; dim < 3; ++dim) {
502 // Detect boundary conditions on the element and set corresponding flags.
503 if(elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
504 elementFlags[dim] = boundaryFlags[dim];
505 } else if(elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
506 elementFlags[dim] += 1;
507 } else if((elemInds[dim] == lCoarseElementsPerDir[dim] - 1)
508 && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
509 elementFlags[dim] += 2;
510 } else {
511 elementFlags[dim] = 0;
512 }
513
514 // Compute the number of nodes in the current element.
515 if(dim < numDimensions) {
516 if((elemInds[dim] == lCoarseElementsPerDir[dim])
517 && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
518 elementNodesPerDir[dim] = endRate[dim] + 1;
519 } else {
520 elementNodesPerDir[dim] = coarseRate[dim] + 1;
521 }
522 } else {
523 elementNodesPerDir[dim] = 1;
524 }
525
526 // Get the lowest tuple of the element using the ghosted local coordinate system
527 glElementRefTuple[dim] = elemInds[dim]*coarseRate[dim];
528 glElementRefTupleCG[dim] = elemInds[dim];
529 }
530
531 // Now get the column map indices corresponding to the dofs associated with the current
532 // element's coarse nodes.
533 for(typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
534 glElementCoarseNodeCG[elem]
535 = glElementRefTupleCG[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
536 + glElementRefTupleCG[1]*glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
537 }
538 glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
539 glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
540 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
541 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
542
543 glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
544 glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
545 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
546 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
547
548 glElementCoarseNodeCG[1] += 1;
549 glElementCoarseNodeCG[3] += 1;
550 glElementCoarseNodeCG[5] += 1;
551 glElementCoarseNodeCG[7] += 1;
552
553 LO numNodesInElement = elementNodesPerDir[0]*elementNodesPerDir[1]*elementNodesPerDir[2];
554 // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
555 // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
556
557 // Compute the element prolongator
558 Teuchos::SerialDenseMatrix<LO,SC> Pi, Pf, Pe;
559 Array<LO> dofType(numNodesInElement*BlkSize), lDofInd(numNodesInElement*BlkSize);
560 ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
561 numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
562 lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
563 blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
564 Pi, Pf, Pe, dofType, lDofInd);
565
566 // Find ghosted LID associated with nodes in the element and eventually which of these
567 // nodes are ghosts, this information is used to fill the local prolongator.
568 Array<LO> lNodeLIDs(numNodesInElement);
569 {
570 Array<LO> lNodeTuple(3), nodeInd(3);
571 for(nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
572 for(nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
573 for(nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
574 int stencilLength = 0;
575 if((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
576 (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
577 (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
578 stencilLength = 1;
579 } else {
580 stencilLength = std::pow(2, numDimensions);
581 }
582 LO nodeElementInd = nodeInd[2]*elementNodesPerDir[1]*elementNodesPerDir[1]
583 + nodeInd[1]*elementNodesPerDir[0] + nodeInd[0];
584 for(int dim = 0; dim < 3; ++dim) {
585 lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
586 }
587 if(lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
588 lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
589 lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
590 lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
591 // This flags the ghosts nodes used for prolongator calculation but for which
592 // we do not own the row, hence we won't fill these values on this rank.
593 lNodeLIDs[nodeElementInd] = -1;
594 } else if ((nodeInd[0] == 0 && elemInds[0] !=0) ||
595 (nodeInd[1] == 0 && elemInds[1] !=0) ||
596 (nodeInd[2] == 0 && elemInds[2] !=0)) {
597 // This flags nodes that are owned but common to two coarse elements and that
598 // were already filled by another element, we don't want to fill them twice so
599 // we skip them
600 lNodeLIDs[nodeElementInd] = -2;
601 } else {
602 // The remaining nodes are locally owned and we need to fill the coresponding
603 // rows of the prolongator
604
605 // First we need to find which row needs to be filled
606 lNodeLIDs[nodeElementInd] = lNodeTuple[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0]
607 + lNodeTuple[1]*lFineNodesPerDir[0] + lNodeTuple[0];
608
609 // We also compute the row offset using arithmetic to ensure that we can loop
610 // easily over the nodes in a macro-element as well as facilitate on-node
611 // parallelization. The node serial code could be rewritten with two loops over
612 // the local part of the mesh to avoid the costly integer divisions...
613 Array<LO> refCoarsePointTuple(3);
614 for(int dim = 2; dim > -1; --dim) {
615 if(dim == 0) {
616 refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
617 if(myOffset[dim] == 0) {
618 ++refCoarsePointTuple[dim];
619 }
620 } else {
621 // Note: no need for magnitudeType here, just use double because these things are LO's
622 refCoarsePointTuple[dim] =
623 std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim])
624 / coarseRate[dim]);
625 }
626 if((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {break;}
627 }
628 const LO numCoarsePoints = refCoarsePointTuple[0]
629 + refCoarsePointTuple[1]*lCoarseNodesPerDir[0]
630 + refCoarsePointTuple[2]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0];
631 const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
632
633 // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
634 // fill row 'n' to 'n+BlkSize'.
635 size_t rowPtr = (numFinePoints - numCoarsePoints)*numRowsPerPoint
636 *numCoarseNodesInElement*nnzPerCoarseNode + numCoarsePoints*numRowsPerPoint;
637 if(dofType[nodeElementInd*BlkSize] == 0) {
638 // Check if current node is a coarse point
639 rowPtr = rowPtr - numRowsPerPoint;
640 } else {
641 rowPtr = rowPtr - numRowsPerPoint*numCoarseNodesInElement*nnzPerCoarseNode;
642 }
643
644 for(int dof = 0; dof < BlkSize; ++dof) {
645
646 // Now we loop over the stencil and fill the column indices and row values
647 int cornerInd = 0;
648 switch(dofType[nodeElementInd*BlkSize + dof]) {
649 case 0: // Corner node
650 if(nodeInd[2] == elementNodesPerDir[2] - 1) {cornerInd += 4;}
651 if(nodeInd[1] == elementNodesPerDir[1] - 1) {cornerInd += 2;}
652 if(nodeInd[0] == elementNodesPerDir[0] - 1) {cornerInd += 1;}
653 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1] = rowPtr + dof + 1;
654 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]]*BlkSize + dof;
655 val[rowPtr + dof] = 1.0;
656 break;
657
658 case 1: // Edge node
659 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
660 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
661 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
662 if(blockStrategy == "coupled") {
663 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
664 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
665 + ind1*BlkSize + ind2;
666 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
667 val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
668 ind1*BlkSize + ind2);
669 }
670 } else if(blockStrategy == "uncoupled") {
671 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
672 + ind1;
673 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
674 val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
675 ind1*BlkSize + dof);
676 }
677 }
678 break;
679
680 case 2: // Face node
681 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
682 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
683 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
684 if(blockStrategy == "coupled") {
685 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
686 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
687 + ind1*BlkSize + ind2;
688 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
689 val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
690 ind1*BlkSize + ind2);
691 }
692 } else if(blockStrategy == "uncoupled") {
693 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
694 + ind1;
695 // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
696 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
697 val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
698 ind1*BlkSize + dof);
699 }
700 }
701 break;
702
703 case 3: // Interior node
704 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
705 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
706 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
707 if(blockStrategy == "coupled") {
708 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
709 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
710 + ind1*BlkSize + ind2;
711 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
712 val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
713 ind1*BlkSize + ind2);
714 }
715 } else if(blockStrategy == "uncoupled") {
716 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
717 + ind1;
718 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
719 val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
720 ind1*BlkSize + dof);
721 }
722 }
723 break;
724 }
725 }
726 }
727 }
728 }
729 }
730 } // End of scopt for lNodeTuple and nodeInd
731 }
732 }
733 }
734
735 // Sort all row's column indicies and entries by LID
736 Xpetra::CrsMatrixUtils<SC,LO,GO,NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
737
738 // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
739 PCrs->setAllValues(iaP, jaP, valP);
740 PCrs->expertStaticFillComplete(domainMapP,rowMapP);
741
742 // set StridingInformation of P
743 if (A->IsView("stridedMaps") == true) {
744 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
745 } else {
746 P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
747 }
748
749 // store the transfer operator and node coordinates on coarse level
750 Set(coarseLevel, "P", P);
751 Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
752 Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
753 Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
754
755 }
756
757 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
759 GetGeometricData(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> >& coordinates,
760 const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
761 const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
762 Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
763 Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
764 Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
765 Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
766 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
767 RCP<NodesIDs> ghostedCoarseNodes) const {
768 // This function is extracting the geometric information from the coordinates
769 // and creates the necessary data/formatting to perform locally the calculation
770 // of the pronlongator.
771 //
772 // inputs:
773
774 RCP<const Map> coordinatesMap = coordinates->getMap();
775 LO numDimensions = coordinates->getNumVectors();
776
777 // Using the coarsening rate and the fine level data,
778 // compute coarse level data
779
780 // Phase 1 //
781 // ------------------------------------------------------------------ //
782 // We first start by finding small informations on the mesh such as //
783 // the number of coarse nodes (local and global) and the number of //
784 // ghost nodes / the end rate of coarsening. //
785 // ------------------------------------------------------------------ //
786 GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
787 {
788 GO tmp;
789 gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
790 tmp = minGlobalIndex % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
791 gIndices[1] = tmp / gFineNodesPerDir[0];
792 gIndices[0] = tmp % gFineNodesPerDir[0];
793
794 myOffset[2] = gIndices[2] % coarseRate[2];
795 myOffset[1] = gIndices[1] % coarseRate[1];
796 myOffset[0] = gIndices[0] % coarseRate[0];
797 }
798
799 for(int dim = 0; dim < 3; ++dim) {
800 if(gIndices[dim] == 0) {
801 boundaryFlags[dim] += 1;
802 }
803 if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
804 boundaryFlags[dim] += 2;
805 }
806 }
807
808 // Check whether ghost nodes are needed in each direction
809 for(LO i=0; i < numDimensions; ++i) {
810 if((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
811 ghostInterface[2*i] = true;
812 }
813 if(((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i])
814 && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
815 ghostInterface[2*i + 1] = true;
816 }
817 }
818
819 for(LO i = 0; i < 3; ++i) {
820 if(i < numDimensions) {
821 lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
822 if(myOffset[i] == 0) { ++lCoarseNodesPerDir[i]; }
823 gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
824 endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
825 if(endRate[i] == 0) {
826 ++gCoarseNodesPerDir[i];
827 endRate[i] = coarseRate[i];
828 }
829 } else {
830 // Most quantities need to be set to 1 for extra dimensions
831 // this is rather logical, an x-y plane is like a single layer
832 // of nodes in the z direction...
833 gCoarseNodesPerDir[i] = 1;
834 lCoarseNodesPerDir[i] = 1;
835 endRate[i] = 1;
836 }
837 }
838
839 gNumCoarseNodes = gCoarseNodesPerDir[0]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[2];
840 lNumCoarseNodes = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];
841
842 // For each direction, determine how many ghost points are required.
843 LO lNumGhostNodes = 0;
844 Array<GO> startGhostedCoarseNode(3);
845 {
846 const int complementaryIndices[3][2] = {{1,2}, {0,2}, {0,1}};
847 LO tmp = 0;
848 for(LO i = 0; i < 3; ++i) {
849 // The first branch of this if-statement will be used if the rank contains only one layer
850 // of nodes in direction i, that layer must also coincide with the boundary of the mesh
851 // and coarseRate[i] == endRate[i]...
852 if((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
853 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
854 } else {
855 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
856 }
857 // First we load the number of locally owned coarse nodes
858 glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
859
860 // Check whether a face in direction i needs ghost nodes
861 if(ghostInterface[2*i] || ghostInterface[2*i+1]) {
862 ++glCoarseNodesPerDir[i];
863 if(i == 0) {tmp = lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];}
864 if(i == 1) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[2];}
865 if(i == 2) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1];}
866 }
867 // If both faces in direction i need nodes, double the number of ghost nodes
868 if(ghostInterface[2*i] && ghostInterface[2*i+1]) {
869 ++glCoarseNodesPerDir[i];
870 tmp = 2*tmp;
871 }
872 lNumGhostNodes += tmp;
873
874 // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
875 for(LO j = 0; j < 2; ++j) {
876 for(LO k = 0; k < 2; ++k) {
877 // Check if two adjoining faces need ghost nodes and then add their common edge
878 if(ghostInterface[2*complementaryIndices[i][0]+j]
879 && ghostInterface[2*complementaryIndices[i][1]+k]) {
880 lNumGhostNodes += lCoarseNodesPerDir[i];
881 // Add corners if three adjoining faces need ghost nodes,
882 // but add them only once! Hence when i == 0.
883 if(ghostInterface[2*i] && (i == 0)) { lNumGhostNodes += 1; }
884 if(ghostInterface[2*i+1] && (i == 0)) { lNumGhostNodes += 1; }
885 }
886 }
887 }
888 tmp = 0;
889 }
890 } // end of scope for tmp and complementaryIndices
891
892 const LO lNumGhostedNodes = glCoarseNodesPerDir[0]*glCoarseNodesPerDir[1]
893 *glCoarseNodesPerDir[2];
894 ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
895 ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
896 ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
897 ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
898 ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
899
900 // We loop over all ghosted coarse nodes by increasing global lexicographic order
901 Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
902 LO currentIndex = -1;
903 for(ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
904 for(ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
905 for(ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
906 currentIndex = ijk[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
907 + ijk[1]*glCoarseNodesPerDir[0] + ijk[0];
908 coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
909 coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0]*coarseRate[0];
910 if(coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
911 coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
912 }
913 coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
914 coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1]*coarseRate[1];
915 if(coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
916 coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
917 }
918 coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
919 coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2]*coarseRate[2];
920 if(coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
921 coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
922 }
923 GO myGID = 0, myCoarseGID = -1;
924 GO factor[3] = {};
925 factor[2] = gFineNodesPerDir[1]*gFineNodesPerDir[0];
926 factor[1] = gFineNodesPerDir[0];
927 factor[0] = 1;
928 for(int dim = 0; dim < 3; ++dim) {
929 if(dim < numDimensions) {
930 if(gIndices[dim] - myOffset[dim] + ijk[dim]*coarseRate[dim]
931 < gFineNodesPerDir[dim] - 1) {
932 myGID += (gIndices[dim] - myOffset[dim]
933 + ijk[dim]*coarseRate[dim])*factor[dim];
934 } else {
935 myGID += (gIndices[dim] - myOffset[dim]
936 + (ijk[dim] - 1)*coarseRate[dim] + endRate[dim])*factor[dim];
937 }
938 }
939 }
940 myCoarseGID = coarseNodeCoarseIndices[0]
941 + coarseNodeCoarseIndices[1]*gCoarseNodesPerDir[0]
942 + coarseNodeCoarseIndices[2]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
943 ghostedCoarseNodes->GIDs[currentIndex] = myGID;
944 ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
945 }
946 }
947 }
948 coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
949 ghostedCoarseNodes->PIDs(),
950 ghostedCoarseNodes->LIDs());
951
952 // Phase 2 //
953 // ------------------------------------------------------------------ //
954 // Build a list of GIDs to import the required ghost nodes. //
955 // The ordering of the ghosts nodes will be as natural as possible, //
956 // i.e. it should follow the GID ordering of the mesh. //
957 // ------------------------------------------------------------------ //
958
959 // Saddly we have to more or less redo what was just done to figure out the value of
960 // lNumGhostNodes, there might be some optimization possibility here...
961 ghostGIDs.resize(lNumGhostNodes);
962 LO countGhosts = 0;
963 // Get the GID of the first point on the current partition.
964 GO startingGID = minGlobalIndex;
965 Array<GO> startingIndices(3);
966 // We still want ghost nodes even if have with a 0 offset,
967 // except when we are on a boundary
968 if(ghostInterface[4] && (myOffset[2] == 0)) {
969 if(gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
970 startingGID -= endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
971 } else {
972 startingGID -= coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
973 }
974 } else {
975 startingGID -= myOffset[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
976 }
977 if(ghostInterface[2] && (myOffset[1] == 0)) {
978 if(gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
979 startingGID -= endRate[1]*gFineNodesPerDir[0];
980 } else {
981 startingGID -= coarseRate[1]*gFineNodesPerDir[0];
982 }
983 } else {
984 startingGID -= myOffset[1]*gFineNodesPerDir[0];
985 }
986 if(ghostInterface[0] && (myOffset[0] == 0)) {
987 if(gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
988 startingGID -= endRate[0];
989 } else {
990 startingGID -= coarseRate[0];
991 }
992 } else {
993 startingGID -= myOffset[0];
994 }
995
996 { // scope for tmp
997 GO tmp;
998 startingIndices[2] = startingGID / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
999 tmp = startingGID % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
1000 startingIndices[1] = tmp / gFineNodesPerDir[0];
1001 startingIndices[0] = tmp % gFineNodesPerDir[0];
1002 }
1003
1004 GO ghostOffset[3] = {0, 0, 0};
1005 LO lengthZero = lCoarseNodesPerDir[0];
1006 LO lengthOne = lCoarseNodesPerDir[1];
1007 LO lengthTwo = lCoarseNodesPerDir[2];
1008 if(ghostInterface[0]) {++lengthZero;}
1009 if(ghostInterface[1]) {++lengthZero;}
1010 if(ghostInterface[2]) {++lengthOne;}
1011 if(ghostInterface[3]) {++lengthOne;}
1012 if(ghostInterface[4]) {++lengthTwo;}
1013 if(ghostInterface[5]) {++lengthTwo;}
1014
1015 // First check the bottom face as it will have the lowest GIDs
1016 if(ghostInterface[4]) {
1017 ghostOffset[2] = startingGID;
1018 for(LO j = 0; j < lengthOne; ++j) {
1019 if( (j == lengthOne-1)
1020 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1021 ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1022 } else {
1023 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1024 }
1025 for(LO k = 0; k < lengthZero; ++k) {
1026 if( (k == lengthZero-1)
1027 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1028 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1029 } else {
1030 ghostOffset[0] = k*coarseRate[0];
1031 }
1032 // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1033 // carefully.
1034 // This if statement is repeated each time a ghost node is selected.
1035 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1036 ++countGhosts;
1037 }
1038 }
1039 }
1040
1041 // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1042 // nodes located on these layers.
1043 for(LO i = 0; i < lengthTwo; ++i) {
1044 // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1045 // swept seperatly for efficiency.
1046 if( !((i == lengthTwo-1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4]) ) {
1047 // Set the ghostOffset in direction 2 taking into account a possible endRate different
1048 // from the regular coarseRate.
1049 if( (i == lengthTwo-1) && !ghostInterface[5] ) {
1050 ghostOffset[2] = startingGID + ((i-1)*coarseRate[2] + endRate[2])
1051 *gFineNodesPerDir[1]*gFineNodesPerDir[0];
1052 } else {
1053 ghostOffset[2] = startingGID + i*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1054 }
1055 for(LO j = 0; j < lengthOne; ++j) {
1056 if( (j == 0) && ghostInterface[2] ) {
1057 for(LO k = 0; k < lengthZero; ++k) {
1058 if( (k == lengthZero-1)
1059 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1060 if(k == 0) {
1061 ghostOffset[0] = endRate[0];
1062 } else {
1063 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1064 }
1065 } else {
1066 ghostOffset[0] = k*coarseRate[0];
1067 }
1068 // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1069 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1070 ++countGhosts;
1071 }
1072 } else if( (j == lengthOne-1) && ghostInterface[3] ) {
1073 // Set the ghostOffset in direction 1 taking into account a possible endRate different
1074 // from the regular coarseRate.
1075 if( (j == lengthOne-1)
1076 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1077 ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1078 } else {
1079 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1080 }
1081 for(LO k = 0; k < lengthZero; ++k) {
1082 if( (k == lengthZero-1)
1083 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1084 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1085 } else {
1086 ghostOffset[0] = k*coarseRate[0];
1087 }
1088 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1089 ++countGhosts;
1090 }
1091 } else {
1092 // Set ghostOffset[1] for side faces sweep
1093 if( (j == lengthOne-1)
1094 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1095 ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1096 } else {
1097 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1098 }
1099
1100 // Set ghostOffset[0], ghostGIDs and countGhosts
1101 if(ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1102 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1103 ++countGhosts;
1104 }
1105 if(ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1106 if( (startingIndices[0] + (lengthZero-1)*coarseRate[0]) > gFineNodesPerDir[0] - 1 ) {
1107 if(lengthZero > 2) {
1108 ghostOffset[0] = (lengthZero-2)*coarseRate[0] + endRate[0];
1109 } else {
1110 ghostOffset[0] = endRate[0];
1111 }
1112 } else {
1113 ghostOffset[0] = (lengthZero-1)*coarseRate[0];
1114 }
1115 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1116 ++countGhosts;
1117 }
1118 }
1119 }
1120 }
1121 }
1122
1123 // Finally check the top face
1124 if(ghostInterface[5]) {
1125 if( startingIndices[2] + (lengthTwo-1)*coarseRate[2] + 1 > gFineNodesPerDir[2] ) {
1126 ghostOffset[2] = startingGID
1127 + ((lengthTwo-2)*coarseRate[2] + endRate[2])*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1128 } else {
1129 ghostOffset[2] = startingGID
1130 + (lengthTwo-1)*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1131 }
1132 for(LO j = 0; j < lengthOne; ++j) {
1133 if( (j == lengthOne-1)
1134 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1135 ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1136 } else {
1137 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1138 }
1139 for(LO k = 0; k < lengthZero; ++k) {
1140 if( (k == lengthZero-1)
1141 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1142 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1143 } else {
1144 ghostOffset[0] = k*coarseRate[0];
1145 }
1146 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1147 ++countGhosts;
1148 }
1149 }
1150 }
1151
1152 // Phase 3 //
1153 // ------------------------------------------------------------------ //
1154 // Final phase of this function, lists are being built to create the //
1155 // column and domain maps of the projection as well as the map and //
1156 // arrays for the coarse coordinates multivector. //
1157 // ------------------------------------------------------------------ //
1158
1159 Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1160 LO currentNode, offset2, offset1, offset0;
1161 // Find the GIDs of the coarse nodes on the partition.
1162 for(LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1163 if(myOffset[2] == 0) {
1164 offset2 = startingIndices[2] + myOffset[2];
1165 } else {
1166 if(startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1167 offset2 = startingIndices[2] + endRate[2];
1168 } else {
1169 offset2 = startingIndices[2] + coarseRate[2];
1170 }
1171 }
1172 if(offset2 + ind2*coarseRate[2] > gFineNodesPerDir[2] - 1) {
1173 offset2 += (ind2 - 1)*coarseRate[2] + endRate[2];
1174 } else {
1175 offset2 += ind2*coarseRate[2];
1176 }
1177 offset2 = offset2*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1178
1179 for(LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1180 if(myOffset[1] == 0) {
1181 offset1 = startingIndices[1] + myOffset[1];
1182 } else {
1183 if(startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1184 offset1 = startingIndices[1] + endRate[1];
1185 } else {
1186 offset1 = startingIndices[1] + coarseRate[1];
1187 }
1188 }
1189 if(offset1 + ind1*coarseRate[1] > gFineNodesPerDir[1] - 1) {
1190 offset1 += (ind1 - 1)*coarseRate[1] + endRate[1];
1191 } else {
1192 offset1 += ind1*coarseRate[1];
1193 }
1194 offset1 = offset1*gFineNodesPerDir[0];
1195 for(LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1196 offset0 = startingIndices[0];
1197 if(myOffset[0] == 0) {
1198 offset0 += ind0*coarseRate[0];
1199 } else {
1200 offset0 += (ind0 + 1)*coarseRate[0];
1201 }
1202 if(offset0 > gFineNodesPerDir[0] - 1) {offset0 += endRate[0] - coarseRate[0];}
1203
1204 currentNode = ind2*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]
1205 + ind1*lCoarseNodesPerDir[0]
1206 + ind0;
1207 gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1208 }
1209 }
1210 }
1211
1212 // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1213 // and the corresponding dofs that will need to be added to colMapP.
1214 colGIDs.resize(BlkSize*(lNumCoarseNodes+lNumGhostNodes));
1215 coarseNodesGIDs.resize(lNumCoarseNodes);
1216 for(LO i = 0; i < numDimensions; ++i) {coarseNodes[i].resize(lNumCoarseNodes);}
1217 GO fineNodesPerCoarseSlab = coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1218 GO fineNodesEndCoarseSlab = endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1219 GO fineNodesPerCoarsePlane = coarseRate[1]*gFineNodesPerDir[0];
1220 GO fineNodesEndCoarsePlane = endRate[1]*gFineNodesPerDir[0];
1221 GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
1222 GO gCoarseNodeOnCoarseGridGID;
1223 LO gInd[3], lCol;
1224 Array<int> ghostPIDs (lNumGhostNodes);
1225 Array<LO> ghostLIDs (lNumGhostNodes);
1226 Array<LO> ghostPermut(lNumGhostNodes);
1227 for(LO k = 0; k < lNumGhostNodes; ++k) {ghostPermut[k] = k;}
1228 coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1229 sh_sort_permute(ghostPIDs.begin(),ghostPIDs.end(), ghostPermut.begin(),ghostPermut.end());
1230
1231 { // scope for tmpInds, tmpVars and tmp.
1232 GO tmpInds[3], tmpVars[2];
1233 LO tmp;
1234 // Loop over the coarse nodes of the partition and add them to colGIDs
1235 // that will be used to construct the column and domain maps of P as well
1236 // as to construct the coarse coordinates map.
1237 // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1238 // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1239 LO col = 0;
1240 LO firstCoarseNodeInds[3], currentCoarseNode;
1241 for(LO dim = 0; dim < 3; ++dim) {
1242 if(myOffset[dim] == 0) {
1243 firstCoarseNodeInds[dim] = 0;
1244 } else {
1245 firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1246 }
1247 }
1248 Array<ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
1249 for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim);}
1250 for(LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1251 for(LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1252 for(LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1253 col = k*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0] + j*lCoarseNodesPerDir[0] + i;
1254
1255 // Check for endRate
1256 currentCoarseNode = 0;
1257 if(firstCoarseNodeInds[0] + i*coarseRate[0] > lFineNodesPerDir[0] - 1) {
1258 currentCoarseNode += firstCoarseNodeInds[0] + (i-1)*coarseRate[0] + endRate[0];
1259 } else {
1260 currentCoarseNode += firstCoarseNodeInds[0] + i*coarseRate[0];
1261 }
1262 if(firstCoarseNodeInds[1] + j*coarseRate[1] > lFineNodesPerDir[1] - 1) {
1263 currentCoarseNode += (firstCoarseNodeInds[1] + (j-1)*coarseRate[1] + endRate[1])
1264 *lFineNodesPerDir[0];
1265 } else {
1266 currentCoarseNode += (firstCoarseNodeInds[1] + j*coarseRate[1])*lFineNodesPerDir[0];
1267 }
1268 if(firstCoarseNodeInds[2] + k*coarseRate[2] > lFineNodesPerDir[2] - 1) {
1269 currentCoarseNode += (firstCoarseNodeInds[2] + (k-1)*coarseRate[2] + endRate[2])
1270 *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1271 } else {
1272 currentCoarseNode += (firstCoarseNodeInds[2] + k*coarseRate[2])
1273 *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1274 }
1275 // Load coordinates
1276 for(LO dim = 0; dim < numDimensions; ++dim) {
1277 coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1278 }
1279
1280 if((endRate[2] != coarseRate[2])
1281 && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2)*fineNodesPerCoarseSlab
1282 + fineNodesEndCoarseSlab - 1)) {
1283 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1284 tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1)*fineNodesPerCoarseSlab
1285 - fineNodesEndCoarseSlab;
1286 } else {
1287 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1288 tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1289 }
1290 if((endRate[1] != coarseRate[1])
1291 && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1292 + fineNodesEndCoarsePlane - 1)) {
1293 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1294 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1295 - fineNodesEndCoarsePlane;
1296 } else {
1297 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1298 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1299 }
1300 if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1301 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1302 } else {
1303 tmpInds[0] = tmpVars[1] / coarseRate[0];
1304 }
1305 gInd[2] = col / (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1306 tmp = col % (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1307 gInd[1] = tmp / lCoarseNodesPerDir[0];
1308 gInd[0] = tmp % lCoarseNodesPerDir[0];
1309 lCol = gInd[2]*(lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0])
1310 + gInd[1]*lCoarseNodesPerDir[0] + gInd[0];
1311 gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1312 + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1313 coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1314 for(LO dof = 0; dof < BlkSize; ++dof) {
1315 colGIDs[BlkSize*lCol + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1316 }
1317 }
1318 }
1319 }
1320 // Now loop over the ghost nodes of the partition to add them to colGIDs
1321 // since they will need to be included in the column map of P
1322 for(col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1323 if((endRate[2] != coarseRate[2])
1324 && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2)
1325 *fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1326 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1327 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]]
1328 - (tmpInds[2] - 1)*fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1329 } else {
1330 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1331 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1332 }
1333 if((endRate[1] != coarseRate[1])
1334 && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1335 + fineNodesEndCoarsePlane - 1)) {
1336 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1337 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1338 - fineNodesEndCoarsePlane;
1339 } else {
1340 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1341 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1342 }
1343 if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1344 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1345 } else {
1346 tmpInds[0] = tmpVars[1] / coarseRate[0];
1347 }
1348 gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1349 + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1350 for(LO dof = 0; dof < BlkSize; ++dof) {
1351 colGIDs[BlkSize*col + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1352 }
1353 }
1354 } // End of scope for tmpInds, tmpVars and tmp
1355
1356 } // GetGeometricData()
1357
1358 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1360 ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1361 const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1362 const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1363 const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1364 const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1365 const Array<bool> ghostInterface, const Array<int> elementFlags,
1366 const std::string stencilType, const std::string /* blockStrategy */,
1367 const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1368 const Array<GO> /* colGIDs */,
1369 Teuchos::SerialDenseMatrix<LO,SC>& Pi, Teuchos::SerialDenseMatrix<LO,SC>& Pf,
1370 Teuchos::SerialDenseMatrix<LO,SC>& Pe, Array<LO>& dofType,
1371 Array<LO>& lDofInd) const {
1372
1373 // First extract data from Aghost and move it to the corresponding dense matrix
1374 // This step requires to compute the number of nodes (resp dofs) in the current
1375 // coarse element, then allocate storage for local dense matrices, finally loop
1376 // over the dofs and extract the corresponding row in Aghost before dispactching
1377 // its data to the dense matrices.
1378 // At the same time we want to compute a mapping from the element indexing to
1379 // group indexing. The groups are the following: corner, edge, face and interior
1380 // nodes. We uses these groups to operate on the dense matrices but need to
1381 // store the nodes in their original order after groupd operations are completed.
1382 LO countInterior=0, countFace=0, countEdge=0, countCorner =0;
1383 Array<LO> collapseDir(numNodesInElement*BlkSize);
1384 for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1385 for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1386 for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1387 // Check for corner node
1388 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1389 && (je == 0 || je == elementNodesPerDir[1]-1)
1390 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1391 for(LO dof = 0; dof < BlkSize; ++dof) {
1392 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1393 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1394 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1395 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countCorner + dof;
1396 }
1397 ++countCorner;
1398
1399 // Check for edge node
1400 } else if (((ke == 0 || ke == elementNodesPerDir[2]-1)
1401 && (je == 0 || je == elementNodesPerDir[1]-1))
1402 || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1403 && (ie == 0 || ie == elementNodesPerDir[0]-1))
1404 || ((je == 0 || je == elementNodesPerDir[1]-1)
1405 && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1406 for(LO dof = 0; dof < BlkSize; ++dof) {
1407 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1408 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1409 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1410 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countEdge + dof;
1411 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1412 && (je == 0 || je == elementNodesPerDir[1]-1)) {
1413 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1414 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1415 } else if((ke == 0 || ke == elementNodesPerDir[2]-1)
1416 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1417 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1418 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1419 } else if((je == 0 || je == elementNodesPerDir[1]-1
1420 ) && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1421 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1422 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1423 }
1424 }
1425 ++countEdge;
1426
1427 // Check for face node
1428 } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1429 || (je == 0 || je == elementNodesPerDir[1]-1)
1430 || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1431 for(LO dof = 0; dof < BlkSize; ++dof) {
1432 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1433 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1434 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1435 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countFace + dof;
1436 if(ke == 0 || ke == elementNodesPerDir[2]-1) {
1437 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1438 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1439 } else if(je == 0 || je == elementNodesPerDir[1]-1) {
1440 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1441 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1442 } else if(ie == 0 || ie == elementNodesPerDir[0]-1) {
1443 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1444 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1445 }
1446 }
1447 ++countFace;
1448
1449 // Otherwise it is an interior node
1450 } else {
1451 for(LO dof = 0; dof < BlkSize; ++dof) {
1452 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1453 + je*elementNodesPerDir[0] + ie) + dof] = 3;
1454 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1455 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countInterior +dof;
1456 }
1457 ++countInterior;
1458 }
1459 }
1460 }
1461 }
1462
1463 LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1464 numInteriorNodes = (elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1465 *(elementNodesPerDir[2] - 2);
1466 numFaceNodes = 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1467 + 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[2] - 2)
1468 + 2*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[2] - 2);
1469 numEdgeNodes = 4*(elementNodesPerDir[0] - 2) + 4*(elementNodesPerDir[1] - 2)
1470 + 4*(elementNodesPerDir[2] - 2);
1471 // Diagonal blocks
1472 Teuchos::SerialDenseMatrix<LO,SC> Aii(BlkSize*numInteriorNodes, BlkSize*numInteriorNodes);
1473 Teuchos::SerialDenseMatrix<LO,SC> Aff(BlkSize*numFaceNodes, BlkSize*numFaceNodes);
1474 Teuchos::SerialDenseMatrix<LO,SC> Aee(BlkSize*numEdgeNodes, BlkSize*numEdgeNodes);
1475 // Upper triangular blocks
1476 Teuchos::SerialDenseMatrix<LO,SC> Aif(BlkSize*numInteriorNodes, BlkSize*numFaceNodes);
1477 Teuchos::SerialDenseMatrix<LO,SC> Aie(BlkSize*numInteriorNodes, BlkSize*numEdgeNodes);
1478 Teuchos::SerialDenseMatrix<LO,SC> Afe(BlkSize*numFaceNodes, BlkSize*numEdgeNodes);
1479 // Coarse nodes "right hand sides" and local prolongators
1480 Teuchos::SerialDenseMatrix<LO,SC> Aic(BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1481 Teuchos::SerialDenseMatrix<LO,SC> Afc(BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1482 Teuchos::SerialDenseMatrix<LO,SC> Aec(BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1483 Pi.shape( BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1484 Pf.shape( BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1485 Pe.shape( BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1486
1487 ArrayView<const LO> rowIndices;
1488 ArrayView<const SC> rowValues;
1489 LO idof, iInd, jInd;
1490 int iType = 0, jType = 0;
1491 int orientation = -1;
1492 int collapseFlags[3] = {};
1493 Array<SC> stencil((std::pow(3,numDimensions))*BlkSize);
1494 // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1495 // we might want to fuse the first three loops and do integer arithmetic
1496 // to have more optimization during compilation...
1497 for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1498 for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1499 for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1500 collapseFlags[0] = 0; collapseFlags[1] = 0; collapseFlags[2] = 0;
1501 if((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1502 collapseFlags[0] += 1;
1503 }
1504 if((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1505 collapseFlags[0] += 2;
1506 }
1507 if((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1508 collapseFlags[1] += 1;
1509 }
1510 if((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1511 collapseFlags[1] += 2;
1512 }
1513 if((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1514 collapseFlags[2] += 1;
1515 }
1516 if((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1517 collapseFlags[2] += 2;
1518 }
1519
1520 // Based on (ie, je, ke) we detect the type of node we are dealing with
1521 GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1522 for(LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1523 // Compute the dof index for each dof at point (ie, je, ke)
1524 idof = ((elemInds[2]*coarseRate[2] + ke)*lFineNodesPerDir[1]*lFineNodesPerDir[0]
1525 + (elemInds[1]*coarseRate[1] + je)*lFineNodesPerDir[0]
1526 + elemInds[0]*coarseRate[0] + ie)*BlkSize + dof0;
1527 Aghost->getLocalRowView(idof, rowIndices, rowValues);
1528 FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1529 elementNodesPerDir, collapseFlags, stencilType, stencil);
1530 LO io, jo, ko;
1531 if(iType == 3) {// interior node, no stencil collapse needed
1532 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1533 // Find (if, jf, kf) the indices associated with the interacting node
1534 ko = ke + (interactingNode / 9 - 1);
1535 {
1536 LO tmp = interactingNode % 9;
1537 jo = je + (tmp / 3 - 1);
1538 io = ie + (tmp % 3 - 1);
1539 int dummy;
1540 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1541 }
1542 if((io > -1 && io < elementNodesPerDir[0]) &&
1543 (jo > -1 && jo < elementNodesPerDir[1]) &&
1544 (ko > -1 && ko < elementNodesPerDir[2])) {
1545 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1546 if (jType == 3) {
1547 Aii(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1548 stencil[interactingNode*BlkSize + dof1];
1549 } else if(jType == 2) {
1550 Aif(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1551 stencil[interactingNode*BlkSize + dof1];
1552 } else if(jType == 1) {
1553 Aie(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1554 stencil[interactingNode*BlkSize + dof1];
1555 } else if(jType == 0) {
1556 Aic(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1557 -stencil[interactingNode*BlkSize + dof1];
1558 }
1559 }
1560 }
1561 }
1562 } else if(iType == 2) {// Face node, collapse stencil along face normal (*orientation)
1563 CollapseStencil(2, orientation, collapseFlags, stencil);
1564 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1565 // Find (if, jf, kf) the indices associated with the interacting node
1566 ko = ke + (interactingNode / 9 - 1);
1567 {
1568 LO tmp = interactingNode % 9;
1569 jo = je + (tmp / 3 - 1);
1570 io = ie + (tmp % 3 - 1);
1571 int dummy;
1572 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1573 }
1574 if((io > -1 && io < elementNodesPerDir[0]) &&
1575 (jo > -1 && jo < elementNodesPerDir[1]) &&
1576 (ko > -1 && ko < elementNodesPerDir[2])) {
1577 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1578 if(jType == 2) {
1579 Aff(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1580 stencil[interactingNode*BlkSize + dof1];
1581 } else if(jType == 1) {
1582 Afe(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1583 stencil[interactingNode*BlkSize + dof1];
1584 } else if(jType == 0) {
1585 Afc(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1586 -stencil[interactingNode*BlkSize + dof1];
1587 }
1588 }
1589 }
1590 }
1591 } else if(iType == 1) {// Edge node, collapse stencil perpendicular to edge direction
1592 CollapseStencil(1, orientation, collapseFlags, stencil);
1593 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1594 // Find (if, jf, kf) the indices associated with the interacting node
1595 ko = ke + (interactingNode / 9 - 1);
1596 {
1597 LO tmp = interactingNode % 9;
1598 jo = je + (tmp / 3 - 1);
1599 io = ie + (tmp % 3 - 1);
1600 int dummy;
1601 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1602 }
1603 if((io > -1 && io < elementNodesPerDir[0]) &&
1604 (jo > -1 && jo < elementNodesPerDir[1]) &&
1605 (ko > -1 && ko < elementNodesPerDir[2])) {
1606 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1607 if(jType == 1) {
1608 Aee(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1609 stencil[interactingNode*BlkSize + dof1];
1610 } else if(jType == 0) {
1611 Aec(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1612 -stencil[interactingNode*BlkSize + dof1];
1613 }
1614 } // Check if interacting node is in element or not
1615 } // Pc is the identity so no need to treat iType == 0
1616 } // Loop over interacting nodes within a row
1617 } // Check for current node type
1618 } // Loop over degrees of freedom associated to a given node
1619 } // Loop over i
1620 } // Loop over j
1621 } // Loop over k
1622
1623 // Compute the projection operators for edge and interior nodes
1624 //
1625 // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1626 // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1627 // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1628 // [P_c] = I_c
1629 //
1630 { // Compute P_e
1631 // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1632 // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1633 Teuchos::SerialDenseSolver<LO,SC> problem;
1634 problem.setMatrix(Teuchos::rcp(&Aee, false));
1635 problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1636 problem.factorWithEquilibration(true);
1637 problem.solve();
1638 problem.unequilibrateLHS();
1639 }
1640
1641 { // Compute P_f
1642 // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1643 // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1644 Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1645 // Step two: P_f = A_{ff}^{-1}*A_{fc}
1646 Teuchos::SerialDenseSolver<LO,SC> problem;
1647 problem.setMatrix(Teuchos::rcp(&Aff, false));
1648 problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1649 problem.factorWithEquilibration(true);
1650 problem.solve();
1651 problem.unequilibrateLHS();
1652 }
1653
1654 { // Compute P_i
1655 // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1656 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1657 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1658 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1659 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1660 // Step two: P_i = A_{ii}^{-1}*A_{ic}
1661 Teuchos::SerialDenseSolver<LO,SC> problem;
1662 problem.setMatrix(Teuchos::rcp(&Aii, false));
1663 problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1664 problem.factorWithEquilibration(true);
1665 problem.solve();
1666 problem.unequilibrateLHS();
1667 }
1668
1669 } // ComputeLocalEntries()
1670
1671 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1673 CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1674 Array<SC>& stencil) const {
1675
1676 if(type == 2) {// Face stencil collapse
1677 // Let's hope things vectorize well inside this if statement
1678 if(orientation == 0) {
1679 for(LO i = 0; i < 9; ++i) {
1680 stencil[3*i + 1] = stencil[3*i] + stencil[3*i + 1] + stencil[3*i + 2];
1681 stencil[3*i] = 0;
1682 stencil[3*i + 2] = 0;
1683 }
1684 } else if(orientation == 1) {
1685 for(LO i = 0; i < 3; ++i) {
1686 stencil[9*i + 3] = stencil[9*i + 0] + stencil[9*i + 3] + stencil[9*i + 6];
1687 stencil[9*i + 0] = 0;
1688 stencil[9*i + 6] = 0;
1689 stencil[9*i + 4] = stencil[9*i + 1] + stencil[9*i + 4] + stencil[9*i + 7];
1690 stencil[9*i + 1] = 0;
1691 stencil[9*i + 7] = 0;
1692 stencil[9*i + 5] = stencil[9*i + 2] + stencil[9*i + 5] + stencil[9*i + 8];
1693 stencil[9*i + 2] = 0;
1694 stencil[9*i + 8] = 0;
1695 }
1696 } else if(orientation == 2) {
1697 for(LO i = 0; i < 9; ++i) {
1698 stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1699 stencil[i] = 0;
1700 stencil[i + 18] = 0;
1701 }
1702 }
1703 } else if(type == 1) {// Edge stencil collapse
1704 SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1705 if(orientation == 0) {
1706 for(LO i = 0; i < 9; ++i) {
1707 tmp1 += stencil[0 + 3*i];
1708 stencil[0 + 3*i] = 0;
1709 tmp2 += stencil[1 + 3*i];
1710 stencil[1 + 3*i] = 0;
1711 tmp3 += stencil[2 + 3*i];
1712 stencil[2 + 3*i] = 0;
1713 }
1714 stencil[12] = tmp1;
1715 stencil[13] = tmp2;
1716 stencil[14] = tmp3;
1717 } else if(orientation == 1) {
1718 for(LO i = 0; i < 3; ++i) {
1719 tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1720 stencil[ 0 + i] = 0;
1721 stencil[ 9 + i] = 0;
1722 stencil[18 + i] = 0;
1723 tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1724 stencil[ 3 + i] = 0;
1725 stencil[12 + i] = 0;
1726 stencil[21 + i] = 0;
1727 tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1728 stencil[ 6 + i] = 0;
1729 stencil[15 + i] = 0;
1730 stencil[24 + i] = 0;
1731 }
1732 stencil[10] = tmp1;
1733 stencil[13] = tmp2;
1734 stencil[16] = tmp3;
1735 } else if(orientation == 2) {
1736 for(LO i = 0; i < 9; ++i) {
1737 tmp1 += stencil[i];
1738 stencil[i] = 0;
1739 tmp2 += stencil[i + 9];
1740 stencil[i + 9] = 0;
1741 tmp3 += stencil[i + 18];
1742 stencil[i + 18] = 0;
1743 }
1744 stencil[ 4] = tmp1;
1745 stencil[13] = tmp2;
1746 stencil[22] = tmp3;
1747 }
1748 }
1749 } // CollapseStencil
1750
1751 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1753 FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1754 const LO /* ke */, const ArrayView<const SC> rowValues,const Array<LO> /* elementNodesPerDir */,
1755 const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1756 const {
1757
1758 if(stencilType == "reduced") {
1759 Array<int> fullStencilInds(7);
1760 fullStencilInds[0] = 4; fullStencilInds[1] = 10; fullStencilInds[2] = 12;
1761 fullStencilInds[3] = 13; fullStencilInds[4] = 14; fullStencilInds[5] = 16;
1762 fullStencilInds[6] = 22;
1763
1764 // Create a mask array to automate mesh boundary processing
1765 Array<int> mask(7);
1766 int stencilSize = rowValues.size();
1767 if(collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1768 if(stencilSize == 1) {
1769 // The assumption is made that if only one non-zero exists in the row
1770 // then this represent a Dirichlet BC being enforced.
1771 // One might want to zero out the corresponding row in the prolongator.
1772 mask[0] = 1; mask[1] = 1; mask[2] = 1;
1773 mask[4] = 1; mask[5] = 1; mask[6] = 1;
1774 } else {
1775 // Here we are looking at Neumann like BC where only a flux is prescribed
1776 // and less non-zeros will be present in the corresponding row.
1777 if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1778 mask[0] = 1;
1779 }
1780 if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1781 mask[6] = 1;
1782 }
1783 if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1784 mask[1] = 1;
1785 }
1786 if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1787 mask[5] = 1;
1788 }
1789 if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1790 mask[2] = 1;
1791 }
1792 if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1793 mask[4] = 1;
1794 }
1795 }
1796 }
1797
1798 int offset = 0;
1799 for(int ind = 0; ind < 7; ++ind) {
1800 if(mask[ind] == 1) {
1801 for(int dof = 0; dof < BlkSize; ++dof) {
1802 stencil[BlkSize*fullStencilInds[ind] + dof] = 0.0;
1803 }
1804 ++offset; // The offset is used to stay within rowValues bounds
1805 } else {
1806 for(int dof = 0; dof < BlkSize; ++dof) {
1807 stencil[BlkSize*fullStencilInds[ind] + dof] = rowValues[BlkSize*(ind - offset) + dof];
1808 }
1809 }
1810 }
1811 } else if(stencilType == "full") {
1812 // Create a mask array to automate mesh boundary processing
1813 Array<int> mask(27);
1814 if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1815 for(int ind = 0; ind < 9; ++ind) {
1816 mask[ind] = 1;
1817 }
1818 }
1819 if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1820 for(int ind = 0; ind < 9; ++ind) {
1821 mask[18 + ind] = 1;
1822 }
1823 }
1824 if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1825 for(int ind1 = 0; ind1 < 3; ++ind1) {
1826 for(int ind2 = 0; ind2 < 3; ++ind2) {
1827 mask[ind1*9 + ind2] = 1;
1828 }
1829 }
1830 }
1831 if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1832 for(int ind1 = 0; ind1 < 3; ++ind1) {
1833 for(int ind2 = 0; ind2 < 3; ++ind2) {
1834 mask[ind1*9 + ind2 + 6] = 1;
1835 }
1836 }
1837 }
1838 if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1839 for(int ind = 0; ind < 9; ++ind) {
1840 mask[3*ind] = 1;
1841 }
1842 }
1843 if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1844 for(int ind = 0; ind < 9; ++ind) {
1845 mask[3*ind + 2] = 1;
1846 }
1847 }
1848
1849 // Now the stencil is extracted and formated
1850 int offset = 0;
1851 for(LO ind = 0; ind < 27; ++ind) {
1852 if(mask[ind] == 0) {
1853 for(int dof = 0; dof < BlkSize; ++dof) {
1854 stencil[BlkSize*ind + dof] = 0.0;
1855 }
1856 ++offset; // The offset is used to stay within rowValues bounds
1857 } else {
1858 for(int dof = 0; dof < BlkSize; ++dof) {
1859 stencil[BlkSize*ind + dof] = rowValues[BlkSize*(ind - offset) + dof];
1860 }
1861 }
1862 }
1863 } // stencilTpye
1864 } // FormatStencil()
1865
1866 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1868 const LO ie, const LO je, const LO ke,
1869 const Array<LO> elementNodesPerDir,
1870 int* type, LO& ind, int* orientation) const {
1871
1872 // Initialize flags with -1 to be able to catch issues easily
1873 *type = -1, ind = 0, *orientation = -1;
1874 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1875 && (je == 0 || je == elementNodesPerDir[1]-1)
1876 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1877 // Corner node
1878 *type = 0;
1879 ind = 4*ke / (elementNodesPerDir[2]-1) + 2*je / (elementNodesPerDir[1]-1)
1880 + ie / (elementNodesPerDir[0]-1);
1881 } else if(((ke == 0 || ke == elementNodesPerDir[2]-1)
1882 && (je == 0 || je == elementNodesPerDir[1]-1))
1883 || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1884 && (ie == 0 || ie == elementNodesPerDir[0]-1))
1885 || ((je == 0 || je == elementNodesPerDir[1]-1)
1886 && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1887 // Edge node
1888 *type = 1;
1889 if(ke > 0) {ind += 2*(elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);}
1890 if(ke == elementNodesPerDir[2] - 1) {ind += 4*(elementNodesPerDir[2] - 2);}
1891 if((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1892 if(je == 0) {// jlo ie edge
1893 *orientation = 0;
1894 ind += ie - 1;
1895 } else if(je == elementNodesPerDir[1] - 1) {// jhi ie edge
1896 *orientation = 0;
1897 ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1898 } else {// ilo or ihi je edge
1899 *orientation = 1;
1900 ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0] - 1);
1901 }
1902 } else {// ke edges
1903 *orientation = 2;
1904 ind += 4*(ke - 1) + 2*(je/(elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1905 }
1906 } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1907 || (je == 0 || je == elementNodesPerDir[1]-1)
1908 || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1909 // Face node
1910 *type = 2;
1911 if(ke == 0) {// current node is on "bottom" face
1912 *orientation = 2;
1913 ind = (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1914 } else {
1915 // add nodes from "bottom" face
1916 ind += (elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2);
1917 // add nodes from side faces
1918 ind += 2*(ke - 1)*(elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1919 if(ke == elementNodesPerDir[2]-1) {// current node is on "top" face
1920 *orientation = 2;
1921 ind += (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1922 } else {// current node is on a side face
1923 if(je == 0) {
1924 *orientation = 1;
1925 ind += ie - 1;
1926 } else if(je == elementNodesPerDir[1] - 1) {
1927 *orientation = 1;
1928 ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1929 } else {
1930 *orientation = 0;
1931 ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0]-1);
1932 }
1933 }
1934 }
1935 } else {
1936 // Interior node
1937 *type = 3;
1938 ind = (ke - 1)*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2)
1939 + (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1940 }
1941 } // GetNodeInfo()
1942
1943 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1945 const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1946 const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1947 const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1948 const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const
1949 {
1950 typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1951 DT n = last1 - first1;
1952 DT m = n / 2;
1953 DT z = Teuchos::OrdinalTraits<DT>::zero();
1954 while (m > z)
1955 {
1956 DT max = n - m;
1957 for (DT j = 0; j < max; j++)
1958 {
1959 for (DT k = j; k >= 0; k-=m)
1960 {
1961 if (first1[first2[k+m]] >= first1[first2[k]])
1962 break;
1963 std::swap(first2[k+m], first2[k]);
1964 }
1965 }
1966 m = m/2;
1967 }
1968 }
1969
1970} //namespace MueLu
1971
1972#define MUELU_BLACKBOXPFACTORY_SHORT
1973#endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
Namespace for MueLu classes and methods.
@ Runtime0
One-liner description of what is happening.