MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
47#define MUELU_BLACKBOXPFACTORY_DEF_HPP
48
49#include <stdlib.h>
50#include <iomanip>
51
52
53// #include <Teuchos_LAPACK.hpp>
54#include <Teuchos_SerialDenseMatrix.hpp>
55#include <Teuchos_SerialDenseVector.hpp>
56#include <Teuchos_SerialDenseSolver.hpp>
57
58#include <Xpetra_CrsMatrixUtils.hpp>
59#include <Xpetra_CrsMatrixWrap.hpp>
60#include <Xpetra_ImportFactory.hpp>
61#include <Xpetra_Matrix.hpp>
62#include <Xpetra_MapFactory.hpp>
63#include <Xpetra_MultiVectorFactory.hpp>
64#include <Xpetra_VectorFactory.hpp>
65
66#include <Xpetra_IO.hpp>
67
69
70#include "MueLu_Monitor.hpp"
71
72namespace MueLu {
73
74 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
76 RCP<ParameterList> validParamList = rcp(new ParameterList());
77
78 // Coarsen can come in two forms, either a single char that will be interpreted as an integer
79 // which is used as the coarsening rate in every spatial dimentions,
80 // or it can be a longer string that will then be interpreted as an array of integers.
81 // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
82 // is the default setting!
83 validParamList->set<std::string > ("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
84 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
85 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
86 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
87 validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
88 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
89 validParamList->set<std::string> ("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
90 validParamList->set<std::string> ("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
91
92 return validParamList;
93 }
94
95 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
97 Level& /* coarseLevel */)
98 const {
99 Input(fineLevel, "A");
100 Input(fineLevel, "Nullspace");
101 Input(fineLevel, "Coordinates");
102 // Request the global number of nodes per dimensions
103 if(fineLevel.GetLevelID() == 0) {
104 if(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
105 fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
106 } else {
107 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
109 "gNodesPerDim was not provided by the user on level0!");
110 }
111 } else {
112 Input(fineLevel, "gNodesPerDim");
113 }
114
115 // Request the local number of nodes per dimensions
116 if(fineLevel.GetLevelID() == 0) {
117 if(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
118 fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
119 } else {
120 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
122 "lNodesPerDim was not provided by the user on level0!");
123 }
124 } else {
125 Input(fineLevel, "lNodesPerDim");
126 }
127 }
128
129 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
131 Level& coarseLevel) const{
132 return BuildP(fineLevel, coarseLevel);
133
134 }
135
136 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
138 Level& coarseLevel)const{
139 FactoryMonitor m(*this, "Build", coarseLevel);
140
141 // Get parameter list
142 const ParameterList& pL = GetParameterList();
143
144 // obtain general variables
145 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
146 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
147 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coordinates =
149 LO numDimensions = coordinates->getNumVectors();
150 LO BlkSize = A->GetFixedBlockSize();
151
152 // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
153 Array<GO> gFineNodesPerDir(3);
154 Array<LO> lFineNodesPerDir(3);
155 // Get the number of points in each direction
156 if(fineLevel.GetLevelID() == 0) {
157 gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
158 lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
159 } else {
160 // Loading global number of nodes per diretions
161 gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
162
163 // Loading local number of nodes per diretions
164 lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
165 }
166 for(LO i = 0; i < 3; ++i) {
167 if(gFineNodesPerDir[i] == 0) {
168 GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
169 << std::endl;
170 gFineNodesPerDir[i] = 1;
171 }
172 if(lFineNodesPerDir[i] == 0) {
173 GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
174 << std::endl;
175 lFineNodesPerDir[i] = 1;
176 }
177 }
178 GO gNumFineNodes = gFineNodesPerDir[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
179 LO lNumFineNodes = lFineNodesPerDir[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0];
180
181 // Get the coarsening rate
182 std::string coarsenRate = pL.get<std::string>("Coarsen");
183 Array<LO> coarseRate(3);
184 {
185 Teuchos::Array<LO> crates;
186 try {
187 crates = Teuchos::fromStringToArray<LO>(coarsenRate);
188 } catch(const Teuchos::InvalidArrayStringRepresentation& e) {
189 GetOStream(Errors,-1) << " *** Coarsen must be a string convertible into an array! *** "
190 << std::endl;
191 throw e;
192 }
193 TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
195 "Coarsen must have at least as many components as the number of"
196 " spatial dimensions in the problem.");
197 for(LO i = 0; i < 3; ++i) {
198 if(i < numDimensions) {
199 if(crates.size() == 1) {
200 coarseRate[i] = crates[0];
201 } else if(i < crates.size()) {
202 coarseRate[i] = crates[i];
203 } else {
204 GetOStream(Errors,-1) << " *** Coarsen must be at least as long as the number of"
205 " spatial dimensions! *** " << std::endl;
206 throw Exceptions::RuntimeError(" *** Coarsen must be at least as long as the number of"
207 " spatial dimensions! *** \n");
208 }
209 } else {
210 coarseRate[i] = 1;
211 }
212 }
213 } // End of scope for crates
214
215 // Get the stencil type used for discretization
216 const std::string stencilType = pL.get<std::string>("stencil type");
217 if(stencilType != "full" && stencilType != "reduced") {
218 GetOStream(Errors,-1) << " *** stencil type must be set to: full or reduced, any other value "
219 "is trated as an error! *** " << std::endl;
220 throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
221 }
222
223 // Get the strategy for PDE systems
224 const std::string blockStrategy = pL.get<std::string>("block strategy");
225 if(blockStrategy != "coupled" && blockStrategy != "uncoupled") {
226 GetOStream(Errors,-1) << " *** block strategy must be set to: coupled or uncoupled, any other "
227 "value is trated as an error! *** " << std::endl;
228 throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
229 }
230
231 GO gNumCoarseNodes = 0;
232 LO lNumCoarseNodes = 0;
233 Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
234 Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
235 Array<bool> ghostInterface(6);
236 Array<int> boundaryFlags(3);
237 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes(numDimensions);
238 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
239 for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim)();}
240
241 // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
242 RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
243
244 GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize,// inputs
245 gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
246 lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
247 gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
248 ghostedCoarseNodes);
249
250 // Create the MultiVector of coarse coordinates
251 Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
252 RCP<const Map> coarseCoordsMap = MapFactory::Build (lib,
253 gNumCoarseNodes,
254 coarseNodesGIDs.view(0, lNumCoarseNodes),
255 coordinates->getMap()->getIndexBase(),
256 coordinates->getMap()->getComm());
257 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseCoords(numDimensions);
258 for(LO dim = 0; dim < numDimensions; ++dim) {
259 coarseCoords[dim] = coarseNodes[dim]();
260 }
261 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coarseCoordinates =
262 Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(coarseCoordsMap, coarseCoords(),
263 numDimensions);
264
265 // Now create a new matrix: Aghost that contains all the data
266 // locally needed to compute the local part of the prolongator.
267 // Here assuming that all the coarse nodes o and fine nodes +
268 // are local then all the data associated with the coarse
269 // nodes O and the fine nodes * needs to be imported.
270 //
271 // *--*--*--*--*--*--*--*
272 // | | | | | | | |
273 // o--+--+--o--+--+--O--*
274 // | | | | | | | |
275 // +--+--+--+--+--+--*--*
276 // | | | | | | | |
277 // +--+--+--+--+--+--*--*
278 // | | | | | | | |
279 // o--+--+--o--+--+--O--*
280 // | | | | | | | |
281 // +--+--+--+--+--+--*--*
282 // | | | | | | | |
283 // *--*--*--*--*--*--*--*
284 // | | | | | | | |
285 // O--*--*--O--*--*--O--*
286 //
287 // Creating that local matrix is easy enough using proper range
288 // and domain maps to import data from A. Note that with this
289 // approach we reorder the local entries using the domain map and
290 // can subsequently compute the prolongator without reordering.
291 // As usual we need to be careful about any coarsening rate
292 // change at the boundary!
293
294 // The ingredients needed are an importer, a range map and a domain map
295 Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
296 nodeSteps[0] = 1;
297 nodeSteps[1] = gFineNodesPerDir[0];
298 nodeSteps[2] = gFineNodesPerDir[0]*gFineNodesPerDir[1];
299 Array<LO> glFineNodesPerDir(3);
300 GO startingGID = A->getRowMap()->getMinGlobalIndex();
301 for(LO dim = 0; dim < 3; ++dim) {
302 LO numCoarseNodes = 0;
303 if(dim < numDimensions) {
304 startingGID -= myOffset[dim]*nodeSteps[dim];
305 numCoarseNodes = lCoarseNodesPerDir[dim];
306 if(ghostInterface[2*dim]) {++numCoarseNodes;}
307 if(ghostInterface[2*dim+1]) {++numCoarseNodes;}
308 if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
309 glFineNodesPerDir[dim] = (numCoarseNodes - 2)*coarseRate[dim] + endRate[dim] + 1;
310 } else {
311 glFineNodesPerDir[dim] = (numCoarseNodes - 1)*coarseRate[dim] + 1;
312 }
313 } else {
314 glFineNodesPerDir[dim] = 1;
315 }
316 }
317 ghostRowGIDs.resize(glFineNodesPerDir[0]*glFineNodesPerDir[1]*glFineNodesPerDir[2]*BlkSize);
318 for(LO k = 0; k < glFineNodesPerDir[2]; ++k) {
319 for(LO j = 0; j < glFineNodesPerDir[1]; ++j) {
320 for(LO i = 0; i < glFineNodesPerDir[0]; ++i) {
321 for(LO l = 0; l < BlkSize; ++l) {
322 ghostRowGIDs[(k*glFineNodesPerDir[1]*glFineNodesPerDir[0]
323 + j*glFineNodesPerDir[0] + i)*BlkSize + l] = startingGID
324 + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
325 }
326 }
327 }
328 }
329
330 // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
331 Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
332 Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
333 GO colMinGID = 0;
334 Array<LO> colRange(numDimensions);
335 dimStride[0] = 1;
336 for(int dim = 1; dim < numDimensions; ++dim) {
337 dimStride[dim] = dimStride[dim - 1]*gFineNodesPerDir[dim - 1];
338 }
339 {
340 GO tmp = startingGID;
341 for(int dim = numDimensions; dim > 0; --dim) {
342 startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
343 tmp = tmp % dimStride[dim - 1];
344
345 if(startingGlobalIndices[dim - 1] > 0) {
346 startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
347 }
348 if(startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]){
349 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
350 + glFineNodesPerDir[dim - 1];
351 } else {
352 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
353 + glFineNodesPerDir[dim - 1] - 1;
354 }
355 colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
356 colMinGID += startingColIndices[dim - 1]*dimStride[dim - 1];
357 }
358 }
359 ghostColGIDs.resize(colRange[0]*colRange[1]*colRange[2]*BlkSize);
360 for(LO k = 0; k < colRange[2]; ++k) {
361 for(LO j = 0; j < colRange[1]; ++j) {
362 for(LO i = 0; i < colRange[0]; ++i) {
363 for(LO l = 0; l < BlkSize; ++l) {
364 ghostColGIDs[(k*colRange[1]*colRange[0] + j*colRange[0] + i)*BlkSize + l] = colMinGID
365 + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
366 }
367 }
368 }
369 }
370
371 RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
372 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
373 ghostRowGIDs(),
374 A->getRowMap()->getIndexBase(),
375 A->getRowMap()->getComm());
376 RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
377 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
378 ghostColGIDs(),
379 A->getRowMap()->getIndexBase(),
380 A->getRowMap()->getComm());
381 RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO,GO,NO>::Build(A->getRowMap(),
382 ghostedRowMap);
383 RCP<const Matrix> Aghost = Xpetra::MatrixFactory<SC,LO,GO,NO>::Build(A, *ghostImporter,
384 ghostedRowMap,
385 ghostedRowMap);
386
387 // Create the maps and data structures for the projection matrix
388 RCP<const Map> rowMapP = A->getDomainMap();
389
390 RCP<const Map> domainMapP;
391
392 RCP<const Map> colMapP;
393
394 // At this point we need to create the column map which is a delicate operation.
395 // The entries in that map need to be ordered as follows:
396 // 1) first owned entries ordered by LID
397 // 2) second order the remaining entries by PID
398 // 3) entries with the same remote PID are ordered by GID.
399 // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
400 // is the number of remote nodes. The sorting can be limited to remote nodes
401 // as the owned ones are alreaded ordered by LID!
402
403 LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
404 {
405 std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
406 for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
407 colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
408 if(ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
409 colMapOrdering[ind].PID = -1;
410 } else {
411 colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
412 }
413 colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
414 colMapOrdering[ind].lexiInd = ind;
415 }
416 std::sort(colMapOrdering.begin(), colMapOrdering.end(),
417 [](NodeID a, NodeID b)->bool{
418 return ( (a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)) );
419 });
420
421 colGIDs.resize(BlkSize*lNumGhostedNodes);
422 for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
423 // Save the permutation calculated to go from Lexicographic indexing to column map indexing
424 ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
425 for(LO dof = 0; dof < BlkSize; ++dof) {
426 colGIDs[BlkSize*ind + dof] = BlkSize*colMapOrdering[ind].GID + dof;
427 }
428 }
429 domainMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
430 BlkSize*gNumCoarseNodes,
431 colGIDs.view(0,BlkSize*lNumCoarseNodes),
432 rowMapP->getIndexBase(),
433 rowMapP->getComm());
434 colMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
435 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
436 colGIDs.view(0, colGIDs.size()),
437 rowMapP->getIndexBase(),
438 rowMapP->getComm());
439 } // End of scope for colMapOrdering and colGIDs
440
441 std::vector<size_t> strideInfo(1);
442 strideInfo[0] = BlkSize;
443 RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO,GO,NO>::Build(domainMapP,
444 strideInfo);
445
446 GO gnnzP = 0;
447 LO lnnzP = 0;
448 // coarse points have one nnz per row
449 gnnzP += gNumCoarseNodes;
450 lnnzP += lNumCoarseNodes;
451 // add all other points multiplying by 2^numDimensions
452 gnnzP += (gNumFineNodes - gNumCoarseNodes)*std::pow(2, numDimensions);
453 lnnzP += (lNumFineNodes - lNumCoarseNodes)*std::pow(2, numDimensions);
454 // finally multiply by the number of dofs per node
455 gnnzP = gnnzP*BlkSize;
456 lnnzP = lnnzP*BlkSize;
457
458 // Create the matrix itself using the above maps
459 RCP<Matrix> P;
460 P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
461 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
462
463 ArrayRCP<size_t> iaP;
464 ArrayRCP<LO> jaP;
465 ArrayRCP<SC> valP;
466
467 PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
468
469 ArrayView<size_t> ia = iaP();
470 ArrayView<LO> ja = jaP();
471 ArrayView<SC> val = valP();
472 ia[0] = 0;
473
474
475 LO numCoarseElements = 1;
476 Array<LO> lCoarseElementsPerDir(3);
477 for(LO dim = 0; dim < numDimensions; ++dim) {
478 lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
479 if(ghostInterface[2*dim]) {++lCoarseElementsPerDir[dim];}
480 if(!ghostInterface[2*dim+1]) {--lCoarseElementsPerDir[dim];}
481 numCoarseElements = numCoarseElements*lCoarseElementsPerDir[dim];
482 }
483
484 for(LO dim = numDimensions; dim < 3; ++dim) {
485 lCoarseElementsPerDir[dim] = 1;
486 }
487
488 // Loop over the coarse elements
489 Array<int> elementFlags(3);
490 Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
491 Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
492 const int numCoarseNodesInElement = std::pow(2, numDimensions);
493 const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
494 const int numRowsPerPoint = BlkSize;
495 for(elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
496 for(elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
497 for(elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
498 elementFlags[0] = 0; elementFlags[1] = 0; elementFlags[2] = 0;
499 for(int dim = 0; dim < 3; ++dim) {
500 // Detect boundary conditions on the element and set corresponding flags.
501 if(elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
502 elementFlags[dim] = boundaryFlags[dim];
503 } else if(elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
504 elementFlags[dim] += 1;
505 } else if((elemInds[dim] == lCoarseElementsPerDir[dim] - 1)
506 && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
507 elementFlags[dim] += 2;
508 } else {
509 elementFlags[dim] = 0;
510 }
511
512 // Compute the number of nodes in the current element.
513 if(dim < numDimensions) {
514 if((elemInds[dim] == lCoarseElementsPerDir[dim])
515 && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
516 elementNodesPerDir[dim] = endRate[dim] + 1;
517 } else {
518 elementNodesPerDir[dim] = coarseRate[dim] + 1;
519 }
520 } else {
521 elementNodesPerDir[dim] = 1;
522 }
523
524 // Get the lowest tuple of the element using the ghosted local coordinate system
525 glElementRefTuple[dim] = elemInds[dim]*coarseRate[dim];
526 glElementRefTupleCG[dim] = elemInds[dim];
527 }
528
529 // Now get the column map indices corresponding to the dofs associated with the current
530 // element's coarse nodes.
531 for(typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
532 glElementCoarseNodeCG[elem]
533 = glElementRefTupleCG[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
534 + glElementRefTupleCG[1]*glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
535 }
536 glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
537 glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
538 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
539 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
540
541 glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
542 glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
543 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
544 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
545
546 glElementCoarseNodeCG[1] += 1;
547 glElementCoarseNodeCG[3] += 1;
548 glElementCoarseNodeCG[5] += 1;
549 glElementCoarseNodeCG[7] += 1;
550
551 LO numNodesInElement = elementNodesPerDir[0]*elementNodesPerDir[1]*elementNodesPerDir[2];
552 // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
553 // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
554
555 // Compute the element prolongator
556 Teuchos::SerialDenseMatrix<LO,SC> Pi, Pf, Pe;
557 Array<LO> dofType(numNodesInElement*BlkSize), lDofInd(numNodesInElement*BlkSize);
558 ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
559 numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
560 lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
561 blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
562 Pi, Pf, Pe, dofType, lDofInd);
563
564 // Find ghosted LID associated with nodes in the element and eventually which of these
565 // nodes are ghosts, this information is used to fill the local prolongator.
566 Array<LO> lNodeLIDs(numNodesInElement);
567 {
568 Array<LO> lNodeTuple(3), nodeInd(3);
569 for(nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
570 for(nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
571 for(nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
572 int stencilLength = 0;
573 if((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
574 (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
575 (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
576 stencilLength = 1;
577 } else {
578 stencilLength = std::pow(2, numDimensions);
579 }
580 LO nodeElementInd = nodeInd[2]*elementNodesPerDir[1]*elementNodesPerDir[1]
581 + nodeInd[1]*elementNodesPerDir[0] + nodeInd[0];
582 for(int dim = 0; dim < 3; ++dim) {
583 lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
584 }
585 if(lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
586 lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
587 lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
588 lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
589 // This flags the ghosts nodes used for prolongator calculation but for which
590 // we do not own the row, hence we won't fill these values on this rank.
591 lNodeLIDs[nodeElementInd] = -1;
592 } else if ((nodeInd[0] == 0 && elemInds[0] !=0) ||
593 (nodeInd[1] == 0 && elemInds[1] !=0) ||
594 (nodeInd[2] == 0 && elemInds[2] !=0)) {
595 // This flags nodes that are owned but common to two coarse elements and that
596 // were already filled by another element, we don't want to fill them twice so
597 // we skip them
598 lNodeLIDs[nodeElementInd] = -2;
599 } else {
600 // The remaining nodes are locally owned and we need to fill the coresponding
601 // rows of the prolongator
602
603 // First we need to find which row needs to be filled
604 lNodeLIDs[nodeElementInd] = lNodeTuple[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0]
605 + lNodeTuple[1]*lFineNodesPerDir[0] + lNodeTuple[0];
606
607 // We also compute the row offset using arithmetic to ensure that we can loop
608 // easily over the nodes in a macro-element as well as facilitate on-node
609 // parallelization. The node serial code could be rewritten with two loops over
610 // the local part of the mesh to avoid the costly integer divisions...
611 Array<LO> refCoarsePointTuple(3);
612 for(int dim = 2; dim > -1; --dim) {
613 if(dim == 0) {
614 refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
615 if(myOffset[dim] == 0) {
616 ++refCoarsePointTuple[dim];
617 }
618 } else {
619 // Note: no need for magnitudeType here, just use double because these things are LO's
620 refCoarsePointTuple[dim] =
621 std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim])
622 / coarseRate[dim]);
623 }
624 if((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {break;}
625 }
626 const LO numCoarsePoints = refCoarsePointTuple[0]
627 + refCoarsePointTuple[1]*lCoarseNodesPerDir[0]
628 + refCoarsePointTuple[2]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0];
629 const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
630
631 // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
632 // fill row 'n' to 'n+BlkSize'.
633 size_t rowPtr = (numFinePoints - numCoarsePoints)*numRowsPerPoint
634 *numCoarseNodesInElement*nnzPerCoarseNode + numCoarsePoints*numRowsPerPoint;
635 if(dofType[nodeElementInd*BlkSize] == 0) {
636 // Check if current node is a coarse point
637 rowPtr = rowPtr - numRowsPerPoint;
638 } else {
639 rowPtr = rowPtr - numRowsPerPoint*numCoarseNodesInElement*nnzPerCoarseNode;
640 }
641
642 for(int dof = 0; dof < BlkSize; ++dof) {
643
644 // Now we loop over the stencil and fill the column indices and row values
645 int cornerInd = 0;
646 switch(dofType[nodeElementInd*BlkSize + dof]) {
647 case 0: // Corner node
648 if(nodeInd[2] == elementNodesPerDir[2] - 1) {cornerInd += 4;}
649 if(nodeInd[1] == elementNodesPerDir[1] - 1) {cornerInd += 2;}
650 if(nodeInd[0] == elementNodesPerDir[0] - 1) {cornerInd += 1;}
651 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1] = rowPtr + dof + 1;
652 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]]*BlkSize + dof;
653 val[rowPtr + dof] = 1.0;
654 break;
655
656 case 1: // Edge node
657 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
658 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
659 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
660 if(blockStrategy == "coupled") {
661 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
662 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
663 + ind1*BlkSize + ind2;
664 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
665 val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
666 ind1*BlkSize + ind2);
667 }
668 } else if(blockStrategy == "uncoupled") {
669 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
670 + ind1;
671 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
672 val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
673 ind1*BlkSize + dof);
674 }
675 }
676 break;
677
678 case 2: // Face node
679 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
680 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
681 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
682 if(blockStrategy == "coupled") {
683 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
684 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
685 + ind1*BlkSize + ind2;
686 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
687 val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
688 ind1*BlkSize + ind2);
689 }
690 } else if(blockStrategy == "uncoupled") {
691 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
692 + ind1;
693 // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
694 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
695 val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
696 ind1*BlkSize + dof);
697 }
698 }
699 break;
700
701 case 3: // Interior node
702 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
703 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
704 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
705 if(blockStrategy == "coupled") {
706 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
707 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
708 + ind1*BlkSize + ind2;
709 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
710 val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
711 ind1*BlkSize + ind2);
712 }
713 } else if(blockStrategy == "uncoupled") {
714 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
715 + ind1;
716 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
717 val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
718 ind1*BlkSize + dof);
719 }
720 }
721 break;
722 }
723 }
724 }
725 }
726 }
727 }
728 } // End of scopt for lNodeTuple and nodeInd
729 }
730 }
731 }
732
733 // Sort all row's column indicies and entries by LID
734 Xpetra::CrsMatrixUtils<SC,LO,GO,NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
735
736 // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
737 PCrs->setAllValues(iaP, jaP, valP);
738 PCrs->expertStaticFillComplete(domainMapP,rowMapP);
739
740 // set StridingInformation of P
741 if (A->IsView("stridedMaps") == true) {
742 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
743 } else {
744 P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
745 }
746
747 // store the transfer operator and node coordinates on coarse level
748 Set(coarseLevel, "P", P);
749 Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
750 Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
751 Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
752
753 }
754
755 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
757 GetGeometricData(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> >& coordinates,
758 const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
759 const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
760 Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
761 Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
762 Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
763 Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
764 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
765 RCP<NodesIDs> ghostedCoarseNodes) const {
766 // This function is extracting the geometric information from the coordinates
767 // and creates the necessary data/formatting to perform locally the calculation
768 // of the pronlongator.
769 //
770 // inputs:
771
772 RCP<const Map> coordinatesMap = coordinates->getMap();
773 LO numDimensions = coordinates->getNumVectors();
774
775 // Using the coarsening rate and the fine level data,
776 // compute coarse level data
777
778 // Phase 1 //
779 // ------------------------------------------------------------------ //
780 // We first start by finding small informations on the mesh such as //
781 // the number of coarse nodes (local and global) and the number of //
782 // ghost nodes / the end rate of coarsening. //
783 // ------------------------------------------------------------------ //
784 GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
785 {
786 GO tmp;
787 gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
788 tmp = minGlobalIndex % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
789 gIndices[1] = tmp / gFineNodesPerDir[0];
790 gIndices[0] = tmp % gFineNodesPerDir[0];
791
792 myOffset[2] = gIndices[2] % coarseRate[2];
793 myOffset[1] = gIndices[1] % coarseRate[1];
794 myOffset[0] = gIndices[0] % coarseRate[0];
795 }
796
797 for(int dim = 0; dim < 3; ++dim) {
798 if(gIndices[dim] == 0) {
799 boundaryFlags[dim] += 1;
800 }
801 if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
802 boundaryFlags[dim] += 2;
803 }
804 }
805
806 // Check whether ghost nodes are needed in each direction
807 for(LO i=0; i < numDimensions; ++i) {
808 if((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
809 ghostInterface[2*i] = true;
810 }
811 if(((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i])
812 && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
813 ghostInterface[2*i + 1] = true;
814 }
815 }
816
817 for(LO i = 0; i < 3; ++i) {
818 if(i < numDimensions) {
819 lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
820 if(myOffset[i] == 0) { ++lCoarseNodesPerDir[i]; }
821 gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
822 endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
823 if(endRate[i] == 0) {
824 ++gCoarseNodesPerDir[i];
825 endRate[i] = coarseRate[i];
826 }
827 } else {
828 // Most quantities need to be set to 1 for extra dimensions
829 // this is rather logical, an x-y plane is like a single layer
830 // of nodes in the z direction...
831 gCoarseNodesPerDir[i] = 1;
832 lCoarseNodesPerDir[i] = 1;
833 endRate[i] = 1;
834 }
835 }
836
837 gNumCoarseNodes = gCoarseNodesPerDir[0]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[2];
838 lNumCoarseNodes = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];
839
840 // For each direction, determine how many ghost points are required.
841 LO lNumGhostNodes = 0;
842 Array<GO> startGhostedCoarseNode(3);
843 {
844 const int complementaryIndices[3][2] = {{1,2}, {0,2}, {0,1}};
845 LO tmp = 0;
846 for(LO i = 0; i < 3; ++i) {
847 // The first branch of this if-statement will be used if the rank contains only one layer
848 // of nodes in direction i, that layer must also coincide with the boundary of the mesh
849 // and coarseRate[i] == endRate[i]...
850 if((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
851 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
852 } else {
853 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
854 }
855 // First we load the number of locally owned coarse nodes
856 glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
857
858 // Check whether a face in direction i needs ghost nodes
859 if(ghostInterface[2*i] || ghostInterface[2*i+1]) {
860 ++glCoarseNodesPerDir[i];
861 if(i == 0) {tmp = lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];}
862 if(i == 1) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[2];}
863 if(i == 2) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1];}
864 }
865 // If both faces in direction i need nodes, double the number of ghost nodes
866 if(ghostInterface[2*i] && ghostInterface[2*i+1]) {
867 ++glCoarseNodesPerDir[i];
868 tmp = 2*tmp;
869 }
870 lNumGhostNodes += tmp;
871
872 // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
873 for(LO j = 0; j < 2; ++j) {
874 for(LO k = 0; k < 2; ++k) {
875 // Check if two adjoining faces need ghost nodes and then add their common edge
876 if(ghostInterface[2*complementaryIndices[i][0]+j]
877 && ghostInterface[2*complementaryIndices[i][1]+k]) {
878 lNumGhostNodes += lCoarseNodesPerDir[i];
879 // Add corners if three adjoining faces need ghost nodes,
880 // but add them only once! Hence when i == 0.
881 if(ghostInterface[2*i] && (i == 0)) { lNumGhostNodes += 1; }
882 if(ghostInterface[2*i+1] && (i == 0)) { lNumGhostNodes += 1; }
883 }
884 }
885 }
886 tmp = 0;
887 }
888 } // end of scope for tmp and complementaryIndices
889
890 const LO lNumGhostedNodes = glCoarseNodesPerDir[0]*glCoarseNodesPerDir[1]
891 *glCoarseNodesPerDir[2];
892 ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
893 ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
894 ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
895 ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
896 ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
897
898 // We loop over all ghosted coarse nodes by increasing global lexicographic order
899 Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
900 LO currentIndex = -1;
901 for(ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
902 for(ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
903 for(ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
904 currentIndex = ijk[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
905 + ijk[1]*glCoarseNodesPerDir[0] + ijk[0];
906 coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
907 coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0]*coarseRate[0];
908 if(coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
909 coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
910 }
911 coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
912 coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1]*coarseRate[1];
913 if(coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
914 coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
915 }
916 coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
917 coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2]*coarseRate[2];
918 if(coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
919 coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
920 }
921 GO myGID = 0, myCoarseGID = -1;
922 GO factor[3] = {};
923 factor[2] = gFineNodesPerDir[1]*gFineNodesPerDir[0];
924 factor[1] = gFineNodesPerDir[0];
925 factor[0] = 1;
926 for(int dim = 0; dim < 3; ++dim) {
927 if(dim < numDimensions) {
928 if(gIndices[dim] - myOffset[dim] + ijk[dim]*coarseRate[dim]
929 < gFineNodesPerDir[dim] - 1) {
930 myGID += (gIndices[dim] - myOffset[dim]
931 + ijk[dim]*coarseRate[dim])*factor[dim];
932 } else {
933 myGID += (gIndices[dim] - myOffset[dim]
934 + (ijk[dim] - 1)*coarseRate[dim] + endRate[dim])*factor[dim];
935 }
936 }
937 }
938 myCoarseGID = coarseNodeCoarseIndices[0]
939 + coarseNodeCoarseIndices[1]*gCoarseNodesPerDir[0]
940 + coarseNodeCoarseIndices[2]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
941 ghostedCoarseNodes->GIDs[currentIndex] = myGID;
942 ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
943 }
944 }
945 }
946 coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
947 ghostedCoarseNodes->PIDs(),
948 ghostedCoarseNodes->LIDs());
949
950 // Phase 2 //
951 // ------------------------------------------------------------------ //
952 // Build a list of GIDs to import the required ghost nodes. //
953 // The ordering of the ghosts nodes will be as natural as possible, //
954 // i.e. it should follow the GID ordering of the mesh. //
955 // ------------------------------------------------------------------ //
956
957 // Saddly we have to more or less redo what was just done to figure out the value of
958 // lNumGhostNodes, there might be some optimization possibility here...
959 ghostGIDs.resize(lNumGhostNodes);
960 LO countGhosts = 0;
961 // Get the GID of the first point on the current partition.
962 GO startingGID = minGlobalIndex;
963 Array<GO> startingIndices(3);
964 // We still want ghost nodes even if have with a 0 offset,
965 // except when we are on a boundary
966 if(ghostInterface[4] && (myOffset[2] == 0)) {
967 if(gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
968 startingGID -= endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
969 } else {
970 startingGID -= coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
971 }
972 } else {
973 startingGID -= myOffset[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
974 }
975 if(ghostInterface[2] && (myOffset[1] == 0)) {
976 if(gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
977 startingGID -= endRate[1]*gFineNodesPerDir[0];
978 } else {
979 startingGID -= coarseRate[1]*gFineNodesPerDir[0];
980 }
981 } else {
982 startingGID -= myOffset[1]*gFineNodesPerDir[0];
983 }
984 if(ghostInterface[0] && (myOffset[0] == 0)) {
985 if(gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
986 startingGID -= endRate[0];
987 } else {
988 startingGID -= coarseRate[0];
989 }
990 } else {
991 startingGID -= myOffset[0];
992 }
993
994 { // scope for tmp
995 GO tmp;
996 startingIndices[2] = startingGID / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
997 tmp = startingGID % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
998 startingIndices[1] = tmp / gFineNodesPerDir[0];
999 startingIndices[0] = tmp % gFineNodesPerDir[0];
1000 }
1001
1002 GO ghostOffset[3] = {0, 0, 0};
1003 LO lengthZero = lCoarseNodesPerDir[0];
1004 LO lengthOne = lCoarseNodesPerDir[1];
1005 LO lengthTwo = lCoarseNodesPerDir[2];
1006 if(ghostInterface[0]) {++lengthZero;}
1007 if(ghostInterface[1]) {++lengthZero;}
1008 if(ghostInterface[2]) {++lengthOne;}
1009 if(ghostInterface[3]) {++lengthOne;}
1010 if(ghostInterface[4]) {++lengthTwo;}
1011 if(ghostInterface[5]) {++lengthTwo;}
1012
1013 // First check the bottom face as it will have the lowest GIDs
1014 if(ghostInterface[4]) {
1015 ghostOffset[2] = startingGID;
1016 for(LO j = 0; j < lengthOne; ++j) {
1017 if( (j == lengthOne-1)
1018 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1019 ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1020 } else {
1021 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1022 }
1023 for(LO k = 0; k < lengthZero; ++k) {
1024 if( (k == lengthZero-1)
1025 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1026 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1027 } else {
1028 ghostOffset[0] = k*coarseRate[0];
1029 }
1030 // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1031 // carefully.
1032 // This if statement is repeated each time a ghost node is selected.
1033 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1034 ++countGhosts;
1035 }
1036 }
1037 }
1038
1039 // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1040 // nodes located on these layers.
1041 for(LO i = 0; i < lengthTwo; ++i) {
1042 // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1043 // swept seperatly for efficiency.
1044 if( !((i == lengthTwo-1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4]) ) {
1045 // Set the ghostOffset in direction 2 taking into account a possible endRate different
1046 // from the regular coarseRate.
1047 if( (i == lengthTwo-1) && !ghostInterface[5] ) {
1048 ghostOffset[2] = startingGID + ((i-1)*coarseRate[2] + endRate[2])
1049 *gFineNodesPerDir[1]*gFineNodesPerDir[0];
1050 } else {
1051 ghostOffset[2] = startingGID + i*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1052 }
1053 for(LO j = 0; j < lengthOne; ++j) {
1054 if( (j == 0) && ghostInterface[2] ) {
1055 for(LO k = 0; k < lengthZero; ++k) {
1056 if( (k == lengthZero-1)
1057 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1058 if(k == 0) {
1059 ghostOffset[0] = endRate[0];
1060 } else {
1061 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1062 }
1063 } else {
1064 ghostOffset[0] = k*coarseRate[0];
1065 }
1066 // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1067 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1068 ++countGhosts;
1069 }
1070 } else if( (j == lengthOne-1) && ghostInterface[3] ) {
1071 // Set the ghostOffset in direction 1 taking into account a possible endRate different
1072 // from the regular coarseRate.
1073 if( (j == lengthOne-1)
1074 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1075 ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1076 } else {
1077 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1078 }
1079 for(LO k = 0; k < lengthZero; ++k) {
1080 if( (k == lengthZero-1)
1081 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1082 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1083 } else {
1084 ghostOffset[0] = k*coarseRate[0];
1085 }
1086 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1087 ++countGhosts;
1088 }
1089 } else {
1090 // Set ghostOffset[1] for side faces sweep
1091 if( (j == lengthOne-1)
1092 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1093 ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1094 } else {
1095 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1096 }
1097
1098 // Set ghostOffset[0], ghostGIDs and countGhosts
1099 if(ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1100 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1101 ++countGhosts;
1102 }
1103 if(ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1104 if( (startingIndices[0] + (lengthZero-1)*coarseRate[0]) > gFineNodesPerDir[0] - 1 ) {
1105 if(lengthZero > 2) {
1106 ghostOffset[0] = (lengthZero-2)*coarseRate[0] + endRate[0];
1107 } else {
1108 ghostOffset[0] = endRate[0];
1109 }
1110 } else {
1111 ghostOffset[0] = (lengthZero-1)*coarseRate[0];
1112 }
1113 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1114 ++countGhosts;
1115 }
1116 }
1117 }
1118 }
1119 }
1120
1121 // Finally check the top face
1122 if(ghostInterface[5]) {
1123 if( startingIndices[2] + (lengthTwo-1)*coarseRate[2] + 1 > gFineNodesPerDir[2] ) {
1124 ghostOffset[2] = startingGID
1125 + ((lengthTwo-2)*coarseRate[2] + endRate[2])*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1126 } else {
1127 ghostOffset[2] = startingGID
1128 + (lengthTwo-1)*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1129 }
1130 for(LO j = 0; j < lengthOne; ++j) {
1131 if( (j == lengthOne-1)
1132 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1133 ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1134 } else {
1135 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1136 }
1137 for(LO k = 0; k < lengthZero; ++k) {
1138 if( (k == lengthZero-1)
1139 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1140 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1141 } else {
1142 ghostOffset[0] = k*coarseRate[0];
1143 }
1144 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1145 ++countGhosts;
1146 }
1147 }
1148 }
1149
1150 // Phase 3 //
1151 // ------------------------------------------------------------------ //
1152 // Final phase of this function, lists are being built to create the //
1153 // column and domain maps of the projection as well as the map and //
1154 // arrays for the coarse coordinates multivector. //
1155 // ------------------------------------------------------------------ //
1156
1157 Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1158 LO currentNode, offset2, offset1, offset0;
1159 // Find the GIDs of the coarse nodes on the partition.
1160 for(LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1161 if(myOffset[2] == 0) {
1162 offset2 = startingIndices[2] + myOffset[2];
1163 } else {
1164 if(startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1165 offset2 = startingIndices[2] + endRate[2];
1166 } else {
1167 offset2 = startingIndices[2] + coarseRate[2];
1168 }
1169 }
1170 if(offset2 + ind2*coarseRate[2] > gFineNodesPerDir[2] - 1) {
1171 offset2 += (ind2 - 1)*coarseRate[2] + endRate[2];
1172 } else {
1173 offset2 += ind2*coarseRate[2];
1174 }
1175 offset2 = offset2*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1176
1177 for(LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1178 if(myOffset[1] == 0) {
1179 offset1 = startingIndices[1] + myOffset[1];
1180 } else {
1181 if(startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1182 offset1 = startingIndices[1] + endRate[1];
1183 } else {
1184 offset1 = startingIndices[1] + coarseRate[1];
1185 }
1186 }
1187 if(offset1 + ind1*coarseRate[1] > gFineNodesPerDir[1] - 1) {
1188 offset1 += (ind1 - 1)*coarseRate[1] + endRate[1];
1189 } else {
1190 offset1 += ind1*coarseRate[1];
1191 }
1192 offset1 = offset1*gFineNodesPerDir[0];
1193 for(LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1194 offset0 = startingIndices[0];
1195 if(myOffset[0] == 0) {
1196 offset0 += ind0*coarseRate[0];
1197 } else {
1198 offset0 += (ind0 + 1)*coarseRate[0];
1199 }
1200 if(offset0 > gFineNodesPerDir[0] - 1) {offset0 += endRate[0] - coarseRate[0];}
1201
1202 currentNode = ind2*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]
1203 + ind1*lCoarseNodesPerDir[0]
1204 + ind0;
1205 gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1206 }
1207 }
1208 }
1209
1210 // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1211 // and the corresponding dofs that will need to be added to colMapP.
1212 colGIDs.resize(BlkSize*(lNumCoarseNodes+lNumGhostNodes));
1213 coarseNodesGIDs.resize(lNumCoarseNodes);
1214 for(LO i = 0; i < numDimensions; ++i) {coarseNodes[i].resize(lNumCoarseNodes);}
1215 GO fineNodesPerCoarseSlab = coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1216 GO fineNodesEndCoarseSlab = endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1217 GO fineNodesPerCoarsePlane = coarseRate[1]*gFineNodesPerDir[0];
1218 GO fineNodesEndCoarsePlane = endRate[1]*gFineNodesPerDir[0];
1219 GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
1220 GO gCoarseNodeOnCoarseGridGID;
1221 LO gInd[3], lCol;
1222 Array<int> ghostPIDs (lNumGhostNodes);
1223 Array<LO> ghostLIDs (lNumGhostNodes);
1224 Array<LO> ghostPermut(lNumGhostNodes);
1225 for(LO k = 0; k < lNumGhostNodes; ++k) {ghostPermut[k] = k;}
1226 coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1227 sh_sort_permute(ghostPIDs.begin(),ghostPIDs.end(), ghostPermut.begin(),ghostPermut.end());
1228
1229 { // scope for tmpInds, tmpVars and tmp.
1230 GO tmpInds[3], tmpVars[2];
1231 LO tmp;
1232 // Loop over the coarse nodes of the partition and add them to colGIDs
1233 // that will be used to construct the column and domain maps of P as well
1234 // as to construct the coarse coordinates map.
1235 // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1236 // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1237 LO col = 0;
1238 LO firstCoarseNodeInds[3], currentCoarseNode;
1239 for(LO dim = 0; dim < 3; ++dim) {
1240 if(myOffset[dim] == 0) {
1241 firstCoarseNodeInds[dim] = 0;
1242 } else {
1243 firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1244 }
1245 }
1246 Array<ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
1247 for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim);}
1248 for(LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1249 for(LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1250 for(LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1251 col = k*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0] + j*lCoarseNodesPerDir[0] + i;
1252
1253 // Check for endRate
1254 currentCoarseNode = 0;
1255 if(firstCoarseNodeInds[0] + i*coarseRate[0] > lFineNodesPerDir[0] - 1) {
1256 currentCoarseNode += firstCoarseNodeInds[0] + (i-1)*coarseRate[0] + endRate[0];
1257 } else {
1258 currentCoarseNode += firstCoarseNodeInds[0] + i*coarseRate[0];
1259 }
1260 if(firstCoarseNodeInds[1] + j*coarseRate[1] > lFineNodesPerDir[1] - 1) {
1261 currentCoarseNode += (firstCoarseNodeInds[1] + (j-1)*coarseRate[1] + endRate[1])
1262 *lFineNodesPerDir[0];
1263 } else {
1264 currentCoarseNode += (firstCoarseNodeInds[1] + j*coarseRate[1])*lFineNodesPerDir[0];
1265 }
1266 if(firstCoarseNodeInds[2] + k*coarseRate[2] > lFineNodesPerDir[2] - 1) {
1267 currentCoarseNode += (firstCoarseNodeInds[2] + (k-1)*coarseRate[2] + endRate[2])
1268 *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1269 } else {
1270 currentCoarseNode += (firstCoarseNodeInds[2] + k*coarseRate[2])
1271 *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1272 }
1273 // Load coordinates
1274 for(LO dim = 0; dim < numDimensions; ++dim) {
1275 coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1276 }
1277
1278 if((endRate[2] != coarseRate[2])
1279 && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2)*fineNodesPerCoarseSlab
1280 + fineNodesEndCoarseSlab - 1)) {
1281 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1282 tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1)*fineNodesPerCoarseSlab
1283 - fineNodesEndCoarseSlab;
1284 } else {
1285 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1286 tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1287 }
1288 if((endRate[1] != coarseRate[1])
1289 && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1290 + fineNodesEndCoarsePlane - 1)) {
1291 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1292 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1293 - fineNodesEndCoarsePlane;
1294 } else {
1295 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1296 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1297 }
1298 if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1299 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1300 } else {
1301 tmpInds[0] = tmpVars[1] / coarseRate[0];
1302 }
1303 gInd[2] = col / (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1304 tmp = col % (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1305 gInd[1] = tmp / lCoarseNodesPerDir[0];
1306 gInd[0] = tmp % lCoarseNodesPerDir[0];
1307 lCol = gInd[2]*(lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0])
1308 + gInd[1]*lCoarseNodesPerDir[0] + gInd[0];
1309 gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1310 + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1311 coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1312 for(LO dof = 0; dof < BlkSize; ++dof) {
1313 colGIDs[BlkSize*lCol + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1314 }
1315 }
1316 }
1317 }
1318 // Now loop over the ghost nodes of the partition to add them to colGIDs
1319 // since they will need to be included in the column map of P
1320 for(col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1321 if((endRate[2] != coarseRate[2])
1322 && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2)
1323 *fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1324 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1325 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]]
1326 - (tmpInds[2] - 1)*fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1327 } else {
1328 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1329 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1330 }
1331 if((endRate[1] != coarseRate[1])
1332 && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1333 + fineNodesEndCoarsePlane - 1)) {
1334 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1335 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1336 - fineNodesEndCoarsePlane;
1337 } else {
1338 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1339 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1340 }
1341 if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1342 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1343 } else {
1344 tmpInds[0] = tmpVars[1] / coarseRate[0];
1345 }
1346 gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1347 + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1348 for(LO dof = 0; dof < BlkSize; ++dof) {
1349 colGIDs[BlkSize*col + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1350 }
1351 }
1352 } // End of scope for tmpInds, tmpVars and tmp
1353
1354 } // GetGeometricData()
1355
1356 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1358 ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1359 const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1360 const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1361 const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1362 const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1363 const Array<bool> ghostInterface, const Array<int> elementFlags,
1364 const std::string stencilType, const std::string /* blockStrategy */,
1365 const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1366 const Array<GO> /* colGIDs */,
1367 Teuchos::SerialDenseMatrix<LO,SC>& Pi, Teuchos::SerialDenseMatrix<LO,SC>& Pf,
1368 Teuchos::SerialDenseMatrix<LO,SC>& Pe, Array<LO>& dofType,
1369 Array<LO>& lDofInd) const {
1370
1371 // First extract data from Aghost and move it to the corresponding dense matrix
1372 // This step requires to compute the number of nodes (resp dofs) in the current
1373 // coarse element, then allocate storage for local dense matrices, finally loop
1374 // over the dofs and extract the corresponding row in Aghost before dispactching
1375 // its data to the dense matrices.
1376 // At the same time we want to compute a mapping from the element indexing to
1377 // group indexing. The groups are the following: corner, edge, face and interior
1378 // nodes. We uses these groups to operate on the dense matrices but need to
1379 // store the nodes in their original order after groupd operations are completed.
1380 LO countInterior=0, countFace=0, countEdge=0, countCorner =0;
1381 Array<LO> collapseDir(numNodesInElement*BlkSize);
1382 for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1383 for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1384 for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1385 // Check for corner node
1386 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1387 && (je == 0 || je == elementNodesPerDir[1]-1)
1388 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1389 for(LO dof = 0; dof < BlkSize; ++dof) {
1390 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1391 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1392 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1393 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countCorner + dof;
1394 }
1395 ++countCorner;
1396
1397 // Check for edge node
1398 } else if (((ke == 0 || ke == elementNodesPerDir[2]-1)
1399 && (je == 0 || je == elementNodesPerDir[1]-1))
1400 || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1401 && (ie == 0 || ie == elementNodesPerDir[0]-1))
1402 || ((je == 0 || je == elementNodesPerDir[1]-1)
1403 && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1404 for(LO dof = 0; dof < BlkSize; ++dof) {
1405 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1406 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1407 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1408 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countEdge + dof;
1409 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1410 && (je == 0 || je == elementNodesPerDir[1]-1)) {
1411 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1412 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1413 } else if((ke == 0 || ke == elementNodesPerDir[2]-1)
1414 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1415 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1416 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1417 } else if((je == 0 || je == elementNodesPerDir[1]-1
1418 ) && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1419 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1420 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1421 }
1422 }
1423 ++countEdge;
1424
1425 // Check for face node
1426 } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1427 || (je == 0 || je == elementNodesPerDir[1]-1)
1428 || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1429 for(LO dof = 0; dof < BlkSize; ++dof) {
1430 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1431 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1432 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1433 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countFace + dof;
1434 if(ke == 0 || ke == elementNodesPerDir[2]-1) {
1435 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1436 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1437 } else if(je == 0 || je == elementNodesPerDir[1]-1) {
1438 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1439 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1440 } else if(ie == 0 || ie == elementNodesPerDir[0]-1) {
1441 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1442 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1443 }
1444 }
1445 ++countFace;
1446
1447 // Otherwise it is an interior node
1448 } else {
1449 for(LO dof = 0; dof < BlkSize; ++dof) {
1450 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1451 + je*elementNodesPerDir[0] + ie) + dof] = 3;
1452 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1453 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countInterior +dof;
1454 }
1455 ++countInterior;
1456 }
1457 }
1458 }
1459 }
1460
1461 LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1462 numInteriorNodes = (elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1463 *(elementNodesPerDir[2] - 2);
1464 numFaceNodes = 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1465 + 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[2] - 2)
1466 + 2*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[2] - 2);
1467 numEdgeNodes = 4*(elementNodesPerDir[0] - 2) + 4*(elementNodesPerDir[1] - 2)
1468 + 4*(elementNodesPerDir[2] - 2);
1469 // Diagonal blocks
1470 Teuchos::SerialDenseMatrix<LO,SC> Aii(BlkSize*numInteriorNodes, BlkSize*numInteriorNodes);
1471 Teuchos::SerialDenseMatrix<LO,SC> Aff(BlkSize*numFaceNodes, BlkSize*numFaceNodes);
1472 Teuchos::SerialDenseMatrix<LO,SC> Aee(BlkSize*numEdgeNodes, BlkSize*numEdgeNodes);
1473 // Upper triangular blocks
1474 Teuchos::SerialDenseMatrix<LO,SC> Aif(BlkSize*numInteriorNodes, BlkSize*numFaceNodes);
1475 Teuchos::SerialDenseMatrix<LO,SC> Aie(BlkSize*numInteriorNodes, BlkSize*numEdgeNodes);
1476 Teuchos::SerialDenseMatrix<LO,SC> Afe(BlkSize*numFaceNodes, BlkSize*numEdgeNodes);
1477 // Coarse nodes "right hand sides" and local prolongators
1478 Teuchos::SerialDenseMatrix<LO,SC> Aic(BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1479 Teuchos::SerialDenseMatrix<LO,SC> Afc(BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1480 Teuchos::SerialDenseMatrix<LO,SC> Aec(BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1481 Pi.shape( BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1482 Pf.shape( BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1483 Pe.shape( BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1484
1485 ArrayView<const LO> rowIndices;
1486 ArrayView<const SC> rowValues;
1487 LO idof, iInd, jInd;
1488 int iType = 0, jType = 0;
1489 int orientation = -1;
1490 int collapseFlags[3] = {};
1491 Array<SC> stencil((std::pow(3,numDimensions))*BlkSize);
1492 // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1493 // we might want to fuse the first three loops and do integer arithmetic
1494 // to have more optimization during compilation...
1495 for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1496 for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1497 for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1498 collapseFlags[0] = 0; collapseFlags[1] = 0; collapseFlags[2] = 0;
1499 if((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1500 collapseFlags[0] += 1;
1501 }
1502 if((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1503 collapseFlags[0] += 2;
1504 }
1505 if((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1506 collapseFlags[1] += 1;
1507 }
1508 if((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1509 collapseFlags[1] += 2;
1510 }
1511 if((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1512 collapseFlags[2] += 1;
1513 }
1514 if((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1515 collapseFlags[2] += 2;
1516 }
1517
1518 // Based on (ie, je, ke) we detect the type of node we are dealing with
1519 GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1520 for(LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1521 // Compute the dof index for each dof at point (ie, je, ke)
1522 idof = ((elemInds[2]*coarseRate[2] + ke)*lFineNodesPerDir[1]*lFineNodesPerDir[0]
1523 + (elemInds[1]*coarseRate[1] + je)*lFineNodesPerDir[0]
1524 + elemInds[0]*coarseRate[0] + ie)*BlkSize + dof0;
1525 Aghost->getLocalRowView(idof, rowIndices, rowValues);
1526 FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1527 elementNodesPerDir, collapseFlags, stencilType, stencil);
1528 LO io, jo, ko;
1529 if(iType == 3) {// interior node, no stencil collapse needed
1530 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1531 // Find (if, jf, kf) the indices associated with the interacting node
1532 ko = ke + (interactingNode / 9 - 1);
1533 {
1534 LO tmp = interactingNode % 9;
1535 jo = je + (tmp / 3 - 1);
1536 io = ie + (tmp % 3 - 1);
1537 int dummy;
1538 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1539 }
1540 if((io > -1 && io < elementNodesPerDir[0]) &&
1541 (jo > -1 && jo < elementNodesPerDir[1]) &&
1542 (ko > -1 && ko < elementNodesPerDir[2])) {
1543 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1544 if (jType == 3) {
1545 Aii(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1546 stencil[interactingNode*BlkSize + dof1];
1547 } else if(jType == 2) {
1548 Aif(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1549 stencil[interactingNode*BlkSize + dof1];
1550 } else if(jType == 1) {
1551 Aie(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1552 stencil[interactingNode*BlkSize + dof1];
1553 } else if(jType == 0) {
1554 Aic(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1555 -stencil[interactingNode*BlkSize + dof1];
1556 }
1557 }
1558 }
1559 }
1560 } else if(iType == 2) {// Face node, collapse stencil along face normal (*orientation)
1561 CollapseStencil(2, orientation, collapseFlags, stencil);
1562 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1563 // Find (if, jf, kf) the indices associated with the interacting node
1564 ko = ke + (interactingNode / 9 - 1);
1565 {
1566 LO tmp = interactingNode % 9;
1567 jo = je + (tmp / 3 - 1);
1568 io = ie + (tmp % 3 - 1);
1569 int dummy;
1570 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1571 }
1572 if((io > -1 && io < elementNodesPerDir[0]) &&
1573 (jo > -1 && jo < elementNodesPerDir[1]) &&
1574 (ko > -1 && ko < elementNodesPerDir[2])) {
1575 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1576 if(jType == 2) {
1577 Aff(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1578 stencil[interactingNode*BlkSize + dof1];
1579 } else if(jType == 1) {
1580 Afe(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1581 stencil[interactingNode*BlkSize + dof1];
1582 } else if(jType == 0) {
1583 Afc(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1584 -stencil[interactingNode*BlkSize + dof1];
1585 }
1586 }
1587 }
1588 }
1589 } else if(iType == 1) {// Edge node, collapse stencil perpendicular to edge direction
1590 CollapseStencil(1, orientation, collapseFlags, stencil);
1591 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1592 // Find (if, jf, kf) the indices associated with the interacting node
1593 ko = ke + (interactingNode / 9 - 1);
1594 {
1595 LO tmp = interactingNode % 9;
1596 jo = je + (tmp / 3 - 1);
1597 io = ie + (tmp % 3 - 1);
1598 int dummy;
1599 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1600 }
1601 if((io > -1 && io < elementNodesPerDir[0]) &&
1602 (jo > -1 && jo < elementNodesPerDir[1]) &&
1603 (ko > -1 && ko < elementNodesPerDir[2])) {
1604 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1605 if(jType == 1) {
1606 Aee(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1607 stencil[interactingNode*BlkSize + dof1];
1608 } else if(jType == 0) {
1609 Aec(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1610 -stencil[interactingNode*BlkSize + dof1];
1611 }
1612 } // Check if interacting node is in element or not
1613 } // Pc is the identity so no need to treat iType == 0
1614 } // Loop over interacting nodes within a row
1615 } // Check for current node type
1616 } // Loop over degrees of freedom associated to a given node
1617 } // Loop over i
1618 } // Loop over j
1619 } // Loop over k
1620
1621 // Compute the projection operators for edge and interior nodes
1622 //
1623 // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1624 // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1625 // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1626 // [P_c] = I_c
1627 //
1628 { // Compute P_e
1629 // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1630 // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1631 Teuchos::SerialDenseSolver<LO,SC> problem;
1632 problem.setMatrix(Teuchos::rcp(&Aee, false));
1633 problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1634 problem.factorWithEquilibration(true);
1635 problem.solve();
1636 problem.unequilibrateLHS();
1637 }
1638
1639 { // Compute P_f
1640 // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1641 // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1642 Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1643 // Step two: P_f = A_{ff}^{-1}*A_{fc}
1644 Teuchos::SerialDenseSolver<LO,SC> problem;
1645 problem.setMatrix(Teuchos::rcp(&Aff, false));
1646 problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1647 problem.factorWithEquilibration(true);
1648 problem.solve();
1649 problem.unequilibrateLHS();
1650 }
1651
1652 { // Compute P_i
1653 // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1654 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1655 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1656 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1657 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1658 // Step two: P_i = A_{ii}^{-1}*A_{ic}
1659 Teuchos::SerialDenseSolver<LO,SC> problem;
1660 problem.setMatrix(Teuchos::rcp(&Aii, false));
1661 problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1662 problem.factorWithEquilibration(true);
1663 problem.solve();
1664 problem.unequilibrateLHS();
1665 }
1666
1667 } // ComputeLocalEntries()
1668
1669 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1671 CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1672 Array<SC>& stencil) const {
1673
1674 if(type == 2) {// Face stencil collapse
1675 // Let's hope things vectorize well inside this if statement
1676 if(orientation == 0) {
1677 for(LO i = 0; i < 9; ++i) {
1678 stencil[3*i + 1] = stencil[3*i] + stencil[3*i + 1] + stencil[3*i + 2];
1679 stencil[3*i] = 0;
1680 stencil[3*i + 2] = 0;
1681 }
1682 } else if(orientation == 1) {
1683 for(LO i = 0; i < 3; ++i) {
1684 stencil[9*i + 3] = stencil[9*i + 0] + stencil[9*i + 3] + stencil[9*i + 6];
1685 stencil[9*i + 0] = 0;
1686 stencil[9*i + 6] = 0;
1687 stencil[9*i + 4] = stencil[9*i + 1] + stencil[9*i + 4] + stencil[9*i + 7];
1688 stencil[9*i + 1] = 0;
1689 stencil[9*i + 7] = 0;
1690 stencil[9*i + 5] = stencil[9*i + 2] + stencil[9*i + 5] + stencil[9*i + 8];
1691 stencil[9*i + 2] = 0;
1692 stencil[9*i + 8] = 0;
1693 }
1694 } else if(orientation == 2) {
1695 for(LO i = 0; i < 9; ++i) {
1696 stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1697 stencil[i] = 0;
1698 stencil[i + 18] = 0;
1699 }
1700 }
1701 } else if(type == 1) {// Edge stencil collapse
1702 SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1703 if(orientation == 0) {
1704 for(LO i = 0; i < 9; ++i) {
1705 tmp1 += stencil[0 + 3*i];
1706 stencil[0 + 3*i] = 0;
1707 tmp2 += stencil[1 + 3*i];
1708 stencil[1 + 3*i] = 0;
1709 tmp3 += stencil[2 + 3*i];
1710 stencil[2 + 3*i] = 0;
1711 }
1712 stencil[12] = tmp1;
1713 stencil[13] = tmp2;
1714 stencil[14] = tmp3;
1715 } else if(orientation == 1) {
1716 for(LO i = 0; i < 3; ++i) {
1717 tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1718 stencil[ 0 + i] = 0;
1719 stencil[ 9 + i] = 0;
1720 stencil[18 + i] = 0;
1721 tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1722 stencil[ 3 + i] = 0;
1723 stencil[12 + i] = 0;
1724 stencil[21 + i] = 0;
1725 tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1726 stencil[ 6 + i] = 0;
1727 stencil[15 + i] = 0;
1728 stencil[24 + i] = 0;
1729 }
1730 stencil[10] = tmp1;
1731 stencil[13] = tmp2;
1732 stencil[16] = tmp3;
1733 } else if(orientation == 2) {
1734 for(LO i = 0; i < 9; ++i) {
1735 tmp1 += stencil[i];
1736 stencil[i] = 0;
1737 tmp2 += stencil[i + 9];
1738 stencil[i + 9] = 0;
1739 tmp3 += stencil[i + 18];
1740 stencil[i + 18] = 0;
1741 }
1742 stencil[ 4] = tmp1;
1743 stencil[13] = tmp2;
1744 stencil[22] = tmp3;
1745 }
1746 }
1747 } // CollapseStencil
1748
1749 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1751 FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1752 const LO /* ke */, const ArrayView<const SC> rowValues,const Array<LO> /* elementNodesPerDir */,
1753 const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1754 const {
1755
1756 if(stencilType == "reduced") {
1757 Array<int> fullStencilInds(7);
1758 fullStencilInds[0] = 4; fullStencilInds[1] = 10; fullStencilInds[2] = 12;
1759 fullStencilInds[3] = 13; fullStencilInds[4] = 14; fullStencilInds[5] = 16;
1760 fullStencilInds[6] = 22;
1761
1762 // Create a mask array to automate mesh boundary processing
1763 Array<int> mask(7);
1764 int stencilSize = rowValues.size();
1765 if(collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1766 if(stencilSize == 1) {
1767 // The assumption is made that if only one non-zero exists in the row
1768 // then this represent a Dirichlet BC being enforced.
1769 // One might want to zero out the corresponding row in the prolongator.
1770 mask[0] = 1; mask[1] = 1; mask[2] = 1;
1771 mask[4] = 1; mask[5] = 1; mask[6] = 1;
1772 } else {
1773 // Here we are looking at Neumann like BC where only a flux is prescribed
1774 // and less non-zeros will be present in the corresponding row.
1775 if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1776 mask[0] = 1;
1777 }
1778 if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1779 mask[6] = 1;
1780 }
1781 if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1782 mask[1] = 1;
1783 }
1784 if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1785 mask[5] = 1;
1786 }
1787 if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1788 mask[2] = 1;
1789 }
1790 if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1791 mask[4] = 1;
1792 }
1793 }
1794 }
1795
1796 int offset = 0;
1797 for(int ind = 0; ind < 7; ++ind) {
1798 if(mask[ind] == 1) {
1799 for(int dof = 0; dof < BlkSize; ++dof) {
1800 stencil[BlkSize*fullStencilInds[ind] + dof] = 0.0;
1801 }
1802 ++offset; // The offset is used to stay within rowValues bounds
1803 } else {
1804 for(int dof = 0; dof < BlkSize; ++dof) {
1805 stencil[BlkSize*fullStencilInds[ind] + dof] = rowValues[BlkSize*(ind - offset) + dof];
1806 }
1807 }
1808 }
1809 } else if(stencilType == "full") {
1810 // Create a mask array to automate mesh boundary processing
1811 Array<int> mask(27);
1812 if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1813 for(int ind = 0; ind < 9; ++ind) {
1814 mask[ind] = 1;
1815 }
1816 }
1817 if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1818 for(int ind = 0; ind < 9; ++ind) {
1819 mask[18 + ind] = 1;
1820 }
1821 }
1822 if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1823 for(int ind1 = 0; ind1 < 3; ++ind1) {
1824 for(int ind2 = 0; ind2 < 3; ++ind2) {
1825 mask[ind1*9 + ind2] = 1;
1826 }
1827 }
1828 }
1829 if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1830 for(int ind1 = 0; ind1 < 3; ++ind1) {
1831 for(int ind2 = 0; ind2 < 3; ++ind2) {
1832 mask[ind1*9 + ind2 + 6] = 1;
1833 }
1834 }
1835 }
1836 if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1837 for(int ind = 0; ind < 9; ++ind) {
1838 mask[3*ind] = 1;
1839 }
1840 }
1841 if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1842 for(int ind = 0; ind < 9; ++ind) {
1843 mask[3*ind + 2] = 1;
1844 }
1845 }
1846
1847 // Now the stencil is extracted and formated
1848 int offset = 0;
1849 for(LO ind = 0; ind < 27; ++ind) {
1850 if(mask[ind] == 0) {
1851 for(int dof = 0; dof < BlkSize; ++dof) {
1852 stencil[BlkSize*ind + dof] = 0.0;
1853 }
1854 ++offset; // The offset is used to stay within rowValues bounds
1855 } else {
1856 for(int dof = 0; dof < BlkSize; ++dof) {
1857 stencil[BlkSize*ind + dof] = rowValues[BlkSize*(ind - offset) + dof];
1858 }
1859 }
1860 }
1861 } // stencilTpye
1862 } // FormatStencil()
1863
1864 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1866 const LO ie, const LO je, const LO ke,
1867 const Array<LO> elementNodesPerDir,
1868 int* type, LO& ind, int* orientation) const {
1869
1870 // Initialize flags with -1 to be able to catch issues easily
1871 *type = -1, ind = 0, *orientation = -1;
1872 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1873 && (je == 0 || je == elementNodesPerDir[1]-1)
1874 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1875 // Corner node
1876 *type = 0;
1877 ind = 4*ke / (elementNodesPerDir[2]-1) + 2*je / (elementNodesPerDir[1]-1)
1878 + ie / (elementNodesPerDir[0]-1);
1879 } else if(((ke == 0 || ke == elementNodesPerDir[2]-1)
1880 && (je == 0 || je == elementNodesPerDir[1]-1))
1881 || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1882 && (ie == 0 || ie == elementNodesPerDir[0]-1))
1883 || ((je == 0 || je == elementNodesPerDir[1]-1)
1884 && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1885 // Edge node
1886 *type = 1;
1887 if(ke > 0) {ind += 2*(elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);}
1888 if(ke == elementNodesPerDir[2] - 1) {ind += 4*(elementNodesPerDir[2] - 2);}
1889 if((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1890 if(je == 0) {// jlo ie edge
1891 *orientation = 0;
1892 ind += ie - 1;
1893 } else if(je == elementNodesPerDir[1] - 1) {// jhi ie edge
1894 *orientation = 0;
1895 ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1896 } else {// ilo or ihi je edge
1897 *orientation = 1;
1898 ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0] - 1);
1899 }
1900 } else {// ke edges
1901 *orientation = 2;
1902 ind += 4*(ke - 1) + 2*(je/(elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1903 }
1904 } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1905 || (je == 0 || je == elementNodesPerDir[1]-1)
1906 || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1907 // Face node
1908 *type = 2;
1909 if(ke == 0) {// current node is on "bottom" face
1910 *orientation = 2;
1911 ind = (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1912 } else {
1913 // add nodes from "bottom" face
1914 ind += (elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2);
1915 // add nodes from side faces
1916 ind += 2*(ke - 1)*(elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1917 if(ke == elementNodesPerDir[2]-1) {// current node is on "top" face
1918 *orientation = 2;
1919 ind += (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1920 } else {// current node is on a side face
1921 if(je == 0) {
1922 *orientation = 1;
1923 ind += ie - 1;
1924 } else if(je == elementNodesPerDir[1] - 1) {
1925 *orientation = 1;
1926 ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1927 } else {
1928 *orientation = 0;
1929 ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0]-1);
1930 }
1931 }
1932 }
1933 } else {
1934 // Interior node
1935 *type = 3;
1936 ind = (ke - 1)*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2)
1937 + (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1938 }
1939 } // GetNodeInfo()
1940
1941 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1943 const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1944 const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1945 const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1946 const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const
1947 {
1948 typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1949 DT n = last1 - first1;
1950 DT m = n / 2;
1951 DT z = Teuchos::OrdinalTraits<DT>::zero();
1952 while (m > z)
1953 {
1954 DT max = n - m;
1955 for (DT j = 0; j < max; j++)
1956 {
1957 for (DT k = j; k >= 0; k-=m)
1958 {
1959 if (first1[first2[k+m]] >= first1[first2[k]])
1960 break;
1961 std::swap(first2[k+m], first2[k]);
1962 }
1963 }
1964 m = m/2;
1965 }
1966 }
1967
1968} //namespace MueLu
1969
1970#define MUELU_BLACKBOXPFACTORY_SHORT
1971#endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput().
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
Namespace for MueLu classes and methods.
@ Runtime0
One-liner description of what is happening.