MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
47#define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
48
49#include "Kokkos_UnorderedMap.hpp"
50
52
53#include <Xpetra_Matrix.hpp>
54#include <Xpetra_CrsGraphFactory.hpp>
55
56#include "MueLu_Types.hpp"
57
58
59
60namespace MueLu {
61
62 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
65 RCP<ParameterList> validParamList = rcp(new ParameterList());
66
67 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
68 "Generating factory of the matrix A");
69 validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
70 "Number of spatial dimensions in the problem.");
71 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
72 "Number of local nodes per spatial dimension on the fine grid.");
73 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
74 "Fine level nullspace used to construct the coarse level nullspace.");
75 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
76 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
77 validParamList->set<bool> ("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
78
79 return validParamList;
80 } // GetValidParameterList()
81
82 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
84 DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
85
86 Input(fineLevel, "A");
87 Input(fineLevel, "numDimensions");
88 Input(fineLevel, "lNodesPerDim");
89 Input(fineLevel, "Nullspace");
90 Input(fineLevel, "Coordinates");
91
92 } // DeclareInput()
93
94 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
96 Build(Level& fineLevel, Level& coarseLevel) const {
97
98 // Set debug outputs based on environment variable
99 RCP<Teuchos::FancyOStream> out;
100 if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
101 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
102 out->setShowAllFrontMatter(false).setShowProcRank(true);
103 } else {
104 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
105 }
106
107 *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
108
109 // First get the inputs from the fineLevel
110 const int numDimensions = Get<int>(fineLevel, "numDimensions");
111 Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
112 {
113 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
114 for(int dim = 0; dim < numDimensions; ++dim) {
115 lFineNodesPerDim[dim] = lNodesPerDim[dim];
116 }
117 }
118 *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
119 << std::endl;
120
121 // Let us check that the inputs verify our assumptions
122 if(numDimensions < 1 || numDimensions > 3) {
123 throw std::runtime_error("numDimensions must be 1, 2 or 3!");
124 }
125 for(int dim = 0; dim < numDimensions; ++dim) {
126 if(lFineNodesPerDim[dim] % 3 != 1) {
127 throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
128 }
129 }
130 Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
131
132 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
133
134 RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
135 fineCoordinates = Get< RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
136 if(static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
137 throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
138 }
139
140 // Let us create R and pass it down to the
141 // appropriate specialization and see what we
142 // get back!
143 RCP<Matrix> R;
144
145 if(numDimensions == 1) {
146 throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
147 } else if(numDimensions == 2) {
148 throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
149 } else if(numDimensions == 3) {
150 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
151 R, coarseCoordinates, lCoarseNodesPerDim);
152 }
153
154 const Teuchos::ParameterList& pL = GetParameterList();
155
156 // Reuse pattern if available (multiple solve)
157 RCP<ParameterList> Tparams;
158 if(pL.isSublist("matrixmatrix: kernel params"))
159 Tparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
160 else
161 Tparams= rcp(new ParameterList);
162
163 // R->describe(*out, Teuchos::VERB_EXTREME);
164 *out << "Compute P=R^t" << std::endl;
165 // By default, we don't need global constants for transpose
166 Tparams->set("compute global constants: temporaries",Tparams->get("compute global constants: temporaries", false));
167 Tparams->set("compute global constants", Tparams->get("compute global constants",false));
168 std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
169 RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
170
171 *out << "Compute coarse nullspace" << std::endl;
172 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
173 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
174 fineNullspace->getNumVectors());
175 R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
176 Teuchos::ScalarTraits<SC>::zero());
177
178 *out << "Set data on coarse level" << std::endl;
179 Set(coarseLevel, "numDimensions", numDimensions);
180 Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
181 Set(coarseLevel, "Nullspace", coarseNullspace);
182 Set(coarseLevel, "Coordinates", coarseCoordinates);
183 if(pL.get<bool>("keep coarse coords")) {
184 coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
185 }
186
187 R->SetFixedBlockSize(A->GetFixedBlockSize());
188 P->SetFixedBlockSize(A->GetFixedBlockSize());
189
190 Set(coarseLevel, "R", R);
191 Set(coarseLevel, "P", P);
192
193 } // Build()
194
195 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
197 Build3D(const int numDimensions,
198 Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
199 const RCP<Matrix>& A,
200 const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& fineCoordinates,
201 RCP<Matrix>& R,
202 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coarseCoordinates,
203 Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
204 using local_matrix_type = typename CrsMatrix::local_matrix_type;
205 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
206 using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
207 using entries_type = typename local_matrix_type::index_type::non_const_type;
208 using values_type = typename local_matrix_type::values_type::non_const_type;
209 using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
210
211 // Set debug outputs based on environment variable
212 RCP<Teuchos::FancyOStream> out;
213 if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
214 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
215 out->setShowAllFrontMatter(false).setShowProcRank(true);
216 } else {
217 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
218 }
219
220 // Now compute number of coarse grid points
221 for(int dim = 0; dim < numDimensions; ++dim) {
222 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
223 }
224 *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
225
226 // Grab the block size here and multiply all existing offsets by it
227 const LO blkSize = A->GetFixedBlockSize();
228 *out << "blkSize " << blkSize << std::endl;
229
230 // Based on lCoarseNodesPerDim and lFineNodesPerDim
231 // we can compute numRows, numCols and NNZ for R
232 const LO numRows = blkSize*lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2];
233 const LO numCols = blkSize*lFineNodesPerDim[0]*lFineNodesPerDim[1]*lFineNodesPerDim[2];
234
235 // Create the coarse coordinates multivector
236 // so we can fill it on the fly while computing
237 // the restriction operator
238 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
239 Teuchos::OrdinalTraits<GO>::invalid(),
240 numRows,
241 A->getRowMap()->getIndexBase(),
242 A->getRowMap()->getComm());
243
244 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
245 Teuchos::OrdinalTraits<GO>::invalid(),
246 lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2],
247 A->getRowMap()->getIndexBase(),
248 A->getRowMap()->getComm());
249
250 coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
251 numDimensions);
252
253 // Get device views of coordinates
254 auto fineCoordsView = fineCoordinates ->getDeviceLocalView(Xpetra::Access::ReadOnly);
255 auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
256
257
258 Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
259 Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
260 for(int dim = 0; dim < numDimensions; ++dim) {
261 fineCoordData[dim] = fineCoordinates->getData(dim);
262 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
263 }
264
265 // Let us set some parameter that will be useful
266 // while constructing R
267
268 // Length of interpolation stencils based on geometry
269 const LO cornerStencilLength = 27;
270 const LO edgeStencilLength = 45;
271 const LO faceStencilLength = 75;
272 const LO interiorStencilLength = 125;
273
274 // Number of corner, edge, face and interior nodes
275 const LO numCorners = 8;
276 const LO numEdges = 4*(lCoarseNodesPerDim[0] - 2)
277 + 4*(lCoarseNodesPerDim[1] - 2)
278 + 4*(lCoarseNodesPerDim[2] - 2);
279 const LO numFaces = 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
280 + 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[2] - 2)
281 + 2*(lCoarseNodesPerDim[1] - 2)*(lCoarseNodesPerDim[2] - 2);
282 const LO numInteriors = (lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
283 *(lCoarseNodesPerDim[2] - 2);
284
285 const LO nnz = (numCorners*cornerStencilLength + numEdges*edgeStencilLength
286 + numFaces*faceStencilLength + numInteriors*interiorStencilLength)*blkSize;
287
288 // Having the number of rows and columns we can genrate
289 // the appropriate maps for our operator.
290
291 *out << "R statistics:" << std::endl
292 << " -numRows= " << numRows << std::endl
293 << " -numCols= " << numCols << std::endl
294 << " -nnz= " << nnz << std::endl;
295
296 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
297 typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
298
299 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
300 typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
301
302 values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
303 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
304
305 // Compute the basic interpolation
306 // coefficients for 1D rate of 3
307 // coarsening.
308 Array<SC> coeffs({1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0});
309 row_map_h(0) = 0;
310
311 // Define some offsets that
312 // will be needed often later on
313 const LO edgeLineOffset = 2*cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength;
314 const LO faceLineOffset = 2*edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength;
315 const LO interiorLineOffset = 2*faceStencilLength
316 + (lCoarseNodesPerDim[0] - 2)*interiorStencilLength;
317
318 const LO facePlaneOffset = 2*edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset;
319 const LO interiorPlaneOffset = 2*faceLineOffset + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset;
320
321 // Let us take care of the corners
322 // first since we always have
323 // corners to deal with!
324 {
325 // Corner 1
326 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
327 for(LO l = 0; l < blkSize; ++l) {
328 for(LO k = 0; k < 3; ++k) {
329 for(LO j = 0; j < 3; ++j) {
330 for(LO i = 0; i < 3; ++i) {
331 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
332 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
333 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i + 2];
334 }
335 }
336 }
337 }
338 for(LO l = 0; l < blkSize; ++l) {
339 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
340 }
341 for(int dim = 0; dim <numDimensions; ++dim) {
342 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
343 }
344
345 // Corner 5
346 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
347 rowIdx = coordRowIdx*blkSize;
348 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
349 columnOffset = coordColumnOffset*blkSize;
350 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
351 for(LO l = 0; l < blkSize; ++l) {
352 for(LO k = 0; k < 3; ++k) {
353 for(LO j = 0; j < 3; ++j) {
354 for(LO i = 0; i < 3; ++i) {
355 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
356 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
357 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
358 }
359 }
360 }
361 }
362 for(LO l = 0; l < blkSize; ++l) {
363 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
364 }
365 for(int dim = 0; dim <numDimensions; ++dim) {
366 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
367 }
368
369 // Corner 2
370 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
371 rowIdx = coordRowIdx*blkSize;
372 coordColumnOffset = (lFineNodesPerDim[0] - 1);
373 columnOffset = coordColumnOffset*blkSize;
374 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
375 for(LO l = 0; l < blkSize; ++l) {
376 for(LO k = 0; k < 3; ++k) {
377 for(LO j = 0; j < 3; ++j) {
378 for(LO i = 0; i < 3; ++i) {
379 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
380 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
381 + j*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
382 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
383 }
384 }
385 }
386 }
387 for(LO l = 0; l < blkSize; ++l) {
388 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
389 }
390 for(int dim = 0; dim <numDimensions; ++dim) {
391 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
392 }
393
394 // Corner 6
395 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
396 rowIdx = coordRowIdx*blkSize;
397 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
398 columnOffset = coordColumnOffset*blkSize;
399 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
400 for(LO l = 0; l < blkSize; ++l) {
401 for(LO k = 0; k < 3; ++k) {
402 for(LO j = 0; j < 3; ++j) {
403 for(LO i = 0; i < 3; ++i) {
404 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
405 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
406 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
407 }
408 }
409 }
410 }
411 for(LO l = 0; l < blkSize; ++l) {
412 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
413 }
414 for(int dim = 0; dim <numDimensions; ++dim) {
415 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
416 }
417
418 // Corner 3
419 coordRowIdx = (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0];
420 rowIdx = coordRowIdx*blkSize;
421 coordColumnOffset = (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0];
422 columnOffset = coordColumnOffset*blkSize;
423 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset)*blkSize;
424 for(LO l = 0; l < blkSize; ++l) {
425 for(LO k = 0; k < 3; ++k) {
426 for(LO j = 0; j < 3; ++j) {
427 for(LO i = 0; i < 3; ++i) {
428 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
429 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
430 + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
431 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
432 }
433 }
434 }
435 }
436 for(LO l = 0; l < blkSize; ++l) {
437 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
438 }
439 for(int dim = 0; dim <numDimensions; ++dim) {
440 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
441 }
442
443 // Corner 7
444 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
445 rowIdx = coordRowIdx*blkSize;
446 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
447 columnOffset = coordColumnOffset*blkSize;
448 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
449 for(LO l = 0; l < blkSize; ++l) {
450 for(LO k = 0; k < 3; ++k) {
451 for(LO j = 0; j < 3; ++j) {
452 for(LO i = 0; i < 3; ++i) {
453 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
454 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
455 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
456 }
457 }
458 }
459 }
460 for(LO l = 0; l < blkSize; ++l) {
461 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
462 }
463 for(int dim = 0; dim <numDimensions; ++dim) {
464 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
465 }
466
467 // Corner 4
468 coordRowIdx = (lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
469 rowIdx = coordRowIdx*blkSize;
470 coordColumnOffset = (lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
471 columnOffset = coordColumnOffset*blkSize;
472 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset +
473 cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
474 for(LO l = 0; l < blkSize; ++l) {
475 for(LO k = 0; k < 3; ++k) {
476 for(LO j = 0; j < 3; ++j) {
477 for(LO i = 0; i < 3; ++i) {
478 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
479 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
480 + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
481 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
482 }
483 }
484 }
485 }
486 for(LO l = 0; l < blkSize; ++l) {
487 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
488 }
489 for(int dim = 0; dim <numDimensions; ++dim) {
490 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
491 }
492
493 // Corner 8
494 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
495 rowIdx = coordRowIdx*blkSize;
496 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
497 columnOffset = coordColumnOffset*blkSize;
498 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
499 for(LO l = 0; l < blkSize; ++l) {
500 for(LO k = 0; k < 3; ++k) {
501 for(LO j = 0; j < 3; ++j) {
502 for(LO i = 0; i < 3; ++i) {
503 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
504 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
505 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
506 }
507 }
508 }
509 }
510 for(LO l = 0; l < blkSize; ++l) {
511 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
512 }
513 for(int dim = 0; dim <numDimensions; ++dim) {
514 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
515 }
516 } // Corners are done!
517
518 // Edges along 0 direction
519 if(lCoarseNodesPerDim[0] - 2 > 0) {
520
521 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
522 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
523
524 // Edge 0
525 coordRowIdx = (edgeIdx + 1);
526 rowIdx = coordRowIdx*blkSize;
527 coordColumnOffset = (edgeIdx + 1)*3;
528 columnOffset = coordColumnOffset*blkSize;
529 entryOffset = (cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
530 for(LO l = 0; l < blkSize; ++l) {
531 for(LO k = 0; k < 3; ++k) {
532 for(LO j = 0; j < 3; ++j) {
533 for(LO i = 0; i < 5; ++i) {
534 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
535 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
536 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
537 }
538 }
539 }
540 }
541 for(LO l = 0; l < blkSize; ++l) {
542 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
543 }
544 for(int dim = 0; dim <numDimensions; ++dim) {
545 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
546 }
547
548 // Edge 1
549 coordRowIdx = ((lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
550 rowIdx = coordRowIdx*blkSize;
551 coordColumnOffset = ((lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
552 columnOffset = coordColumnOffset*blkSize;
553 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
554 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
555 for(LO l = 0; l < blkSize; ++l) {
556 for(LO k = 0; k < 3; ++k) {
557 for(LO j = 0; j < 3; ++j) {
558 for(LO i = 0; i < 5; ++i) {
559 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
560 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
561 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
562 }
563 }
564 }
565 }
566 for(LO l = 0; l < blkSize; ++l) {
567 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
568 }
569 for(int dim = 0; dim <numDimensions; ++dim) {
570 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
571 }
572
573 // Edge 2
574 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
575 + edgeIdx + 1);
576 rowIdx = coordRowIdx*blkSize;
577 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
578 + (edgeIdx + 1)*3);
579 columnOffset = coordColumnOffset*blkSize;
580 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
581 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
582 for(LO l = 0; l < blkSize; ++l) {
583 for(LO k = 0; k < 3; ++k) {
584 for(LO j = 0; j < 3; ++j) {
585 for(LO i = 0; i < 5; ++i) {
586 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
587 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
588 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
589 }
590 }
591 }
592 }
593 for(LO l = 0; l < blkSize; ++l) {
594 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
595 }
596 for(int dim = 0; dim <numDimensions; ++dim) {
597 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
598 }
599
600 // Edge 3
601 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
602 + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
603 rowIdx = coordRowIdx*blkSize;
604 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
605 + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
606 columnOffset = coordColumnOffset*blkSize;
607 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
608 + edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
609 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
610 for(LO l = 0; l < blkSize; ++l) {
611 for(LO k = 0; k < 3; ++k) {
612 for(LO j = 0; j < 3; ++j) {
613 for(LO i = 0; i < 5; ++i) {
614 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
615 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
616 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
617 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
618 }
619 }
620 }
621 }
622 for(LO l = 0; l < blkSize; ++l) {
623 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
624 }
625 for(int dim = 0; dim <numDimensions; ++dim) {
626 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
627 }
628 }
629 }
630
631 // Edges along 1 direction
632 if(lCoarseNodesPerDim[1] - 2 > 0) {
633
634 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
635 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
636
637 // Edge 0
638 coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[0];
639 rowIdx = coordRowIdx*blkSize;
640 coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[0];
641 columnOffset = coordColumnOffset*blkSize;
642 entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
643 for(LO l = 0; l < blkSize; ++l) {
644 for(LO k = 0; k < 3; ++k) {
645 for(LO j = 0; j < 5; ++j) {
646 for(LO i = 0; i < 3; ++i) {
647 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
648 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
649 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
650 }
651 }
652 }
653 }
654 for(LO l = 0; l < blkSize; ++l) {
655 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
656 }
657 for(int dim = 0; dim <numDimensions; ++dim) {
658 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
659 }
660
661 // Edge 1
662 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
663 rowIdx = coordRowIdx*blkSize;
664 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
665 columnOffset = coordColumnOffset*blkSize;
666 entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset
667 + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
668 for(LO l = 0; l < blkSize; ++l) {
669 for(LO k = 0; k < 3; ++k) {
670 for(LO j = 0; j < 5; ++j) {
671 for(LO i = 0; i < 3; ++i) {
672 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
673 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
674 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
675 }
676 }
677 }
678 }
679 for(LO l = 0; l < blkSize; ++l) {
680 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
681 }
682 for(int dim = 0; dim <numDimensions; ++dim) {
683 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
684 }
685
686 // Edge 2
687 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
688 + (edgeIdx + 1)*lCoarseNodesPerDim[0]);
689 rowIdx = coordRowIdx*blkSize;
690 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
691 + (edgeIdx + 1)*3*lFineNodesPerDim[0]);
692 columnOffset = coordColumnOffset*blkSize;
693 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
694 + edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
695 for(LO l = 0; l < blkSize; ++l) {
696 for(LO k = 0; k < 3; ++k) {
697 for(LO j = 0; j < 5; ++j) {
698 for(LO i = 0; i < 3; ++i) {
699 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
700 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
701 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
702 }
703 }
704 }
705 }
706 for(LO l = 0; l < blkSize; ++l) {
707 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
708 }
709 for(int dim = 0; dim <numDimensions; ++dim) {
710 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
711 }
712
713 // Edge 3
714 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
715 + (edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
716 rowIdx = coordRowIdx*blkSize;
717 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
718 + (edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
719 columnOffset = coordColumnOffset*blkSize;
720 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
721 + edgeLineOffset + edgeIdx*faceLineOffset
722 + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
723 for(LO l = 0; l < blkSize; ++l) {
724 for(LO k = 0; k < 3; ++k) {
725 for(LO j = 0; j < 5; ++j) {
726 for(LO i = 0; i < 3; ++i) {
727 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
728 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
729 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
730 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
731 }
732 }
733 }
734 }
735 for(LO l = 0; l < blkSize; ++l) {
736 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
737 }
738 for(int dim = 0; dim <numDimensions; ++dim) {
739 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
740 }
741 }
742 }
743
744 // Edges along 2 direction
745 if(lCoarseNodesPerDim[2] - 2 > 0) {
746
747 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
748 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
749
750 // Edge 0
751 coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
752 rowIdx = coordRowIdx*blkSize;
753 coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0];
754 columnOffset = coordColumnOffset*blkSize;
755 entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset)*blkSize;
756 for(LO l = 0; l < blkSize; ++l) {
757 for(LO k = 0; k < 5; ++k) {
758 for(LO j = 0; j < 3; ++j) {
759 for(LO i = 0; i < 3; ++i) {
760 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
761 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
762 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
763 }
764 }
765 }
766 }
767 for(LO l = 0; l < blkSize; ++l) {
768 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
769 }
770 for(int dim = 0; dim <numDimensions; ++dim) {
771 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
772 }
773
774 // Edge 1
775 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
776 + lCoarseNodesPerDim[0] - 1);
777 rowIdx = coordRowIdx*blkSize;
778 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
779 + lFineNodesPerDim[0] - 1);
780 columnOffset = coordColumnOffset*blkSize;
781 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength
782 + edgeIdx*interiorPlaneOffset)*blkSize;
783 for(LO l = 0; l < blkSize; ++l) {
784 for(LO k = 0; k < 5; ++k) {
785 for(LO j = 0; j < 3; ++j) {
786 for(LO i = 0; i < 3; ++i) {
787 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
788 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
789 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
790 }
791 }
792 }
793 }
794 for(LO l = 0; l < blkSize; ++l) {
795 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
796 }
797 for(int dim = 0; dim <numDimensions; ++dim) {
798 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
799 }
800
801 // Edge 2
802 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
803 + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0]);
804 rowIdx = coordRowIdx*blkSize;
805 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
806 + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0]);
807 columnOffset = coordColumnOffset*blkSize;
808 entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset + faceLineOffset
809 + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset)*blkSize;
810 for(LO l = 0; l < blkSize; ++l) {
811 for(LO k = 0; k < 5; ++k) {
812 for(LO j = 0; j < 3; ++j) {
813 for(LO i = 0; i < 3; ++i) {
814 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
815 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
816 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
817 }
818 }
819 }
820 }
821 for(LO l = 0; l < blkSize; ++l) {
822 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
823 }
824 for(int dim = 0; dim <numDimensions; ++dim) {
825 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
826 }
827
828 // Edge 3
829 coordRowIdx = ((edgeIdx + 2)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
830 rowIdx = coordRowIdx*blkSize;
831 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
832 + lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
833 columnOffset = coordColumnOffset*blkSize;
834 entryOffset = (facePlaneOffset + (edgeIdx + 1)*interiorPlaneOffset - edgeStencilLength)*blkSize;
835 for(LO l = 0; l < blkSize; ++l) {
836 for(LO k = 0; k < 5; ++k) {
837 for(LO j = 0; j < 3; ++j) {
838 for(LO i = 0; i < 3; ++i) {
839 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
840 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
841 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
842 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
843 }
844 }
845 }
846 }
847 for(LO l = 0; l < blkSize; ++l) {
848 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
849 }
850 for(int dim = 0; dim <numDimensions; ++dim) {
851 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
852 }
853 }
854 }
855
856 //TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
857 Kokkos::deep_copy(row_map, row_map_h);
858 Kokkos::deep_copy(entries, entries_h);
859 Kokkos::deep_copy(values, values_h);
860
861 // Create views on device for nodes per dim
862 LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
863 LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
864
865 typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view( lCoarseNodesPerDim_d );
866 typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view( lFineNodesPerDim_d );
867
868 for(int dim = 0; dim < numDimensions; ++dim) {
869 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
870 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
871 }
872
873 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
874 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
875
876
877 // Faces in 0-1 plane
878 if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
879
880 Kokkos::parallel_for("Faces in 0-1 plane region R",
881 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1]-2)*(lCoarseNodesPerDim[0]-2) ),
882 KOKKOS_LAMBDA(const LO faceIdx) {
883 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
884 LO gridIdx[3] = {0,0,0};
885 impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
886 // Last step in the loop
887 // update the grid indices
888 // for next grid point
889 for(LO i = 0; i < faceIdx; i++){
890 ++gridIdx[0];
891 if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
892 gridIdx[0] = 0;
893 ++gridIdx[1];
894 }
895 }
896
897 // Face 0
898 coordRowIdx = ((gridIdx[1] + 1)*lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
899 rowIdx = coordRowIdx*blkSize;
900 coordColumnOffset = 3*((gridIdx[1] + 1)*lFineNodesPerDim_d(0) + gridIdx[0] + 1);
901 columnOffset = coordColumnOffset*blkSize;
902 entryOffset = (edgeLineOffset + edgeStencilLength
903 + gridIdx[1]*faceLineOffset + gridIdx[0]*faceStencilLength)*blkSize;
904 for(LO l = 0; l < blkSize; ++l) {
905 for(LO k = 0; k < 3; ++k) {
906 for(LO j = 0; j < 5; ++j) {
907 for(LO i = 0; i < 5; ++i) {
908 entries(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
909 + (k*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
910 values(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs_d[k + 2]*coeffs_d[j]*coeffs_d[i];
911 }
912 }
913 }
914 }
915 for(LO l = 0; l < blkSize; ++l) {
916 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
917 }
918 for(int dim = 0; dim <numDimensions; ++dim) {
919 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
920 }
921
922 // Face 1
923 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0);
924 rowIdx = coordRowIdx*blkSize;
925 coordColumnOffset += (lFineNodesPerDim_d(2) - 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0);
926 columnOffset = coordColumnOffset*blkSize;
927 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2)*interiorPlaneOffset)*blkSize;
928 for(LO l = 0; l < blkSize; ++l) {
929 for(LO k = 0; k < 3; ++k) {
930 for(LO j = 0; j < 5; ++j) {
931 for(LO i = 0; i < 5; ++i) {
932 entries(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
933 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
934 + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
935 values(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
936 }
937 }
938 }
939 }
940 for(LO l = 0; l < blkSize; ++l) {
941 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
942 }
943 for(int dim = 0; dim <numDimensions; ++dim) {
944 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
945 }
946
947 });// parallel_for faces in 0-1 plane
948 }
949
950 // Faces in 0-2 plane
951 if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
952
953 Kokkos::parallel_for("Faces in 0-2 plane region R",
954 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[0]-2) ),
955 KOKKOS_LAMBDA(const LO faceIdx) {
956 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
957 LO gridIdx[3] = {0,0,0};
958 impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
959 // Last step in the loop
960 // update the grid indices
961 // for next grid point
962 for(LO i = 0; i < faceIdx; i++){
963 ++gridIdx[0];
964 if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
965 gridIdx[0] = 0;
966 ++gridIdx[2];
967 }
968 }
969
970 // Face 0
971 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
972 rowIdx = coordRowIdx*blkSize;
973 coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
974 + gridIdx[0] + 1)*3;
975 columnOffset = coordColumnOffset*blkSize;
976 entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + edgeStencilLength
977 + gridIdx[0]*faceStencilLength)*blkSize;
978 for(LO l = 0; l < blkSize; ++l) {
979 for(LO k = 0; k < 5; ++k) {
980 for(LO j = 0; j < 3; ++j) {
981 for(LO i = 0; i < 5; ++i) {
982 entries(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
983 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + j*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
984 values(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j + 2]*coeffs_d[i];
985 }
986 }
987 }
988 }
989 for(LO l = 0; l < blkSize; ++l) {
990 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
991 }
992 for(int dim = 0; dim <numDimensions; ++dim) {
993 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
994 }
995
996 // Face 1
997 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1)*lCoarseNodesPerDim_d(0);
998 rowIdx = coordRowIdx*blkSize;
999 coordColumnOffset += (lFineNodesPerDim_d(1) - 1)*lFineNodesPerDim_d(0);
1000 columnOffset = coordColumnOffset*blkSize;
1001 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2)*interiorLineOffset)*blkSize;
1002 for(LO l = 0; l < blkSize; ++l) {
1003 for(LO k = 0; k < 5; ++k) {
1004 for(LO j = 0; j < 3; ++j) {
1005 for(LO i = 0; i < 5; ++i) {
1006 entries(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
1007 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1008 + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
1009 values(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
1010 }
1011 }
1012 }
1013 }
1014 for(LO l = 0; l < blkSize; ++l) {
1015 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1016 }
1017 for(int dim = 0; dim <numDimensions; ++dim) {
1018 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1019 }
1020
1021 });// parallel_for faces in 0-2 plane
1022 }
1023
1024 // Faces in 1-2 plane
1025 if((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
1026
1027 Kokkos::parallel_for("Faces in 1-2 plane region R",
1028 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[1]-2) ),
1029 KOKKOS_LAMBDA(const LO faceIdx) {
1030 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1031 LO gridIdx[3] = {0,0,0};
1032 impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
1033 // Last step in the loop
1034 // update the grid indices
1035 // for next grid point
1036 for(LO i = 0; i < faceIdx; i++){
1037 ++gridIdx[1];
1038 if(gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1039 gridIdx[1] = 0;
1040 ++gridIdx[2];
1041 }
1042 }
1043
1044 // Face 0
1045 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0)
1046 + (gridIdx[1] + 1)*lCoarseNodesPerDim_d(0));
1047 rowIdx = coordRowIdx*blkSize;
1048 coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1049 + (gridIdx[1] + 1)*lFineNodesPerDim_d(0))*3;
1050 columnOffset = coordColumnOffset*blkSize;
1051 entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + faceLineOffset
1052 + gridIdx[1]*interiorLineOffset)*blkSize;
1053 for(LO l = 0; l < blkSize; ++l) {
1054 for(LO k = 0; k < 5; ++k) {
1055 for(LO j = 0; j < 5; ++j) {
1056 for(LO i = 0; i < 3; ++i) {
1057 entries(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1058 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + (j - 2)*lFineNodesPerDim_d(0) + i)*blkSize + l;
1059 values(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i + 2];
1060 }
1061 }
1062 }
1063 }
1064 for(LO l = 0; l < blkSize; ++l) {
1065 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1066 }
1067 for(int dim = 0; dim <numDimensions; ++dim) {
1068 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1069 }
1070
1071 // Face 1
1072 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
1073 rowIdx = coordRowIdx*blkSize;
1074 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
1075 columnOffset = coordColumnOffset*blkSize;
1076 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2)*interiorStencilLength)*blkSize;
1077 for(LO l = 0; l < blkSize; ++l) {
1078 for(LO k = 0; k < 5; ++k) {
1079 for(LO j = 0; j < 5; ++j) {
1080 for(LO i = 0; i < 3; ++i) {
1081 entries(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1082 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1083 + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
1084 values(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
1085 }
1086 }
1087 }
1088 }
1089 for(LO l = 0; l < blkSize; ++l) {
1090 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1091 }
1092 for(int dim = 0; dim <numDimensions; ++dim) {
1093 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1094 }
1095
1096 });// parallel_for faces in 1-2 plane
1097 }
1098
1099 if(numInteriors > 0) {
1100
1101 // Allocate and compute arrays
1102 // containing column offsets
1103 // and values associated with
1104 // interior points
1105 LO countRowEntries = 0;
1106 Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
1107 auto coordColumnOffsets_h = Kokkos::create_mirror_view( coordColumnOffsets_d );
1108
1109 for(LO k = -2; k < 3; ++k) {
1110 for(LO j = -2; j < 3; ++j) {
1111 for(LO i = -2; i < 3; ++i) {
1112 coordColumnOffsets_h(countRowEntries) = k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
1113 + j*lFineNodesPerDim[0] + i;
1114 ++countRowEntries;
1115 }
1116 }
1117 }
1118 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1119
1120 LO countValues = 0;
1121 Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues",125);
1122 auto interiorValues_h = Kokkos::create_mirror_view( interiorValues_d );
1123 for(LO k = 0; k < 5; ++k) {
1124 for(LO j = 0; j < 5; ++j) {
1125 for(LO i = 0; i < 5; ++i) {
1126 interiorValues_h(countValues) = coeffs[k]*coeffs[j]*coeffs[i];
1127 ++countValues;
1128 }
1129 }
1130 }
1131 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1132
1133 Kokkos::parallel_for("interior idx region R",Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1134 KOKKOS_LAMBDA(const LO interiorIdx) {
1135 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1136 LO gridIdx[3];
1137 gridIdx[0] = 0; gridIdx[1] = 0; gridIdx[2] = 0;
1138 // First step in the loop
1139 // update the grid indices
1140 // for the grid point
1141 for(LO i=0; i<interiorIdx; i++){
1142 ++gridIdx[0];
1143 if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1144 gridIdx[0] = 0;
1145 ++gridIdx[1];
1146 if(gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1147 gridIdx[1] = 0;
1148 ++gridIdx[2];
1149 }
1150 }
1151 }
1152
1153 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(0)*lCoarseNodesPerDim_d(1)
1154 + (gridIdx[1] + 1)*lCoarseNodesPerDim_d(0)
1155 + gridIdx[0] + 1);
1156 rowIdx = coordRowIdx*blkSize;
1157 coordColumnOffset = ((gridIdx[2] + 1)*3*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1158 + (gridIdx[1] + 1)*3*lFineNodesPerDim_d(0) + (gridIdx[0] + 1)*3);
1159 columnOffset = coordColumnOffset*blkSize;
1160 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength
1161 + gridIdx[2]*interiorPlaneOffset + gridIdx[1]*interiorLineOffset
1162 + gridIdx[0]*interiorStencilLength)*blkSize;
1163 for(LO l = 0; l < blkSize; ++l) {
1164 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength*(l+1);
1165 }
1166 // Fill the column indices
1167 // and values in the approproate
1168 // views.
1169 for(LO l = 0; l < blkSize; ++l) {
1170 for(LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1171 entries(entryOffset + entryIdx + interiorStencilLength*l) = columnOffset + coordColumnOffsets_d(entryIdx)*blkSize + l;
1172 values(entryOffset + entryIdx + interiorStencilLength*l) = interiorValues_d(entryIdx);
1173 }
1174 }
1175 for(int dim = 0; dim <numDimensions; ++dim) {
1176 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1177 }
1178
1179 });// Kokkos::parallel_for interior idx
1180 //
1181 }
1182
1183 local_graph_type localGraph(entries, row_map);
1184 local_matrix_type localR("R", numCols, values, localGraph);
1185
1186 R = MatrixFactory::Build(localR, // the local data
1187 rowMap, // rowMap
1188 A->getRowMap(), // colMap
1189 A->getRowMap(), // domainMap == colMap
1190 rowMap, // rangeMap == rowMap
1191 Teuchos::null); // params for optimized construction
1192
1193 } // Build3D()
1194
1195} //namespace MueLu
1196
1197#define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1198#endif // MUELU_REGIONRFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
typename Kokkos::View< LO[3], device_type > LOTupleView
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.