201 struct Hierarchical_HGRAD_PYR_Functor
203 using ExecutionSpace =
typename DeviceType::execution_space;
204 using ScratchSpace =
typename ExecutionSpace::scratch_memory_space;
205 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
206 using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
207 using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
209 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
210 using TeamMember =
typename TeamPolicy::member_type;
214 OutputFieldType output_;
215 InputPointsType inputPoints_;
218 bool defineVertexFunctions_;
219 int numFields_, numPoints_;
221 size_t fad_size_output_;
223 static const int numVertices = 5;
224 static const int numMixedEdges = 4;
225 static const int numTriEdges = 4;
226 static const int numEdges = 8;
230 const int edge_start_[numEdges] = {0,1,2,3,0,1,2,3};
231 const int edge_end_[numEdges] = {1,2,3,0,4,4,4,4};
234 static const int numQuadFaces = 1;
235 static const int numTriFaces = 4;
238 const int tri_face_vertex_0[numTriFaces] = {0,1,3,0};
239 const int tri_face_vertex_1[numTriFaces] = {1,2,2,3};
240 const int tri_face_vertex_2[numTriFaces] = {4,4,4,4};
242 Hierarchical_HGRAD_PYR_Functor(EOperator opType, OutputFieldType output, InputPointsType inputPoints,
243 int polyOrder,
bool defineVertexFunctions)
244 : opType_(opType), output_(output), inputPoints_(inputPoints),
245 polyOrder_(polyOrder), defineVertexFunctions_(defineVertexFunctions),
248 numFields_ = output.extent_int(0);
249 numPoints_ = output.extent_int(1);
250 const auto & p = polyOrder;
251 const auto p3 = p * p * p;
252 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument,
"point counts need to match!");
253 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != p3 + 3 * p + 1, std::invalid_argument,
"output field size does not match basis cardinality");
256 KOKKOS_INLINE_FUNCTION
257 void operator()(
const TeamMember & teamMember )
const
259 auto pointOrdinal = teamMember.league_rank();
260 OutputScratchView scratch1D_1, scratch1D_2, scratch1D_3;
261 OutputScratchView scratch1D_4, scratch1D_5, scratch1D_6;
262 OutputScratchView scratch1D_7, scratch1D_8, scratch1D_9;
263 OutputScratchView2D scratch2D_1, scratch2D_2, scratch2D_3;
264 const int numAlphaValues = (polyOrder_-1 > 1) ? (polyOrder_-1) : 1;
265 if (fad_size_output_ > 0) {
266 scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
267 scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
268 scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
269 scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
270 scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
271 scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
272 scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
273 scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
274 scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
275 scratch2D_1 = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
276 scratch2D_2 = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
277 scratch2D_3 = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
280 scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
281 scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
282 scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
283 scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
284 scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
285 scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
286 scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
287 scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
288 scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
289 scratch2D_1 = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
290 scratch2D_2 = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
291 scratch2D_3 = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
294 const auto & x = inputPoints_(pointOrdinal,0);
295 const auto & y = inputPoints_(pointOrdinal,1);
296 const auto & z = inputPoints_(pointOrdinal,2);
302 Kokkos::Array<PointScalar,3> coords;
307 Array<PointScalar,5> lambda;
308 Array<Kokkos::Array<PointScalar,3>,5> lambdaGrad;
310 Array<Array<PointScalar,3>,2> mu;
311 Array<Array<Array<PointScalar,3>,3>,2> muGrad;
313 Array<Array<PointScalar,2>,3> nu;
314 Array<Array<Array<PointScalar,3>,2>,3> nuGrad;
316 affinePyramid(lambda, lambdaGrad, mu, muGrad, nu, nuGrad, coords);
323 for (
int vertexOrdinal=0; vertexOrdinal<numVertices; vertexOrdinal++)
325 output_(vertexOrdinal,pointOrdinal) = lambda[vertexOrdinal];
327 if (!defineVertexFunctions_)
331 output_(0,pointOrdinal) = 1.0;
335 auto & Li_a1 = scratch1D_1;
336 auto & Li_a2 = scratch1D_2;
343 int fieldOrdinalOffset = numVertices;
344 for (
int edgeOrdinal=0; edgeOrdinal<numMixedEdges; edgeOrdinal++)
348 int a = (edgeOrdinal % 2 == 0) ? 1 : 2;
350 auto & Li = (a == 1) ? Li_a1 : Li_a2;
353 int c = ((edgeOrdinal == 0) || (edgeOrdinal == 3)) ? 0 : 1;
354 for (
int i=2; i<=polyOrder_; i++)
356 output_(fieldOrdinalOffset,pointOrdinal) = mu[c][b-1] * Li(i);
357 fieldOrdinalOffset++;
363 auto & Li = scratch1D_1;
364 for (
int edgeOrdinal=0; edgeOrdinal<numMixedEdges; edgeOrdinal++)
366 const auto & lambda_a = lambda[edgeOrdinal];
368 for (
int i=2; i<=polyOrder_; i++)
370 output_(fieldOrdinalOffset,pointOrdinal) = Li(i);
371 fieldOrdinalOffset++;
380 auto & Lj = scratch1D_2;
384 for (
int j=2; j<=polyOrder_; j++)
386 for (
int i=2; i<=polyOrder_; i++)
388 output_(fieldOrdinalOffset,pointOrdinal) = mu[0][2] * Li(i) * Lj(j);
389 fieldOrdinalOffset++;
403 for (
int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
407 int a = (faceOrdinal % 2 == 0) ? 1 : 2;
411 int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
413 const auto & s0 = nu[0][a-1];
414 const auto & s1 = nu[1][a-1];
415 const auto & s2 = nu[2][a-1];
416 const PointScalar jacobiScaling = s0 + s1 + s2;
420 auto & jacobi = scratch2D_1;
421 for (
int n=2; n<=polyOrder_; n++)
423 const double alpha = n*2;
424 const int alphaOrdinal = n-2;
425 using Kokkos::subview;
427 auto jacobi_alpha = subview(jacobi, alphaOrdinal, ALL);
431 auto & edge_s0s1 = scratch1D_1;
434 for (
int totalPolyOrder=3; totalPolyOrder<=polyOrder_; totalPolyOrder++)
436 for (
int i=2; i<totalPolyOrder; i++)
438 const auto & edgeValue = edge_s0s1(i);
439 const int alphaOrdinal = i-2;
441 const int j = totalPolyOrder - i;
442 const auto & jacobiValue = jacobi(alphaOrdinal,j);
443 output_(fieldOrdinalOffset,pointOrdinal) = mu[c][b-1] * edgeValue * jacobiValue;
445 fieldOrdinalOffset++;
457 auto & Lk = scratch1D_3;
462 for (
int k=2; k<=polyOrder_; k++)
464 for (
int j=2; j<=polyOrder_; j++)
466 for (
int i=2; i<=polyOrder_; i++)
468 output_(fieldOrdinalOffset,pointOrdinal) = Lk(k) * Li(i) * Lj(j);
469 fieldOrdinalOffset++;
480 for (
int vertexOrdinal=0; vertexOrdinal<numVertices; vertexOrdinal++)
482 for (
int d=0; d<3; d++)
484 output_(vertexOrdinal,pointOrdinal,d) = lambdaGrad[vertexOrdinal][d];
488 if (!defineVertexFunctions_)
492 output_(0,pointOrdinal,0) = 0.0;
493 output_(0,pointOrdinal,1) = 0.0;
494 output_(0,pointOrdinal,2) = 0.0;
498 int fieldOrdinalOffset = numVertices;
501 auto & P_i_minus_1 = scratch1D_1;
502 auto & L_i_dt = scratch1D_2;
503 auto & L_i = scratch1D_3;
505 for (
int edgeOrdinal=0; edgeOrdinal<numMixedEdges; edgeOrdinal++)
509 int a = (edgeOrdinal % 2 == 0) ? 1 : 2;
518 int c = ((edgeOrdinal == 0) || (edgeOrdinal == 3)) ? 0 : 1;
519 for (
int i=2; i<=polyOrder_; i++)
522 const auto & R_i_minus_1 = L_i_dt(i);
524 for (
int d=0; d<3; d++)
528 OutputScalar grad_Li_d = P_i_minus_1(i-1) * nuGrad[1][a-1][d] + R_i_minus_1 * (nuGrad[0][a-1][d] + nuGrad[1][a-1][d]);
529 output_(fieldOrdinalOffset,pointOrdinal,d) = muGrad[c][b-1][d] * L_i(i) + mu[c][b-1] * grad_Li_d;
531 fieldOrdinalOffset++;
536 P_i_minus_1 = scratch1D_1;
537 L_i_dt = scratch1D_2;
539 for (
int edgeOrdinal=0; edgeOrdinal<numMixedEdges; edgeOrdinal++)
541 const auto & lambda_a = lambda [edgeOrdinal];
542 const auto & lambdaGrad_a = lambdaGrad[edgeOrdinal];
547 for (
int i=2; i<=polyOrder_; i++)
549 const auto & R_i_minus_1 = L_i_dt(i);
550 for (
int d=0; d<3; d++)
554 OutputScalar grad_Li_d = P_i_minus_1(i-1) * lambdaGrad[4][d] + R_i_minus_1 * (lambdaGrad_a[d] + lambdaGrad[4][d]);
555 output_(fieldOrdinalOffset,pointOrdinal,d) = grad_Li_d;
557 fieldOrdinalOffset++;
563 P_i_minus_1 = scratch1D_1;
564 L_i_dt = scratch1D_2;
566 auto & P_j_minus_1 = scratch1D_4;
567 auto & L_j_dt = scratch1D_5;
568 auto & L_j = scratch1D_6;
581 for (
int j=2; j<=polyOrder_; j++)
583 const auto & R_j_minus_1 = L_j_dt(j);
585 for (
int i=2; i<=polyOrder_; i++)
587 const auto & R_i_minus_1 = L_i_dt(i);
589 OutputScalar phi_quad = L_i(i) * L_j(j);
591 for (
int d=0; d<3; d++)
595 OutputScalar grad_Lj_d = P_j_minus_1(j-1) * muGrad[1][1][d] + R_j_minus_1 * (muGrad[0][1][d] + muGrad[1][1][d]);
597 OutputScalar grad_Li_d = P_i_minus_1(i-1) * muGrad[1][0][d] + R_i_minus_1 * (muGrad[0][0][d] + muGrad[1][0][d]);
599 OutputScalar grad_phi_quad_d = L_i(i) * grad_Lj_d + L_j(j) * grad_Li_d;
601 output_(fieldOrdinalOffset,pointOrdinal,d) = mu[0][2] * grad_phi_quad_d + phi_quad * muGrad[0][2][d];
604 fieldOrdinalOffset++;
609 for (
int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
613 int a = (faceOrdinal % 2 == 0) ? 1 : 2;
617 int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
619 const auto & s0 = nu[0][a-1];
620 const auto & s1 = nu[1][a-1];
621 const auto & s2 = nu[2][a-1];
623 const auto & s0Grad = nuGrad[0][a-1];
624 const auto & s1Grad = nuGrad[1][a-1];
625 const auto & s2Grad = nuGrad[2][a-1];
627 const PointScalar jacobiScaling = s0 + s1 + s2;
632 auto & P_i_minus_1 = scratch1D_1;
633 auto & L_i_dt = scratch1D_2;
634 auto & L_i = scratch1D_3;
636 auto & L_2i_j_dt = scratch2D_1;
637 auto & L_2i_j = scratch2D_2;
638 auto & P_2i_j_minus_1 = scratch2D_3;
639 for (
int n=2; n<=polyOrder_; n++)
641 const double alpha = n*2;
642 const int alphaOrdinal = n-2;
643 using Kokkos::subview;
645 auto L_2i_j_dt_alpha = subview(L_2i_j_dt, alphaOrdinal, ALL);
646 auto L_2i_j_alpha = subview(L_2i_j, alphaOrdinal, ALL);
647 auto P_2i_j_minus_1_alpha = subview(P_2i_j_minus_1, alphaOrdinal, ALL);
656 for (
int totalPolyOrder=3; totalPolyOrder<=polyOrder_; totalPolyOrder++)
658 for (
int i=2; i<totalPolyOrder; i++)
660 const int alphaOrdinal = i-2;
661 const int j = totalPolyOrder - i;
663 const auto & R_i_minus_1 = L_i_dt(i);
664 OutputScalar phi_tri = L_2i_j(alphaOrdinal,j) * L_i(i);
666 for (
int d=0; d<3; d++)
669 OutputScalar grad_Li_d = P_i_minus_1(i-1) * s1Grad[d] + R_i_minus_1 * (s0Grad[d] + s1Grad[d]);
670 OutputScalar grad_L2i_j_d = P_2i_j_minus_1(alphaOrdinal,j-1) * s2Grad[d] + L_2i_j_dt(alphaOrdinal,j) * (s0Grad[d] + s1Grad[d] + s2Grad[d]);
671 OutputScalar grad_phi_tri_d = L_i(i) * grad_L2i_j_d + L_2i_j(alphaOrdinal,j) * grad_Li_d;
673 output_(fieldOrdinalOffset,pointOrdinal,d) = mu[c][b-1] * grad_phi_tri_d + phi_tri * muGrad[c][b-1][d];
675 fieldOrdinalOffset++;
681 P_i_minus_1 = scratch1D_1;
682 L_i_dt = scratch1D_2;
684 P_j_minus_1 = scratch1D_4;
685 L_j_dt = scratch1D_5;
687 auto & P_k_minus_1 = scratch1D_7;
688 auto & L_k_dt = scratch1D_8;
689 auto & L_k = scratch1D_9;
704 for (
int k=2; k<=polyOrder_; k++)
706 const auto & R_k_minus_1 = L_k_dt(k);
708 for (
int j=2; j<=polyOrder_; j++)
710 const auto & R_j_minus_1 = L_j_dt(j);
712 for (
int i=2; i<=polyOrder_; i++)
714 const auto & R_i_minus_1 = L_i_dt(i);
716 OutputScalar phi_quad = L_i(i) * L_j(j);
718 for (
int d=0; d<3; d++)
722 OutputScalar grad_Li_d = P_i_minus_1(i-1) * muGrad[1][0][d] + R_i_minus_1 * (muGrad[0][0][d] + muGrad[1][0][d]);
724 OutputScalar grad_Lj_d = P_j_minus_1(j-1) * muGrad[1][1][d] + R_j_minus_1 * (muGrad[0][1][d] + muGrad[1][1][d]);
726 OutputScalar grad_Lk_d = P_k_minus_1(k-1) * muGrad[1][2][d] + R_k_minus_1 * (muGrad[0][2][d] + muGrad[1][2][d]);
728 OutputScalar grad_phi_quad_d = L_i(i) * grad_Lj_d + L_j(j) * grad_Li_d;
730 output_(fieldOrdinalOffset,pointOrdinal,d) = L_k(k) * grad_phi_quad_d + phi_quad * grad_Lk_d;
733 fieldOrdinalOffset++;
738 for (
int basisOrdinal=0; basisOrdinal<numFields_; basisOrdinal++)
741 const auto dx_eseas = output_(basisOrdinal,pointOrdinal,0);
742 const auto dy_eseas = output_(basisOrdinal,pointOrdinal,1);
743 const auto dz_eseas = output_(basisOrdinal,pointOrdinal,2);
745 auto &dx_int2 = output_(basisOrdinal,pointOrdinal,0);
746 auto &dy_int2 = output_(basisOrdinal,pointOrdinal,1);
747 auto &dz_int2 = output_(basisOrdinal,pointOrdinal,2);
763 INTREPID2_TEST_FOR_ABORT(
true,
764 ">>> ERROR: (Intrepid2::Hierarchical_HGRAD_PYR_Functor) Computing of second and higher-order derivatives is not currently supported");
774 size_t team_shmem_size (
int team_size)
const
781 const int numAlphaValues = std::max(polyOrder_-1, 1);
782 size_t shmem_size = 0;
783 if (fad_size_output_ > 0)
786 shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
788 shmem_size += 3 * OutputScratchView2D::shmem_size(numAlphaValues, polyOrder_ + 1, fad_size_output_);
793 shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1);
795 shmem_size += 3 * OutputScratchView2D::shmem_size(numAlphaValues, polyOrder_ + 1);