67 using ExecutionSpace =
typename DeviceType::execution_space;
68 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
69 using TeamMember =
typename TeamPolicy::member_type;
71 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
72 IntegralViewType integralView_;
79 int leftComponentSpan_;
80 int rightComponentSpan_;
81 int numTensorComponents_;
82 int leftFieldOrdinalOffset_;
83 int rightFieldOrdinalOffset_;
84 bool forceNonSpecialized_;
86 size_t fad_size_output_ = 0;
88 Kokkos::Array<int, 7> offsetsForComponentOrdinal_;
92 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
93 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
94 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
96 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldRelativeEnumerationSpans_;
97 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldRelativeEnumerationSpans_;
110 int leftFieldOrdinalOffset,
111 int rightFieldOrdinalOffset,
112 bool forceNonSpecialized)
114 integralView_(integralData.template getUnderlyingView<integralViewRank>()),
115 leftComponent_(leftComponent),
116 composedTransform_(composedTransform),
117 rightComponent_(rightComponent),
118 cellMeasures_(cellMeasures),
121 leftComponentSpan_(leftComponent.
extent_int(2)),
122 rightComponentSpan_(rightComponent.
extent_int(2)),
124 leftFieldOrdinalOffset_(leftFieldOrdinalOffset),
125 rightFieldOrdinalOffset_(rightFieldOrdinalOffset),
126 forceNonSpecialized_(forceNonSpecialized)
128 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
131 const int FIELD_DIM = 0;
132 const int POINT_DIM = 1;
136 for (
int r=0; r<numTensorComponents_; r++)
138 leftFieldBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
139 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[r]);
140 rightFieldBounds_[r] = rightComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
141 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[r]);
142 pointBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(POINT_DIM);
143 maxPointCount_ = std::max(maxPointCount_, pointBounds_[r]);
147 int leftRelativeEnumerationSpan = 1;
148 int rightRelativeEnumerationSpan = 1;
149 for (
int startingComponent=numTensorComponents_-2; startingComponent>=0; startingComponent--)
151 leftRelativeEnumerationSpan *= leftFieldBounds_[startingComponent];
152 rightRelativeEnumerationSpan *= rightFieldBounds_[startingComponent];
153 leftFieldRelativeEnumerationSpans_ [startingComponent] = leftRelativeEnumerationSpan;
154 rightFieldRelativeEnumerationSpans_[startingComponent] = rightRelativeEnumerationSpan;
160 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
161 if (allocateFadStorage)
166 const int R = numTensorComponents_ - 1;
169 int allocationSoFar = 0;
170 offsetsForComponentOrdinal_[R] = allocationSoFar;
173 for (
int r=R-1; r>0; r--)
178 num_ij *= leftFields * rightFields;
180 offsetsForComponentOrdinal_[r] = allocationSoFar;
181 allocationSoFar += num_ij;
183 offsetsForComponentOrdinal_[0] = allocationSoFar;
186 template<
size_t maxComponents,
size_t numComponents = maxComponents>
187 KOKKOS_INLINE_FUNCTION
188 int incrementArgument( Kokkos::Array<int,maxComponents> &arguments,
189 const Kokkos::Array<int,maxComponents> &bounds)
const
191 if (numComponents == 0)
197 int r =
static_cast<int>(numComponents - 1);
198 while (arguments[r] + 1 >= bounds[r])
204 if (r >= 0) ++arguments[r];
210 KOKKOS_INLINE_FUNCTION
212 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
213 const int &numComponents)
const
215 if (numComponents == 0)
return -1;
216 int r =
static_cast<int>(numComponents - 1);
217 while (arguments[r] + 1 >= bounds[r])
223 if (r >= 0) ++arguments[r];
227 template<
size_t maxComponents,
size_t numComponents = maxComponents>
228 KOKKOS_INLINE_FUNCTION
229 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &arguments,
230 const Kokkos::Array<int,maxComponents> &bounds)
const
232 if (numComponents == 0)
238 int r =
static_cast<int>(numComponents - 1);
239 while (arguments[r] + 1 >= bounds[r])
249 KOKKOS_INLINE_FUNCTION
251 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
252 const int &numComponents)
const
254 if (numComponents == 0)
return -1;
255 int r = numComponents - 1;
256 while (arguments[r] + 1 >= bounds[r])
264 template<
size_t maxComponents,
size_t numComponents = maxComponents>
265 KOKKOS_INLINE_FUNCTION
266 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &arguments,
267 const Kokkos::Array<int,maxComponents> &bounds,
268 const int startIndex)
const
271 if (numComponents == 0)
277 int enumerationIndex = 0;
278 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
280 enumerationIndex += arguments[r];
281 enumerationIndex *= bounds[r-1];
283 enumerationIndex += arguments[startIndex];
284 return enumerationIndex;
298 KOKKOS_INLINE_FUNCTION
301 constexpr int numTensorComponents = 3;
303 Kokkos::Array<int,numTensorComponents> pointBounds;
304 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
305 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
306 for (
unsigned r=0; r<numTensorComponents; r++)
308 pointBounds[r] = pointBounds_[r];
309 leftFieldBounds[r] = leftFieldBounds_[r];
310 rightFieldBounds[r] = rightFieldBounds_[r];
313 const int cellDataOrdinal = teamMember.league_rank();
314 const int numThreads = teamMember.team_size();
315 const int scratchViewSize = offsetsForComponentOrdinal_[0];
317 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
318 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
319 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_x, leftFields_y, leftFields_z, rightFields_x, rightFields_y, rightFields_z;
320 if (fad_size_output_ > 0) {
321 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
322 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_);
323 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[0], pointBounds[0], fad_size_output_);
324 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[0], pointBounds[0], fad_size_output_);
325 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[1], pointBounds[1], fad_size_output_);
326 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[1], pointBounds[1], fad_size_output_);
327 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[2], pointBounds[2], fad_size_output_);
328 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[2], pointBounds[2], fad_size_output_);
331 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads);
332 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1));
333 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[0], pointBounds[0]);
334 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[0], pointBounds[0]);
335 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[1], pointBounds[1]);
336 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[1], pointBounds[1]);
337 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[2], pointBounds[2]);
338 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[2], pointBounds[2]);
344 constexpr int R = numTensorComponents - 1;
346 const int composedTransformRank = composedTransform_.rank();
348 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
350 const int a = a_offset_ + a_component;
351 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
353 const int b = b_offset_ + b_component;
358 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
359 const int numRightFieldsFinal = rightFinalComponent.
extent_int(0);
368 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
369 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,maxFields * maxPointCount_), [&] (
const int& fieldOrdinalPointOrdinal) {
370 const int fieldOrdinal = fieldOrdinalPointOrdinal % maxFields;
371 const int pointOrdinal = fieldOrdinalPointOrdinal / maxFields;
372 if ((fieldOrdinal < leftTensorComponent_x.
extent_int(0)) && (pointOrdinal < leftTensorComponent_x.
extent_int(1)))
374 const int leftRank = leftTensorComponent_x.
rank();
375 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
377 if ((fieldOrdinal < leftTensorComponent_y.
extent_int(0)) && (pointOrdinal < leftTensorComponent_y.
extent_int(1)))
379 const int leftRank = leftTensorComponent_y.
rank();
380 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
382 if ((fieldOrdinal < leftTensorComponent_z.
extent_int(0)) && (pointOrdinal < leftTensorComponent_z.
extent_int(1)))
384 const int leftRank = leftTensorComponent_z.
rank();
385 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
387 if ((fieldOrdinal < rightTensorComponent_x.
extent_int(0)) && (pointOrdinal < rightTensorComponent_x.
extent_int(1)))
389 const int rightRank = rightTensorComponent_x.
rank();
390 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,b_component);
392 if ((fieldOrdinal < rightTensorComponent_y.
extent_int(0)) && (pointOrdinal < rightTensorComponent_y.
extent_int(1)))
394 const int rightRank = rightTensorComponent_y.
rank();
395 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,b_component);
397 if ((fieldOrdinal < rightTensorComponent_z.
extent_int(0)) && (pointOrdinal < rightTensorComponent_z.
extent_int(1)))
399 const int rightRank = rightTensorComponent_z.
rank();
400 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,b_component);
404 if (composedTransform_.underlyingMatchesLogical())
406 if (composedTransformRank == 4)
408 const auto & composedTransformView = composedTransform_.getUnderlyingView4();
409 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransformView.extent_int(1)), [&] (
const int& pointOrdinal) {
410 pointWeights(pointOrdinal) = composedTransformView(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
415 const auto & composedTransformView = composedTransform_.getUnderlyingView2();
416 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransformView.extent_int(1)), [&] (
const int& pointOrdinal) {
417 pointWeights(pointOrdinal) = composedTransformView(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
423 if (composedTransformRank == 4)
425 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
426 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
431 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
432 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
440 teamMember.team_barrier();
443 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
444 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
446 scratchView(i) = 0.0;
450 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
451 const int iz = leftRightFieldEnumeration % numLeftFieldsFinal;
452 const int jz = leftRightFieldEnumeration / numLeftFieldsFinal;
456 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
457 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
458 rightFieldArguments[R] = jz;
459 leftFieldArguments[R] = iz;
461 Kokkos::Array<int,numTensorComponents> pointArguments;
462 for (
int i=0; i<numTensorComponents; i++)
464 pointArguments[i] = 0;
467 for (
int lx=0; lx<pointBounds[0]; lx++)
469 pointArguments[0] = lx;
473 const int Gy_start_index = scratchOffsetForThread + offsetsForComponentOrdinal_[1];
474 const int Gy_end_index = scratchOffsetForThread + offsetsForComponentOrdinal_[0];
476 for (
int Gy_index=Gy_start_index; Gy_index < Gy_end_index; Gy_index++)
478 scratchView(Gy_index) = 0;
481 for (
int ly=0; ly<pointBounds[1]; ly++)
483 pointArguments[1] = ly;
485 Scalar * Gz = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
488 for (
int lz=0; lz < pointBounds[2]; lz++)
490 const Scalar & leftValue = leftFields_z(iz,lz);
491 const Scalar & rightValue = rightFields_z(jz,lz);
493 pointArguments[2] = lz;
494 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
496 *Gz += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
501 for (
int iy=0; iy<leftFieldBounds_[1]; iy++)
503 leftFieldArguments[1] = iy;
504 const int leftEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, 1);
506 const Scalar & leftValue = leftFields_y(iy,ly);
508 for (
int jy=0; jy<rightFieldBounds_[1]; jy++)
510 rightFieldArguments[1] = jy;
512 const int rightEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, 1);
513 const Scalar & rightValue = rightFields_y(jy,ly);
515 const int & rightEnumerationSpan_y = rightFieldRelativeEnumerationSpans_[1];
516 const int Gy_index = leftEnumerationIndex_y * rightEnumerationSpan_y + rightEnumerationIndex_y;
518 const int Gz_index = 0;
519 const Scalar & Gz = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[2] + Gz_index);
521 Scalar & Gy = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[1] + Gy_index);
523 Gy += leftValue * Gz * rightValue;
528 for (
int ix=0; ix<leftFieldBounds_[0]; ix++)
530 leftFieldArguments[0] = ix;
531 const Scalar & leftValue = leftFields_x(ix,lx);
533 for (
int iy=0; iy<leftFieldBounds_[1]; iy++)
535 leftFieldArguments[1] = iy;
537 const int leftEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, 1);
539 for (
int jx=0; jx<rightFieldBounds_[0]; jx++)
541 rightFieldArguments[0] = jx;
542 const Scalar & rightValue = rightFields_x(jx,lx);
544 for (
int jy=0; jy<rightFieldBounds_[1]; jy++)
546 rightFieldArguments[1] = jy;
547 const int rightEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, 1);
549 const int rightEnumerationSpan_y = rightFieldRelativeEnumerationSpans_[1];
551 const int Gy_index = leftEnumerationIndex_y * rightEnumerationSpan_y + rightEnumerationIndex_y;
552 Scalar & Gy = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[1] + Gy_index);
555 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
556 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
557 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
558 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
560 if (integralViewRank == 3)
582 integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex) += leftValue * Gy * rightValue;
587 integralView_.access(leftFieldIndex,rightFieldIndex,0) += leftValue * Gy * rightValue;
601 template<
size_t numTensorComponents>
602 KOKKOS_INLINE_FUNCTION
603 void run(
const TeamMember & teamMember )
const
605 Kokkos::Array<int,numTensorComponents> pointBounds;
606 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
607 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
608 for (
unsigned r=0; r<numTensorComponents; r++)
610 pointBounds[r] = pointBounds_[r];
611 leftFieldBounds[r] = leftFieldBounds_[r];
612 rightFieldBounds[r] = rightFieldBounds_[r];
615 const int cellDataOrdinal = teamMember.league_rank();
616 const int numThreads = teamMember.team_size();
617 const int scratchViewSize = offsetsForComponentOrdinal_[0];
618 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
619 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
620 Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged> leftFields, rightFields;
621 if (fad_size_output_ > 0) {
622 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
623 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1), fad_size_output_);
624 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_, fad_size_output_);
625 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_, fad_size_output_);
628 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize*numThreads);
629 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1));
630 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_);
631 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_);
637 constexpr int R = numTensorComponents - 1;
639 const int composedTransformRank = composedTransform_.rank();
641 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
643 const int a = a_offset_ + a_component;
644 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
646 const int b = b_offset_ + b_component;
648 const Data<Scalar,DeviceType> & leftFinalComponent = leftComponent_.getTensorComponent(R);
649 const Data<Scalar,DeviceType> & rightFinalComponent = rightComponent_.getTensorComponent(R);
651 const int numLeftFieldsFinal = leftFinalComponent.extent_int(0);
652 const int numRightFieldsFinal = rightFinalComponent.extent_int(0);
654 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numTensorComponents), [&] (
const int& r) {
655 const Data<Scalar,DeviceType> & leftTensorComponent = leftComponent_.getTensorComponent(r);
656 const Data<Scalar,DeviceType> & rightTensorComponent = rightComponent_.getTensorComponent(r);
657 const int leftFieldCount = leftTensorComponent.extent_int(0);
658 const int pointCount = leftTensorComponent.extent_int(1);
659 const int leftRank = leftTensorComponent.rank();
660 const int rightFieldCount = rightTensorComponent.extent_int(0);
661 const int rightRank = rightTensorComponent.rank();
662 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsLeft_; fieldOrdinal++)
665 const int fieldAddress = (fieldOrdinal < leftFieldCount) ? fieldOrdinal : leftFieldCount - 1;
666 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
668 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
669 leftFields(r,fieldAddress,pointAddress) = (leftRank == 2) ? leftTensorComponent(fieldAddress,pointAddress) : leftTensorComponent(fieldAddress,pointAddress,a_component);
672 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsRight_; fieldOrdinal++)
675 const int fieldAddress = (fieldOrdinal < rightFieldCount) ? fieldOrdinal : rightFieldCount - 1;
676 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
678 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
679 rightFields(r,fieldAddress,pointAddress) = (rightRank == 2) ? rightTensorComponent(fieldAddress,pointAddress) : rightTensorComponent(fieldAddress,pointAddress,b_component);
684 if (composedTransformRank == 4)
686 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
687 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
692 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
693 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
699 teamMember.team_barrier();
701 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
702 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
703 const int i_R = leftRightFieldEnumeration % numLeftFieldsFinal;
704 const int j_R = leftRightFieldEnumeration / numLeftFieldsFinal;
708 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
709 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
710 rightFieldArguments[R] = j_R;
711 leftFieldArguments[R] = i_R;
714 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
716 scratchView(i) = 0.0;
718 Kokkos::Array<int,numTensorComponents> pointArguments;
719 for (
unsigned i=0; i<numTensorComponents; i++)
721 pointArguments[i] = 0;
728 const int pointBounds_R = pointBounds[R];
729 int & pointArgument_R = pointArguments[R];
730 for (pointArgument_R=0; pointArgument_R < pointBounds_R; pointArgument_R++)
735 G_R = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
739 const int leftFieldIndex = i_R + leftFieldOrdinalOffset_;
740 const int rightFieldIndex = j_R + rightFieldOrdinalOffset_;
742 if (integralViewRank == 3)
745 G_R = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
750 G_R = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
754 const int & pointIndex_R = pointArguments[R];
756 const Scalar & leftValue = leftFields(R,i_R,pointIndex_R);
757 const Scalar & rightValue = rightFields(R,j_R,pointIndex_R);
759 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
761 *G_R += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
766 const int r_next = nextIncrementResult<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
767 const int r_min = (r_next >= 0) ? r_next : 0;
769 for (
int s=R-1; s>=r_min; s--)
771 const int & pointIndex_s = pointArguments[s];
774 for (
int s1=s; s1<R; s1++)
776 leftFieldArguments[s1] = 0;
780 const int & i_s = leftFieldArguments[s];
781 const int & j_s = rightFieldArguments[s];
784 const int & rightEnumerationSpan_s = rightFieldRelativeEnumerationSpans_[s];
785 const int & rightEnumerationSpan_sp = rightFieldRelativeEnumerationSpans_[s+1];
789 const int leftEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s);
792 const int leftEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s+1);
794 const Scalar & leftValue = leftFields(s,i_s,pointIndex_s);
796 for (
int s1=s; s1<R; s1++)
798 rightFieldArguments[s1] = 0;
803 const int rightEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s);
806 const int rightEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s+1);
808 const Scalar & rightValue = rightFields(s,j_s,pointIndex_s);
810 const int G_s_index = leftEnumerationIndex_s * rightEnumerationSpan_s + rightEnumerationIndex_s;
816 const int G_sp_index = leftEnumerationIndex_sp * rightEnumerationSpan_sp + rightEnumerationIndex_sp;
818 const Scalar & G_sp = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s+1] + G_sp_index);
823 G_s = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s] + G_s_index);
828 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
829 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
830 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
831 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
833 if (integralViewRank == 3)
836 G_s = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
841 G_s = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
845 *G_s += leftValue * G_sp * rightValue;
850 sRight = incrementArgument<numTensorComponents,R>(rightFieldArguments, rightFieldBounds);
854 sLeft = incrementArgument<numTensorComponents,R>(leftFieldArguments, leftFieldBounds);
861 const int endIndex = scratchOffsetForThread + offsetsForComponentOrdinal_[r_min];
862 for (
int i=scratchOffsetForThread; i<endIndex; i++)
864 scratchView(i) = 0.0;
871 r = incrementArgument<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
879 KOKKOS_INLINE_FUNCTION
880 void operator()(
const TeamMember & teamMember )
const
882 switch (numTensorComponents_)
884 case 1: run<1>(teamMember);
break;
885 case 2: run<2>(teamMember);
break;
887 if (forceNonSpecialized_)
892 case 4: run<4>(teamMember);
break;
893 case 5: run<5>(teamMember);
break;
894 case 6: run<6>(teamMember);
break;
895 case 7: run<7>(teamMember);
break;
896#ifdef INTREPID2_HAVE_DEBUG
909 const int R = numTensorComponents_ - 1;
913 const int flopsPerPoint_ab = cellMeasures_.numTensorComponents();
915 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
917 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
922 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
923 const int numRightFieldsFinal = rightFinalComponent.
extent_int(0);
925 flopCount += flopsPerPoint_ab * cellMeasures_.extent_int(1);
927 int flopsPer_i_R_j_R = 0;
931 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldArguments;
932 leftFieldArguments[R] = 0;
934 Kokkos::Array<int,Parameters::MaxTensorComponents> pointArguments;
935 for (
int i=0; i<numTensorComponents_; i++)
937 pointArguments[i] = 0;
942 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldArguments;
943 rightFieldArguments[R] = 0;
949 for (pointArguments[R]=0; pointArguments[R] < pointBounds_[R]; pointArguments[R]++)
951 flopsPer_i_R_j_R += 4;
959 const int r_next = nextIncrementResult(pointArguments, pointBounds_, numTensorComponents_);
960 const int r_min = (r_next >= 0) ? r_next : 0;
962 for (
int s=R-1; s>=r_min; s--)
965 int numLeftIterations = leftFieldRelativeEnumerationSpans_[s];
966 int numRightIterations = rightFieldRelativeEnumerationSpans_[s];
968 flopsPer_i_R_j_R += numLeftIterations * numRightIterations * 3;
972 r = incrementArgument(pointArguments, pointBounds_, numTensorComponents_);
975 flopCount += flopsPer_i_R_j_R * numLeftFieldsFinal * numRightFieldsFinal;
983 int teamSize(
const int &maxTeamSizeFromKokkos)
const
985 const int R = numTensorComponents_ - 1;
986 const int threadParallelismExpressed = leftFieldBounds_[R] * rightFieldBounds_[R];
987 return std::min(maxTeamSizeFromKokkos, threadParallelismExpressed);
994 size_t shmem_size = 0;
996 if (fad_size_output_ > 0)
998 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(offsetsForComponentOrdinal_[0] * team_size, fad_size_output_);
999 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.extent_int(1), fad_size_output_);
1000 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_, fad_size_output_);
1001 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_, fad_size_output_);
1005 shmem_size += Kokkos::View<Scalar*, DeviceType>::shmem_size(offsetsForComponentOrdinal_[0] * team_size);
1006 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.extent_int(1));
1007 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_);
1008 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_);
1025 class F_IntegratePointValueCache
1027 using ExecutionSpace =
typename DeviceType::execution_space;
1028 using TeamPolicy = Kokkos::TeamPolicy<DeviceType>;
1029 using TeamMember =
typename TeamPolicy::member_type;
1031 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
1032 IntegralViewType integralView_;
1039 int leftComponentSpan_;
1040 int rightComponentSpan_;
1041 int numTensorComponents_;
1042 int leftFieldOrdinalOffset_;
1043 int rightFieldOrdinalOffset_;
1045 size_t fad_size_output_ = 0;
1049 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
1050 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
1051 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
1054 int maxFieldsRight_;
1064 int leftFieldOrdinalOffset,
1065 int rightFieldOrdinalOffset)
1067 integralView_(integralData.template getUnderlyingView<integralViewRank>()),
1068 leftComponent_(leftComponent),
1069 composedTransform_(composedTransform),
1070 rightComponent_(rightComponent),
1071 cellMeasures_(cellMeasures),
1072 a_offset_(a_offset),
1073 b_offset_(b_offset),
1074 leftComponentSpan_(leftComponent.
extent_int(2)),
1075 rightComponentSpan_(rightComponent.
extent_int(2)),
1077 leftFieldOrdinalOffset_(leftFieldOrdinalOffset),
1078 rightFieldOrdinalOffset_(rightFieldOrdinalOffset)
1080 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
1082 const int FIELD_DIM = 0;
1083 const int POINT_DIM = 1;
1085 maxFieldsRight_ = 0;
1087 for (
int r=0; r<numTensorComponents_; r++)
1089 leftFieldBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
1090 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[r]);
1091 rightFieldBounds_[r] = rightComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
1092 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[r]);
1093 pointBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(POINT_DIM);
1094 maxPointCount_ = std::max(maxPointCount_, pointBounds_[r]);
1100 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
1101 if (allocateFadStorage)
1107 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1108 KOKKOS_INLINE_FUNCTION
1109 int incrementArgument( Kokkos::Array<int,maxComponents> &arguments,
1110 const Kokkos::Array<int,maxComponents> &bounds)
const
1112 if (numComponents == 0)
return -1;
1113 int r =
static_cast<int>(numComponents - 1);
1114 while (arguments[r] + 1 >= bounds[r])
1120 if (r >= 0) ++arguments[r];
1125 KOKKOS_INLINE_FUNCTION
1127 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
1128 const int &numComponents)
const
1130 if (numComponents == 0)
return -1;
1131 int r =
static_cast<int>(numComponents - 1);
1132 while (arguments[r] + 1 >= bounds[r])
1138 if (r >= 0) ++arguments[r];
1142 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1143 KOKKOS_INLINE_FUNCTION
1144 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &arguments,
1145 const Kokkos::Array<int,maxComponents> &bounds)
const
1147 if (numComponents == 0)
return -1;
1148 int r =
static_cast<int>(numComponents - 1);
1149 while (arguments[r] + 1 >= bounds[r])
1158 KOKKOS_INLINE_FUNCTION
1160 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
1161 const int &numComponents)
const
1163 if (numComponents == 0)
return -1;
1164 int r = numComponents - 1;
1165 while (arguments[r] + 1 >= bounds[r])
1173 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1174 KOKKOS_INLINE_FUNCTION
1175 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &arguments,
1176 const Kokkos::Array<int,maxComponents> &bounds,
1177 const int startIndex)
const
1180 if (numComponents == 0)
1184 int enumerationIndex = 0;
1185 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
1187 enumerationIndex += arguments[r];
1188 enumerationIndex *= bounds[r-1];
1190 enumerationIndex += arguments[startIndex];
1191 return enumerationIndex;
1195 KOKKOS_INLINE_FUNCTION
1196 enable_if_t<rank==3 && rank==integralViewRank, Scalar &>
1197 integralViewEntry(
const IntegralViewType& integralView,
const int &cellDataOrdinal,
const int &i,
const int &j)
const
1199 return integralView(cellDataOrdinal,i,j);
1203 KOKKOS_INLINE_FUNCTION
1204 enable_if_t<rank==2 && rank==integralViewRank, Scalar &>
1205 integralViewEntry(
const IntegralViewType& integralView,
const int &cellDataOrdinal,
const int &i,
const int &j)
const
1207 return integralView(i,j);
1211 KOKKOS_INLINE_FUNCTION
1214 constexpr int numTensorComponents = 3;
1216 const int pointBounds_x = pointBounds_[0];
1217 const int pointBounds_y = pointBounds_[1];
1218 const int pointBounds_z = pointBounds_[2];
1219 const int pointsInNonzeroComponentDimensions = pointBounds_y * pointBounds_z;
1221 const int leftFieldBounds_x = leftFieldBounds_[0];
1222 const int rightFieldBounds_x = rightFieldBounds_[0];
1223 const int leftFieldBounds_y = leftFieldBounds_[1];
1224 const int rightFieldBounds_y = rightFieldBounds_[1];
1225 const int leftFieldBounds_z = leftFieldBounds_[2];
1226 const int rightFieldBounds_z = rightFieldBounds_[2];
1228 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
1229 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
1230 for (
unsigned r=0; r<numTensorComponents; r++)
1232 leftFieldBounds[r] = leftFieldBounds_[r];
1233 rightFieldBounds[r] = rightFieldBounds_[r];
1236 const auto integralView = integralView_;
1237 const auto leftFieldOrdinalOffset = leftFieldOrdinalOffset_;
1238 const auto rightFieldOrdinalOffset = rightFieldOrdinalOffset_;
1240 const int cellDataOrdinal = teamMember.league_rank();
1241 const int threadNumber = teamMember.team_rank();
1243 const int numThreads = teamMember.team_size();
1244 const int GyEntryCount = pointBounds_z;
1245 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GxIntegrals;
1246 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GyIntegrals;
1247 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GzIntegral;
1248 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
1250 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_x, rightFields_x;
1251 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_y, rightFields_y;
1252 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_z, rightFields_z;
1253 if (fad_size_output_ > 0) {
1254 GxIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), pointsInNonzeroComponentDimensions, fad_size_output_);
1255 GyIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), GyEntryCount * numThreads, fad_size_output_);
1256 GzIntegral = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numThreads, fad_size_output_);
1257 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_);
1259 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x, fad_size_output_);
1260 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_x, pointBounds_x, fad_size_output_);
1261 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_y, pointBounds_y, fad_size_output_);
1262 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_y, pointBounds_y, fad_size_output_);
1263 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_z, pointBounds_z, fad_size_output_);
1264 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_z, pointBounds_z, fad_size_output_);
1267 GxIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), pointsInNonzeroComponentDimensions);
1268 GyIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), GyEntryCount * numThreads);
1269 GzIntegral = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numThreads);
1270 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1));
1272 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x);
1273 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_x, pointBounds_x);
1274 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_y, pointBounds_y);
1275 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_y, pointBounds_y);
1276 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_z, pointBounds_z);
1277 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_z, pointBounds_z);
1285 const int composedTransformRank = composedTransform_.rank();
1288 teamMember.team_barrier();
1290 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
1292 const int a = a_offset_ + a_component;
1293 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
1295 const int b = b_offset_ + b_component;
1304 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
1305 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,maxFields), [&] (
const int& fieldOrdinal) {
1306 if (fieldOrdinal < leftTensorComponent_x.
extent_int(0))
1308 const int pointCount = leftTensorComponent_x.
extent_int(1);
1309 const int leftRank = leftTensorComponent_x.
rank();
1310 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1312 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
1315 if (fieldOrdinal < leftTensorComponent_y.
extent_int(0))
1317 const int pointCount = leftTensorComponent_y.
extent_int(1);
1318 const int leftRank = leftTensorComponent_y.
rank();
1319 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1321 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
1324 if (fieldOrdinal < leftTensorComponent_z.
extent_int(0))
1326 const int pointCount = leftTensorComponent_z.
extent_int(1);
1327 const int leftRank = leftTensorComponent_z.
rank();
1328 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1330 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
1333 if (fieldOrdinal < rightTensorComponent_x.
extent_int(0))
1335 const int pointCount = rightTensorComponent_x.
extent_int(1);
1336 const int rightRank = rightTensorComponent_x.
rank();
1337 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1339 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
1342 if (fieldOrdinal < rightTensorComponent_y.
extent_int(0))
1344 const int pointCount = rightTensorComponent_y.
extent_int(1);
1345 const int rightRank = rightTensorComponent_y.
rank();
1346 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1348 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
1351 if (fieldOrdinal < rightTensorComponent_z.
extent_int(0))
1353 const int pointCount = rightTensorComponent_z.
extent_int(1);
1354 const int rightRank = rightTensorComponent_z.
rank();
1355 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1357 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
1362 if (composedTransformRank == 4)
1364 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
1365 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
1370 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
1371 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
1376 teamMember.team_barrier();
1378 for (
int i0=0; i0<leftFieldBounds_x; i0++)
1380 for (
int j0=0; j0<rightFieldBounds_x; j0++)
1382 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,pointsInNonzeroComponentDimensions), [&] (
const int& pointEnumerationIndexLaterDimensions) {
1384 const int tensorPointEnumerationOffset = pointBounds_x * pointEnumerationIndexLaterDimensions;
1386 Scalar & Gx = GxIntegrals(pointEnumerationIndexLaterDimensions);
1389 if (fad_size_output_ == 0)
1392 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, pointBounds_x), [&] (
const int &x_pointOrdinal, Scalar &integralThusFar)
1394 integralThusFar += leftFields_x(i0,x_pointOrdinal) * rightFields_x(j0,x_pointOrdinal) * pointWeights(tensorPointEnumerationOffset + x_pointOrdinal);
1399 for (
int x_pointOrdinal=0; x_pointOrdinal<pointBounds_x; x_pointOrdinal++)
1401 Gx += leftFields_x(i0,x_pointOrdinal) * rightFields_x(j0,x_pointOrdinal) * pointWeights(tensorPointEnumerationOffset + x_pointOrdinal);
1407 teamMember.team_barrier();
1409 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,leftFieldBounds_y * rightFieldBounds_y), [&] (
const int& i1j1) {
1410 const int i1 = i1j1 % leftFieldBounds_y;
1411 const int j1 = i1j1 / leftFieldBounds_y;
1413 int Gy_index = GyEntryCount * threadNumber;
1415 int pointEnumerationIndex = 0;
1416 for (
int lz=0; lz<pointBounds_z; lz++)
1418 Scalar & Gy = GyIntegrals(Gy_index);
1421 for (
int ly=0; ly<pointBounds_y; ly++)
1423 const Scalar & leftValue = leftFields_y(i1,ly);
1424 const Scalar & rightValue = rightFields_y(j1,ly);
1426 Gy += leftValue * rightValue * GxIntegrals(pointEnumerationIndex);
1428 pointEnumerationIndex++;
1433 Scalar & Gz = GzIntegral(threadNumber);
1434 for (
int i2=0; i2<leftFieldBounds_z; i2++)
1436 for (
int j2=0; j2<rightFieldBounds_z; j2++)
1440 int Gy_index = GyEntryCount * threadNumber;
1442 for (
int lz=0; lz<pointBounds_z; lz++)
1444 const Scalar & leftValue = leftFields_z(i2,lz);
1445 const Scalar & rightValue = rightFields_z(j2,lz);
1447 Gz += leftValue * rightValue * GyIntegrals(Gy_index);
1452 const int i = leftFieldOrdinalOffset + i0 + (i1 + i2 * leftFieldBounds_y) * leftFieldBounds_x;
1453 const int j = rightFieldOrdinalOffset + j0 + (j1 + j2 * rightFieldBounds_y) * rightFieldBounds_x;
1458 integralViewEntry<integralViewRank>(integralView, cellDataOrdinal, i, j) += Gz;
1463 teamMember.team_barrier();
1470 template<
size_t numTensorComponents>
1471 KOKKOS_INLINE_FUNCTION
1472 void run(
const TeamMember & teamMember )
const
1610 KOKKOS_INLINE_FUNCTION
1611 void operator()(
const TeamMember & teamMember )
const
1613 switch (numTensorComponents_)
1615 case 1: run<1>(teamMember);
break;
1616 case 2: run<2>(teamMember);
break;
1618 case 4: run<4>(teamMember);
break;
1619 case 5: run<5>(teamMember);
break;
1620 case 6: run<6>(teamMember);
break;
1621 case 7: run<7>(teamMember);
break;
1622#ifdef INTREPID2_HAVE_DEBUG
1635 constexpr int numTensorComponents = 3;
1636 Kokkos::Array<int,numTensorComponents> pointBounds;
1637 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
1638 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
1640 int pointsInNonzeroComponentDimensions = 1;
1641 for (
unsigned r=0; r<numTensorComponents; r++)
1643 pointBounds[r] = pointBounds_[r];
1644 if (r > 0) pointsInNonzeroComponentDimensions *= pointBounds[r];
1645 leftFieldBounds[r] = leftFieldBounds_[r];
1646 rightFieldBounds[r] = rightFieldBounds_[r];
1649 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
1651 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
1656 flopCount += composedTransform_.extent_int(1) * cellMeasures_.numTensorComponents();
1658 for (
int i0=0; i0<leftFieldBounds[0]; i0++)
1660 for (
int j0=0; j0<rightFieldBounds[0]; j0++)
1662 flopCount += pointsInNonzeroComponentDimensions * pointBounds[0] * 3;
1702 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * pointBounds[1] * pointBounds[2] * 3;
1703 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * leftFieldBounds[2] * rightFieldBounds[2] * pointBounds[2] * 3;
1704 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * leftFieldBounds[2] * rightFieldBounds[2] * 1;
1780 const int R = numTensorComponents_ - 1;
1781 const int threadParallelismExpressed = leftFieldBounds_[R] * rightFieldBounds_[R];
1782 return std::min(maxTeamSizeFromKokkos, threadParallelismExpressed);
1789 size_t shmem_size = 0;
1791 const int GyEntryCount = pointBounds_[2];
1793 int pointsInNonzeroComponentDimensions = 1;
1794 for (
int d=1; d<numTensorComponents_; d++)
1796 pointsInNonzeroComponentDimensions *= pointBounds_[d];
1799 if (fad_size_output_ > 0)
1801 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(pointsInNonzeroComponentDimensions, fad_size_output_);
1802 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(GyEntryCount * numThreads, fad_size_output_);
1803 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( 1 * numThreads, fad_size_output_);
1804 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size (composedTransform_.extent_int(1), fad_size_output_);
1806 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[0], pointBounds_[0], fad_size_output_);
1807 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[0], pointBounds_[0], fad_size_output_);
1808 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[1], pointBounds_[1], fad_size_output_);
1809 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[1], pointBounds_[1], fad_size_output_);
1810 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[2], pointBounds_[2], fad_size_output_);
1811 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[2], pointBounds_[2], fad_size_output_);
1815 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(pointsInNonzeroComponentDimensions);
1816 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(GyEntryCount * numThreads);
1817 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( 1 * numThreads);
1818 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size (composedTransform_.extent_int(1));
1820 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[0], pointBounds_[0]);
1821 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[0], pointBounds_[0]);
1822 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[1], pointBounds_[1]);
1823 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[1], pointBounds_[1]);
1824 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[2], pointBounds_[2]);
1825 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[2], pointBounds_[2]);
1941 double* approximateFlops)
1943 using ExecutionSpace =
typename DeviceType::execution_space;
1947 const bool leftHasOrdinalFilter = basisValuesLeft.basisValues().ordinalFilter().extent_int(0) > 0;
1948 const bool rightHasOrdinalFilter = basisValuesRight.basisValues().ordinalFilter().extent_int(0) > 0;
1949 TEUCHOS_TEST_FOR_EXCEPTION(leftHasOrdinalFilter || rightHasOrdinalFilter, std::invalid_argument,
"Ordinal filters for BasisValues are not yet supported by IntegrationTools");
1951 if (approximateFlops != NULL)
1953 *approximateFlops = 0;
1965 const int numFieldsLeft = basisValuesLeft.
numFields();
1966 const int numFieldsRight = basisValuesRight.
numFields();
1967 const int spaceDim = basisValuesLeft.
spaceDim();
1969 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.
spaceDim() != basisValuesRight.
spaceDim(), std::invalid_argument,
"vectorDataLeft and vectorDataRight must agree on the space dimension");
1971 const int leftFamilyCount = basisValuesLeft.basisValues().
numFamilies();
1972 const int rightFamilyCount = basisValuesRight.basisValues().
numFamilies();
1976 int numTensorComponentsLeft = -1;
1977 const bool isVectorValued = basisValuesLeft.
vectorData().isValid();
1980 const bool rightIsVectorValued = basisValuesRight.
vectorData().isValid();
1981 INTREPID2_TEST_FOR_EXCEPTION(!rightIsVectorValued, std::invalid_argument,
"left and right must either both be vector-valued, or both scalar-valued");
1982 const auto &refVectorLeft = basisValuesLeft.
vectorData();
1983 int numFamiliesLeft = refVectorLeft.numFamilies();
1984 int numVectorComponentsLeft = refVectorLeft.numComponents();
1985 Kokkos::Array<int,7> maxFieldsForComponentLeft {0,0,0,0,0,0,0};
1986 Kokkos::Array<int,7> maxFieldsForComponentRight {0,0,0,0,0,0,0};
1987 for (
int familyOrdinal=0; familyOrdinal<numFamiliesLeft; familyOrdinal++)
1989 for (
int vectorComponent=0; vectorComponent<numVectorComponentsLeft; vectorComponent++)
1994 if (numTensorComponentsLeft == -1)
1998 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != tensorData.
numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataLeft must have the same number of tensor components as every other");
1999 for (
int r=0; r<numTensorComponentsLeft; r++)
2006 int numTensorComponentsRight = -1;
2007 const auto &refVectorRight = basisValuesRight.
vectorData();
2008 int numFamiliesRight = refVectorRight.numFamilies();
2009 int numVectorComponentsRight = refVectorRight.numComponents();
2010 for (
int familyOrdinal=0; familyOrdinal<numFamiliesRight; familyOrdinal++)
2012 for (
int vectorComponent=0; vectorComponent<numVectorComponentsRight; vectorComponent++)
2014 const auto &tensorData = refVectorRight.getComponent(familyOrdinal,vectorComponent);
2015 if (tensorData.numTensorComponents() > 0)
2017 if (numTensorComponentsRight == -1)
2019 numTensorComponentsRight = tensorData.numTensorComponents();
2021 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsRight != tensorData.numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataRight must have the same number of tensor components as every other");
2022 for (
int r=0; r<numTensorComponentsRight; r++)
2024 maxFieldsForComponentRight[r] = std::max(tensorData.getTensorComponent(r).extent_int(0), maxFieldsForComponentRight[r]);
2029 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != numVectorComponentsRight, std::invalid_argument,
"Left and right vector entries must have the same number of tensorial components");
2034 for (
int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++)
2036 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().
tensorData(familyOrdinal).
numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"All families must match in the number of tensor components");
2040 for (
int familyOrdinal=0; familyOrdinal< rightFamilyCount; familyOrdinal++)
2042 INTREPID2_TEST_FOR_EXCEPTION(basisValuesRight.basisValues().
tensorData(familyOrdinal).
numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"Right families must match left in the number of tensor components");
2047 if ((numPointTensorComponents == numTensorComponentsLeft) && basisValuesLeft.
axisAligned() && basisValuesRight.
axisAligned())
2056 Kokkos::Array<int,Parameters::MaxTensorComponents> pointDimensions;
2057 for (
int r=0; r<numPointTensorComponents; r++)
2064 Kokkos::View<Scalar**, DeviceType> integralView2;
2065 Kokkos::View<Scalar***, DeviceType> integralView3;
2066 if (integralViewRank == 3)
2074 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
2079 const int leftVectorComponentCount = isVectorValued ? basisValuesLeft.
vectorData().numComponents() : 1;
2080 for (
int leftVectorComponentOrdinal = 0; leftVectorComponentOrdinal < leftVectorComponentCount; leftVectorComponentOrdinal++)
2083 : basisValuesLeft.basisValues().
tensorData(leftFamilyOrdinal);
2089 const int leftDimSpan = leftComponent.
extent_int(2);
2091 const int leftComponentFieldCount = leftComponent.
extent_int(0);
2093 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2096 int rightFieldOffset = basisValuesRight.
vectorData().familyFieldOrdinalOffset(rightFamilyOrdinal);
2098 const int rightVectorComponentCount = isVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2099 for (
int rightVectorComponentOrdinal = 0; rightVectorComponentOrdinal < rightVectorComponentCount; rightVectorComponentOrdinal++)
2102 : basisValuesRight.basisValues().
tensorData(rightFamilyOrdinal);
2103 if (!rightComponent.
isValid())
2108 const int rightDimSpan = rightComponent.
extent_int(2);
2110 const int rightComponentFieldCount = rightComponent.
extent_int(0);
2113 if ((a_offset + leftDimSpan <= b_offset) || (b_offset + rightDimSpan <= a_offset))
2115 b_offset += rightDimSpan;
2121 INTREPID2_TEST_FOR_EXCEPTION(( a_offset != b_offset), std::logic_error,
"left and right dimension offsets should match.");
2122 INTREPID2_TEST_FOR_EXCEPTION(( leftDimSpan != rightDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2124 const int d_start = a_offset;
2125 const int d_end = d_start + leftDimSpan;
2128 ComponentIntegralsArray componentIntegrals;
2129 for (
int r=0; r<numPointTensorComponents; r++)
2146 const int numPoints = pointDimensions[r];
2153 const int leftTensorComponentDimSpan = leftTensorComponent.
extent_int(2);
2154 const int leftTensorComponentFields = leftTensorComponent.
extent_int(0);
2155 const int rightTensorComponentDimSpan = rightTensorComponent.
extent_int(2);
2156 const int rightTensorComponentFields = rightTensorComponent.
extent_int(0);
2158 INTREPID2_TEST_FOR_EXCEPTION(( leftTensorComponentDimSpan != rightTensorComponentDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2160 for (
int d=d_start; d<d_end; d++)
2162 ScalarView<Scalar,DeviceType> componentIntegralView;
2164 const bool allocateFadStorage = !std::is_pod<Scalar>::value;
2165 if (allocateFadStorage)
2168 componentIntegralView = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields, fad_size_output);
2172 componentIntegralView = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields);
2175 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{leftTensorComponentFields,rightTensorComponentFields});
2178 leftTensorComponentDimSpan);
2179 Kokkos::parallel_for(
"compute componentIntegrals", policy, refSpaceIntegralFunctor);
2181 componentIntegrals[r][d] = componentIntegralView;
2183 if (approximateFlops != NULL)
2185 *approximateFlops += leftTensorComponentFields*rightTensorComponentFields*numPoints*(3);
2190 ExecutionSpace().fence();
2192 Kokkos::Array<int,3> upperBounds {cellDataExtent,leftComponentFieldCount,rightComponentFieldCount};
2193 Kokkos::Array<int,3> lowerBounds {0,0,0};
2194 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>(lowerBounds, upperBounds);
2196 Kokkos::parallel_for(
"compute field integrals", policy,
2197 KOKKOS_LAMBDA (
const int &cellDataOrdinal,
const int &leftFieldOrdinal,
const int &rightFieldOrdinal) {
2198 const Scalar & cellMeasureWeight = cellMeasures.
getTensorComponent(0)(cellDataOrdinal);
2206 Scalar integralSum = 0.0;
2207 for (
int d=d_start; d<d_end; d++)
2209 const Scalar & transformLeft_d = basisValuesLeft.
transformWeight(cellDataOrdinal,0,d,d);
2210 const Scalar & transformRight_d = basisValuesRight.
transformWeight(cellDataOrdinal,0,d,d);
2212 const Scalar & leftRightTransform_d = transformLeft_d * transformRight_d;
2215 Scalar integral_d = 1.0;
2217 for (
int r=0; r<numPointTensorComponents; r++)
2219 integral_d *= componentIntegrals[r][d](leftTensorIterator.
argument(r),rightTensorIterator.
argument(r));
2222 integralSum += leftRightTransform_d * integral_d;
2225 const int i = leftFieldOrdinal + leftFieldOffset;
2226 const int j = rightFieldOrdinal + rightFieldOffset;
2228 if (integralViewRank == 3)
2230 integralView3(cellDataOrdinal,i,j) += cellMeasureWeight * integralSum;
2234 integralView2(i,j) += cellMeasureWeight * integralSum;
2238 b_offset += rightDimSpan;
2241 a_offset += leftDimSpan;
2245 if (approximateFlops != NULL)
2248 *approximateFlops += (2 + spaceDim * (3 + numPointTensorComponents)) * cellDataExtent * numFieldsLeft * numFieldsRight;
2256 const bool transposeLeft =
true;
2257 const bool transposeRight =
false;
2261 const bool matrixTransform = (leftTransform.
rank() == 4) || (rightTransform.
rank() == 4);
2266 if (matrixTransform)
2268 composedTransform = leftTransform.
allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform);
2269 composedTransform.
storeMatMat(transposeLeft, leftTransform, transposeRight, rightTransform);
2272 if (approximateFlops != NULL)
2283 const int newRank = 4;
2284 auto extents = composedTransform.
getExtents();
2286 composedTransform = composedTransform.
shallowCopy(newRank, extents, variationTypes);
2287 if (approximateFlops != NULL)
2293 else if (leftTransform.
isValid())
2296 composedTransform = leftTransform;
2298 else if (rightTransform.
isValid())
2301 composedTransform = rightTransform;
2306 Kokkos::Array<ordinal_type,4> extents {basisValuesLeft.
numCells(),basisValuesLeft.
numPoints(),spaceDim,spaceDim};
2309 Kokkos::View<Scalar*,DeviceType> identityUnderlyingView(
"Intrepid2::FST::integrate() - identity view",spaceDim);
2310 Kokkos::deep_copy(identityUnderlyingView, 1.0);
2318 const int leftFamilyCount = basisValuesLeft. basisValues().numFamilies();
2319 const int rightFamilyCount = basisValuesRight.basisValues().
numFamilies();
2320 const int leftComponentCount = isVectorValued ? basisValuesLeft. vectorData().numComponents() : 1;
2321 const int rightComponentCount = isVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2323 int leftFieldOrdinalOffset = 0;
2324 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
2329 bool haveLaunchedContributionToCurrentFamilyLeft =
false;
2330 for (
int leftComponentOrdinal=0; leftComponentOrdinal<leftComponentCount; leftComponentOrdinal++)
2333 : basisValuesLeft.basisValues().
tensorData(leftFamilyOrdinal);
2337 a_offset += basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal);
2341 int rightFieldOrdinalOffset = 0;
2342 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2346 bool haveLaunchedContributionToCurrentFamilyRight =
false;
2348 for (
int rightComponentOrdinal=0; rightComponentOrdinal<rightComponentCount; rightComponentOrdinal++)
2351 : basisValuesRight.basisValues().
tensorData(rightFamilyOrdinal);
2352 if (!rightComponent.
isValid())
2355 b_offset += basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal);
2362 Kokkos::TeamPolicy<ExecutionSpace> policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,Kokkos::AUTO(),vectorSize);
2371 bool forceNonSpecialized =
false;
2372 bool usePointCacheForRank3Tensor =
true;
2376 if (haveLaunchedContributionToCurrentFamilyLeft && haveLaunchedContributionToCurrentFamilyRight)
2378 ExecutionSpace().fence();
2380 haveLaunchedContributionToCurrentFamilyLeft =
true;
2381 haveLaunchedContributionToCurrentFamilyRight =
true;
2383 if (integralViewRank == 2)
2387 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2389 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2390 const int teamSize = functor.teamSize(recommendedTeamSize);
2392 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2394 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 2", policy, functor);
2396 if (approximateFlops != NULL)
2398 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2403 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2405 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2406 const int teamSize = functor.teamSize(recommendedTeamSize);
2408 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2410 Kokkos::parallel_for(
"F_Integrate rank 2", policy, functor);
2412 if (approximateFlops != NULL)
2414 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2418 else if (integralViewRank == 3)
2422 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2424 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2425 const int teamSize = functor.teamSize(recommendedTeamSize);
2427 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2429 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 3", policy, functor);
2431 if (approximateFlops != NULL)
2433 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2438 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2440 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2441 const int teamSize = functor.teamSize(recommendedTeamSize);
2443 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2445 Kokkos::parallel_for(
"F_Integrate rank 3", policy, functor);
2447 if (approximateFlops != NULL)
2449 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2453 b_offset += isVectorValued ? basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal) : 1;
2455 rightFieldOrdinalOffset += isVectorValued ? basisValuesRight.
vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal);
2457 a_offset += isVectorValued ? basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal) : 1;
2459 leftFieldOrdinalOffset += isVectorValued ? basisValuesLeft.
vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal);
2466 ExecutionSpace().fence();