1 #ifndef _COMPADRE_GMLS_TARGETS_HPP_
2 #define _COMPADRE_GMLS_TARGETS_HPP_
19 &&
"_reconstruction_space(VectorOfScalar clones incompatible with scalar output sampling functional which is not a PointSample");
23 bool additional_evaluation_sites_need_handled =
28 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, P_target_row.extent(0)), [&] (
const int j) {
29 Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, P_target_row.extent(1)),
31 P_target_row(j,k) = 0;
34 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, delta.extent(0)), [&] (
const int j) { delta(j) = 0; });
35 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, thread_workspace.extent(0)), [&] (
const int j) { thread_workspace(j) = 0; });
36 teamMember.team_barrier();
38 const int target_NP = this->getNP(_poly_order, _dimensions, _reconstruction_space);
39 const int num_evaluation_sites = getNEvaluationSitesPerTarget(target_index);
41 for (
size_t i=0; i<_operations.size(); ++i) {
43 bool additional_evaluation_sites_handled =
false;
45 bool operation_handled =
true;
52 if (!operation_handled) {
61 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
62 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
63 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
64 for (
int k=0; k<target_NP; ++k) {
65 P_target_row(offset, k) = delta(k);
68 additional_evaluation_sites_handled =
true;
70 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
71 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
72 switch (_dimensions) {
74 P_target_row(offset, 4) = std::pow(_epsilons(target_index), -2);
75 P_target_row(offset, 6) = std::pow(_epsilons(target_index), -2);
76 P_target_row(offset, 9) = std::pow(_epsilons(target_index), -2);
79 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -2);
80 P_target_row(offset, 5) = std::pow(_epsilons(target_index), -2);
83 P_target_row(offset, 2) = std::pow(_epsilons(target_index), -2);
87 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
88 for (
int d=0; d<_dimensions; ++d) {
89 int offset = getTargetOffsetIndexDevice(i, 0, d, j);
90 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
91 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , d , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
94 additional_evaluation_sites_handled =
true;
96 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
97 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
98 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
99 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
101 additional_evaluation_sites_handled =
true;
104 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
105 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
106 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
107 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
109 additional_evaluation_sites_handled =
true;
112 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
113 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
114 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
115 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
117 additional_evaluation_sites_handled =
true;
123 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
124 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
125 switch (_dimensions) {
127 P_target_row(offset, 4) = std::pow(_epsilons(target_index), -2);
128 P_target_row(offset, 6) = std::pow(_epsilons(target_index), -2);
129 P_target_row(offset, 9) = std::pow(_epsilons(target_index), -2);
132 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -2);
133 P_target_row(offset, 5) = std::pow(_epsilons(target_index), -2);
136 P_target_row(offset, 2) = std::pow(_epsilons(target_index), -2);
141 const double factorial[15] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600, 6227020800, 87178291200};
142 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
145 double cutoff_p = _epsilons(target_index);
146 int alphax, alphay, alphaz;
149 double triangle_coords[9];
154 for (
int j=0; j<3; ++j) {
155 triangle_coords_matrix(j, 0) = midpoint(j);
157 size_t num_vertices = _target_extra_data.extent(1) / 3;
162 for (
size_t v=0; v<num_vertices; ++v) {
164 int v2 = (v+1) % num_vertices;
166 for (
int j=0; j<3; ++j) {
167 triangle_coords_matrix(j,1) = _target_extra_data(target_index, v1*3+j) - triangle_coords_matrix(j,0);
168 triangle_coords_matrix(j,2) = _target_extra_data(target_index, v2*3+j) - triangle_coords_matrix(j,0);
174 auto T=triangle_coords_matrix;
175 for (
int quadrature = 0; quadrature<_qm.getNumberOfQuadraturePoints(); ++quadrature) {
176 double transformed_qp[3] = {0,0,0};
177 for (
int j=0; j<3; ++j) {
178 for (
int k=1; k<3; ++k) {
179 transformed_qp[j] += T(j,k)*_qm.getSite(quadrature, k-1);
181 transformed_qp[j] += T(j,0);
186 Kokkos::subview(T, Kokkos::ALL(), 1), Kokkos::subview(T, Kokkos::ALL(), 2));
188 for (
int j=0; j<3; ++j) {
189 relative_coord[j] = transformed_qp[j] - getTargetCoordinate(target_index, j);
194 const int start_index = 0;
195 for (
int n = start_index; n <= _poly_order; n++){
196 for (alphaz = 0; alphaz <= n; alphaz++){
198 for (alphay = 0; alphay <= s; alphay++){
200 alphaf = factorial[alphax]*factorial[alphay]*factorial[alphaz];
201 double val_to_sum = (area_scaling * _qm.getWeight(quadrature)
202 * std::pow(relative_coord.x/cutoff_p,alphax)
203 *std::pow(relative_coord.y/cutoff_p,alphay)
204 *std::pow(relative_coord.z/cutoff_p,alphaz)/alphaf) / (0.5 * area_scaling);
205 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
206 if (quadrature==0) P_target_row(offset, k) = val_to_sum;
207 else P_target_row(offset, k) += val_to_sum;
232 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
233 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
234 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
235 for (
int k=0; k<target_NP; ++k) {
236 P_target_row(offset, k) = delta(k);
239 additional_evaluation_sites_handled =
true;
241 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
242 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, e);
243 for (
int m=0; m<_sampling_multiplier; ++m) {
244 int output_components = _basis_multiplier;
245 for (
int c=0; c<output_components; ++c) {
246 int offset = getTargetOffsetIndexDevice(i, m , c , e);
250 for (
int j=0; j<target_NP; ++j) {
251 P_target_row(offset, c*target_NP + j) = delta(j);
256 additional_evaluation_sites_handled =
true;
260 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
261 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, e);
262 for (
int m=0; m<_sampling_multiplier; ++m) {
263 int output_components = _basis_multiplier;
264 for (
int c=0; c<output_components; ++c) {
265 int offset = getTargetOffsetIndexDevice(i, m , c , e);
269 for (
int j=0; j<target_NP; ++j) {
270 P_target_row(offset, c*target_NP + j) = delta(j);
275 additional_evaluation_sites_handled =
true;
277 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
278 int output_components = _basis_multiplier;
279 for (
int m2=0; m2<output_components; ++m2) {
280 int offset = getTargetOffsetIndexDevice(i, 0 , m2 , e);
281 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , m2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, e);
282 for (
int j=0; j<target_NP; ++j) {
283 P_target_row(offset, j) = delta(j);
287 additional_evaluation_sites_handled =
true;
289 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
290 for (
int m0=0; m0<_sampling_multiplier; ++m0) {
291 int output_components = _basis_multiplier;
292 for (
int m1=0; m1<output_components; ++m1) {
293 for (
int m2=0; m2<output_components; ++m2) {
294 int offset = getTargetOffsetIndexDevice(i, m0 , m1*output_components+m2 , e);
295 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , m2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, e);
296 for (
int j=0; j<target_NP; ++j) {
297 P_target_row(offset, m1*target_NP + j) = delta(j);
303 additional_evaluation_sites_handled =
true;
306 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
307 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);;
308 switch (_dimensions) {
310 P_target_row(offset, 1) = std::pow(_epsilons(target_index), -1);
311 P_target_row(offset, target_NP + 2) = std::pow(_epsilons(target_index), -1);
312 P_target_row(offset, 2*target_NP + 3) = std::pow(_epsilons(target_index), -1);
315 P_target_row(offset, 1) = std::pow(_epsilons(target_index), -1);
316 P_target_row(offset, target_NP + 2) = std::pow(_epsilons(target_index), -1);
319 P_target_row(offset, 1) = std::pow(_epsilons(target_index), -1);
323 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
324 for (
int m=0; m<_sampling_multiplier; ++m) {
325 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , m , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, e);
326 int offset = getTargetOffsetIndexDevice(i, m , 0 , e);
327 for (
int j=0; j<target_NP; ++j) {
328 P_target_row(offset, m*target_NP + j) = delta(j);
332 additional_evaluation_sites_handled =
true;
335 if (_dimensions==3) {
336 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
340 int offset = getTargetOffsetIndexDevice(i, 0 , 0 , 0);
344 offset = getTargetOffsetIndexDevice(i, 1 , 0 , 0);
347 P_target_row(offset, target_NP + 3) = -std::pow(_epsilons(target_index), -1);
349 offset = getTargetOffsetIndexDevice(i, 2 , 0 , 0);
352 P_target_row(offset, 2*target_NP + 2) = std::pow(_epsilons(target_index), -1);
358 int offset = getTargetOffsetIndexDevice(i, 0 , 1 , 0);
361 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -1);
367 offset = getTargetOffsetIndexDevice(i, 2 , 1 , 0);
370 P_target_row(offset, 2*target_NP + 1) = -std::pow(_epsilons(target_index), -1);
376 int offset = getTargetOffsetIndexDevice(i, 0 , 2 , 0);
379 P_target_row(offset, 2) = -std::pow(_epsilons(target_index), -1);
381 offset = getTargetOffsetIndexDevice(i, 1 , 2 , 0);
384 P_target_row(offset, target_NP + 1) = std::pow(_epsilons(target_index), -1);
391 }
else if (_dimensions==2) {
392 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
396 int offset = getTargetOffsetIndexDevice(i, 0 , 0 , 0);
400 offset = getTargetOffsetIndexDevice(i, 1 , 0 , 0);
403 P_target_row(offset, target_NP + 2) = std::pow(_epsilons(target_index), -1);
409 int offset = getTargetOffsetIndexDevice(i, 0 , 1 , 0);
412 P_target_row(offset, 1) = -std::pow(_epsilons(target_index), -1);
436 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
437 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
438 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
439 for (
int k=0; k<target_NP; ++k) {
440 P_target_row(offset, k) = delta(k);
443 additional_evaluation_sites_handled =
true;
445 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
446 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, e);
447 for (
int m=0; m<_sampling_multiplier; ++m) {
448 for (
int c=0; c<_data_sampling_multiplier; ++c) {
449 int offset = getTargetOffsetIndexDevice(i, c , c , e);
450 for (
int j=0; j<target_NP; ++j) {
451 P_target_row(offset, j) = delta(j);
456 additional_evaluation_sites_handled =
true;
459 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
460 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
461 switch (_dimensions) {
463 P_target_row(offset, 4) = std::pow(_epsilons(target_index), -2);
464 P_target_row(offset, 6) = std::pow(_epsilons(target_index), -2);
465 P_target_row(offset, 9) = std::pow(_epsilons(target_index), -2);
468 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -2);
469 P_target_row(offset, 5) = std::pow(_epsilons(target_index), -2);
472 P_target_row(offset, 2) = std::pow(_epsilons(target_index), -2);
477 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
478 for (
int d=0; d<_dimensions; ++d) {
479 int offset = getTargetOffsetIndexDevice(i, 0, d, j);
480 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
481 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , d , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
484 additional_evaluation_sites_handled =
true;
486 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
487 for (
int m0=0; m0<_dimensions; ++m0) {
488 for (
int m1=0; m1<_dimensions; ++m1) {
489 for (
int m2=0; m2<_dimensions; ++m2) {
490 int offset = getTargetOffsetIndexDevice(i, m0 , m1*_dimensions+m2 , j);
491 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
492 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , m2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
497 additional_evaluation_sites_handled =
true;
500 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
501 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
502 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
503 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
505 additional_evaluation_sites_handled =
true;
509 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
510 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
511 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
512 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
514 additional_evaluation_sites_handled =
true;
519 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
520 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
521 auto row = Kokkos::subview(P_target_row, offset, Kokkos::ALL());
522 this->calcGradientPij(teamMember, row.data(), thread_workspace.data(), target_index, -1 , 1 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
524 additional_evaluation_sites_handled =
true;
527 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
528 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
529 for (
int j=0; j<target_NP; ++j) {
530 P_target_row(offset, j) = 0;
533 P_target_row(offset, 1) = std::pow(_epsilons(target_index), -1);
536 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
537 for (
int j=0; j<target_NP; ++j) {
538 P_target_row(offset, j) = 0;
540 P_target_row(offset, 2) = std::pow(_epsilons(target_index), -1);
544 offset = getTargetOffsetIndexDevice(i, 2, 0, 0);
545 for (
int j=0; j<target_NP; ++j) {
546 P_target_row(offset, j) = 0;
548 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -1);
555 if (_dimensions==3) {
556 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
560 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
561 for (
int j=0; j<target_NP; ++j) {
562 P_target_row(offset, j) = 0;
567 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
568 for (
int j=0; j<target_NP; ++j) {
569 P_target_row(offset, j) = 0;
573 P_target_row(offset, 3) = -std::pow(_epsilons(target_index), -1);
575 offset = getTargetOffsetIndexDevice(i, 2, 0, 0);
576 for (
int j=0; j<target_NP; ++j) {
577 P_target_row(offset, j) = 0;
581 P_target_row(offset, 2) = std::pow(_epsilons(target_index), -1);
587 int offset = getTargetOffsetIndexDevice(i, 0, 1, 0);
588 for (
int j=0; j<target_NP; ++j) {
589 P_target_row(offset, j) = 0;
593 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -1);
595 offset = getTargetOffsetIndexDevice(i, 1, 1, 0);
596 for (
int j=0; j<target_NP; ++j) {
597 P_target_row(offset, j) = 0;
602 offset = getTargetOffsetIndexDevice(i, 2, 1, 0);
603 for (
int j=0; j<target_NP; ++j) {
604 P_target_row(offset, j) = 0;
608 P_target_row(offset, 1) = -std::pow(_epsilons(target_index), -1);
614 int offset = getTargetOffsetIndexDevice(i, 0, 2, 0);
615 for (
int j=0; j<target_NP; ++j) {
616 P_target_row(offset, j) = 0;
620 P_target_row(offset, 2) = -std::pow(_epsilons(target_index), -1);
622 offset = getTargetOffsetIndexDevice(i, 1, 2, 0);
623 for (
int j=0; j<target_NP; ++j) {
624 P_target_row(offset, j) = 0;
628 P_target_row(offset, 1) = std::pow(_epsilons(target_index), -1);
630 offset = getTargetOffsetIndexDevice(i, 2, 2, 0);
631 for (
int j=0; j<target_NP; ++j) {
632 P_target_row(offset, j) = 0;
638 }
else if (_dimensions==2) {
639 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
643 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
644 for (
int j=0; j<target_NP; ++j) {
645 P_target_row(offset, j) = 0;
650 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
651 for (
int j=0; j<target_NP; ++j) {
652 P_target_row(offset, j) = 0;
656 P_target_row(offset, 2) = std::pow(_epsilons(target_index), -1);
662 int offset = getTargetOffsetIndexDevice(i, 0, 1, 0);
663 for (
int j=0; j<target_NP; ++j) {
664 P_target_row(offset, j) = 0;
668 P_target_row(offset, 1) = -std::pow(_epsilons(target_index), -1);
670 offset = getTargetOffsetIndexDevice(i, 1, 1, 0);
671 for (
int j=0; j<target_NP; ++j) {
672 P_target_row(offset, j) = 0;
695 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
696 for (
int m0=0; m0<_sampling_multiplier; ++m0) {
697 for (
int m1=0; m1<_sampling_multiplier; ++m1) {
699 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(m1+1) , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e );
701 int offset = getTargetOffsetIndexDevice(i, m0 , m1 , e );
702 for (
int j=0; j<target_NP; ++j) {
703 P_target_row(offset, j) = delta(j);
708 additional_evaluation_sites_handled =
true;
710 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
711 for (
int m0=0; m0<_sampling_multiplier; ++m0) {
712 for (
int m1=0; m1<_sampling_multiplier; ++m1) {
713 int offset = getTargetOffsetIndexDevice(i, m0 , m1 , e );
714 if (_dimensions==3) {
718 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(2+1) , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
720 for (
int j=0; j<target_NP; ++j) {
721 P_target_row(offset, j) = delta(j);
723 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
725 for (
int j=0; j<target_NP; ++j) {
726 P_target_row(offset, j) -= delta(j);
732 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(2+1) , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
734 for (
int j=0; j<target_NP; ++j) {
735 P_target_row(offset, j) = -delta(j);
737 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
739 for (
int j=0; j<target_NP; ++j) {
740 P_target_row(offset, j) += delta(j);
746 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
748 for (
int j=0; j<target_NP; ++j) {
749 P_target_row(offset, j) = delta(j);
751 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
753 for (
int j=0; j<target_NP; ++j) {
754 P_target_row(offset, j) -= delta(j);
762 P_target_row(offset, 2) = -std::pow(_epsilons(target_index), -1);
763 P_target_row(offset, 3) = std::pow(_epsilons(target_index), -1);
765 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
767 for (
int j=0; j<target_NP; ++j) {
768 P_target_row(offset, j) = delta(j);
770 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
772 for (
int j=0; j<target_NP; ++j) {
773 P_target_row(offset, j) -= delta(j);
781 additional_evaluation_sites_handled =
true;
783 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
784 for (
int m0=0; m0<_sampling_multiplier; ++m0) {
785 for (
int m1=0; m1<_sampling_multiplier; ++m1) {
786 int offset = getTargetOffsetIndexDevice(i, m0 , m1 , e );
787 if (_dimensions == 3) {
792 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 0 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
794 for (
int j=0; j<target_NP; ++j) {
795 P_target_row(offset, j) = delta(j);
797 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
799 for (
int j=0; j<target_NP; ++j) {
800 P_target_row(offset, j) -= delta(j);
802 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(2+1) , 1 , 0 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
804 for (
int j=0; j<target_NP; ++j) {
805 P_target_row(offset, j) += delta(j);
807 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 2 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
809 for (
int j=0; j<target_NP; ++j) {
810 P_target_row(offset, j) -= delta(j);
816 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 0 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
818 for (
int j=0; j<target_NP; ++j) {
819 P_target_row(offset, j) = -delta(j);
821 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
823 for (
int j=0; j<target_NP; ++j) {
824 P_target_row(offset, j) += delta(j);
826 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(2+1) , 1 , 1 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
828 for (
int j=0; j<target_NP; ++j) {
829 P_target_row(offset, j) += delta(j);
831 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 2 , 2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
833 for (
int j=0; j<target_NP; ++j) {
834 P_target_row(offset, j) -= delta(j);
840 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(2+1) , 1 , 0 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
842 for (
int j=0; j<target_NP; ++j) {
843 P_target_row(offset, j) = -delta(j);
845 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 2 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
847 for (
int j=0; j<target_NP; ++j) {
848 P_target_row(offset, j) += delta(j);
850 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(2+1) , 1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
852 for (
int j=0; j<target_NP; ++j) {
853 P_target_row(offset, j) -= delta(j);
855 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 2 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
857 for (
int j=0; j<target_NP; ++j) {
858 P_target_row(offset, j) += delta(j);
864 if (_dimensions == 2) {
869 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 0 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
871 for (
int j=0; j<target_NP; ++j) {
872 P_target_row(offset, j) = delta(j);
874 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 1 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
876 for (
int j=0; j<target_NP; ++j) {
877 P_target_row(offset, j) += delta(j);
879 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 0 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
881 for (
int j=0; j<target_NP; ++j) {
882 P_target_row(offset, j) -= delta(j);
884 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
886 for (
int j=0; j<target_NP; ++j) {
887 P_target_row(offset, j) -= delta(j);
893 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(0+1) , 1 , 0 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
895 for (
int j=0; j<target_NP; ++j) {
896 P_target_row(offset, j) = delta(j);
898 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
900 for (
int j=0; j<target_NP; ++j) {
901 P_target_row(offset, j) += delta(j);
903 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 0 , 0 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
905 for (
int j=0; j<target_NP; ++j) {
906 P_target_row(offset, j) -= delta(j);
908 this->calcHessianPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(1+1) , 1 , 1 , 1 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
910 for (
int j=0; j<target_NP; ++j) {
911 P_target_row(offset, j) -= delta(j);
920 additional_evaluation_sites_handled =
true;
922 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int e) {
923 for (
int m0=0; m0<_sampling_multiplier; ++m0) {
924 for (
int m1=0; m1<_sampling_multiplier; ++m1) {
925 for (
int m2=0; m2<_sampling_multiplier; ++m2) {
926 int offset = getTargetOffsetIndexDevice(i, m0 , m1*_sampling_multiplier+m2 , e);
927 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, -(m1+1) , 1 , m2 , _dimensions, _poly_order,
false , NULL ,
ReconstructionSpace::DivergenceFreeVectorTaylorPolynomial,
VectorPointSample, e);
928 for (
int j=0; j<target_NP; ++j) {
929 P_target_row(offset, j) = delta(j);
935 additional_evaluation_sites_handled =
true;
944 compadre_kernel_assert_release(((additional_evaluation_sites_need_handled && additional_evaluation_sites_handled) || (!additional_evaluation_sites_need_handled)) &&
"Auxiliary evaluation coordinates are specified by user, but are calling a target operation that can not handle evaluating additional sites.");
946 teamMember.team_barrier();
950 KOKKOS_INLINE_FUNCTION
953 compadre_kernel_assert_release(((
int)thread_workspace.extent(0)>=(_curvature_poly_order+1)*_local_dimensions) &&
"Workspace thread_workspace not large enough.");
955 const int target_index = _initial_index_for_batch + teamMember.league_rank();
957 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, P_target_row.extent(0)), [&] (
const int j) {
958 Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, P_target_row.extent(1)),
960 P_target_row(j,k) = 0;
963 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, delta.extent(0)), [&] (
const int j) { delta(j) = 0; });
964 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, thread_workspace.extent(0)), [&] (
const int j) { thread_workspace(j) = 0; });
965 teamMember.team_barrier();
968 for (
size_t i=0; i<_curvature_support_operations.size(); ++i) {
970 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
971 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
972 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, local_neighbor_index, 0 , _dimensions-1, _curvature_poly_order,
false , V,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample);
973 for (
int j=0; j<manifold_NP; ++j) {
974 P_target_row(offset, j) = delta(j);
978 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
980 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
981 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, local_neighbor_index, 0 , 0 , _dimensions-1, _curvature_poly_order,
false , V,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample);
982 for (
int j=0; j<manifold_NP; ++j) {
983 P_target_row(offset, j) = delta(j);
987 offset = getTargetOffsetIndexDevice(i, 0, 1, 0);
988 this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, local_neighbor_index, 0 , 1 , _dimensions-1, _curvature_poly_order,
false , V,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample);
989 for (
int j=0; j<manifold_NP; ++j) {
990 P_target_row(offset, j) = delta(j);
1000 KOKKOS_INLINE_FUNCTION
1003 compadre_kernel_assert_release(((
int)thread_workspace.extent(0)>=(_poly_order+1)*_local_dimensions) &&
"Workspace thread_workspace not large enough.");
1006 const int target_index = _initial_index_for_batch + teamMember.league_rank();
1007 const int target_NP = this->getNP(_poly_order, _dimensions-1, _reconstruction_space);
1009 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, P_target_row.extent(0)), [&] (
const int j) {
1010 Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, P_target_row.extent(1)),
1011 [=] (const int& k) {
1012 P_target_row(j,k) = 0;
1015 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, delta.extent(0)), [&] (
const int j) { delta(j) = 0; });
1016 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, thread_workspace.extent(0)), [&] (
const int j) { thread_workspace(j) = 0; });
1017 teamMember.team_barrier();
1020 bool additional_evaluation_sites_need_handled =
1021 (_additional_evaluation_coordinates.extent(0) > 0) ?
true :
false;
1023 const int num_evaluation_sites = getNEvaluationSitesPerTarget(target_index);
1025 for (
size_t i=0; i<_operations.size(); ++i) {
1027 bool additional_evaluation_sites_handled =
false;
1029 bool operation_handled =
true;
1036 if (!operation_handled) {
1037 if (_dimensions>2) {
1039 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int j) {
1040 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions-1, _poly_order,
false , &V,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, j);
1041 int offset = getTargetOffsetIndexDevice(i, 0, 0, j);
1042 for (
int k=0; k<target_NP; ++k) {
1043 P_target_row(offset, k) = delta(k);
1046 additional_evaluation_sites_handled =
true;
1049 if (_reconstruction_space_rank == 1) {
1050 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int k) {
1052 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions-1, _poly_order,
false , &V,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, k);
1053 int offset = getTargetOffsetIndexDevice(i, 0, 0, k);
1054 for (
int j=0; j<target_NP; ++j) {
1055 P_target_row(offset, j) = delta(j);
1056 P_target_row(offset, target_NP + j) = 0;
1058 offset = getTargetOffsetIndexDevice(i, 1, 0, k);
1059 for (
int j=0; j<target_NP; ++j) {
1060 P_target_row(offset, j) = 0;
1061 P_target_row(offset, target_NP + j) = 0;
1065 offset = getTargetOffsetIndexDevice(i, 0, 1, k);
1066 for (
int j=0; j<target_NP; ++j) {
1067 P_target_row(offset, j) = 0;
1068 P_target_row(offset, target_NP + j) = 0;
1070 offset = getTargetOffsetIndexDevice(i, 1, 1, k);
1071 for (
int j=0; j<target_NP; ++j) {
1072 P_target_row(offset, j) = 0;
1073 P_target_row(offset, target_NP + j) = delta(j);
1076 additional_evaluation_sites_handled =
true;
1078 }
else if (_reconstruction_space_rank == 0) {
1079 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int k) {
1081 this->calcPij(teamMember, delta.data(), thread_workspace.data(), target_index, -1 , 1 , _dimensions-1, _poly_order,
false , &V,
ReconstructionSpace::ScalarTaylorPolynomial,
PointSample, k);
1082 int offset = getTargetOffsetIndexDevice(i, 0, 0, k);
1083 for (
int j=0; j<target_NP; ++j) {
1084 P_target_row(offset, j) = delta(j);
1086 offset = getTargetOffsetIndexDevice(i, 1, 0, k);
1087 for (
int j=0; j<target_NP; ++j) {
1088 P_target_row(offset, j) = 0;
1092 offset = getTargetOffsetIndexDevice(i, 0, 1, k);
1093 for (
int j=0; j<target_NP; ++j) {
1094 P_target_row(offset, j) = 0;
1096 offset = getTargetOffsetIndexDevice(i, 1, 1, k);
1097 for (
int j=0; j<target_NP; ++j) {
1098 P_target_row(offset, j) = delta(j);
1101 additional_evaluation_sites_handled =
true;
1105 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1107 double h = _epsilons(target_index);
1108 double a1=0, a2=0, a3=0, a4=0, a5=0;
1109 if (_curvature_poly_order > 0) {
1110 a1 = curvature_coefficients(1);
1111 a2 = curvature_coefficients(2);
1113 if (_curvature_poly_order > 1) {
1114 a3 = curvature_coefficients(3);
1115 a4 = curvature_coefficients(4);
1116 a5 = curvature_coefficients(5);
1118 double den = (h*h + a1*a1 + a2*a2);
1125 const int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1126 for (
int j=0; j<target_NP; ++j) {
1127 P_target_row(offset, j) = 0;
1130 if (_poly_order > 0 && _curvature_poly_order > 1) {
1131 P_target_row(offset, 1) = (-a1*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5))/den/den/(h*h);
1132 P_target_row(offset, 2) = (-a2*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5))/den/den/(h*h);
1134 if (_poly_order > 1 && _curvature_poly_order > 0) {
1135 P_target_row(offset, 3) = (h*h+a2*a2)/den/(h*h);
1136 P_target_row(offset, 4) = -2*a1*a2/den/(h*h);
1137 P_target_row(offset, 5) = (h*h+a1*a1)/den/(h*h);
1143 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1145 double h = _epsilons(target_index);
1146 double a1=0, a2=0, a3=0, a4=0, a5=0;
1147 if (_curvature_poly_order > 0) {
1148 a1 = curvature_coefficients(1);
1149 a2 = curvature_coefficients(2);
1151 if (_curvature_poly_order > 1) {
1152 a3 = curvature_coefficients(3);
1153 a4 = curvature_coefficients(4);
1154 a5 = curvature_coefficients(5);
1156 double den = (h*h + a1*a1 + a2*a2);
1158 double c0a = -a1*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5)/den/den/h;
1159 double c1a = (h*h+a2*a2)/den/h;
1160 double c2a = -a1*a2/den/h;
1162 double c0b = -a2*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5)/den/den/h;
1163 double c1b = -a1*a2/den/h;
1164 double c2b = (h*h+a1*a1)/den/h;
1167 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1168 for (
int j=0; j<target_NP; ++j) {
1169 P_target_row(offset, j) = 0;
1170 P_target_row(offset, target_NP + j) = 0;
1172 P_target_row(offset, 0) = c0a;
1173 P_target_row(offset, 1) = c1a;
1174 P_target_row(offset, 2) = c2a;
1175 P_target_row(offset, target_NP + 0) = c0b;
1176 P_target_row(offset, target_NP + 1) = c1b;
1177 P_target_row(offset, target_NP + 2) = c2b;
1180 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1182 double h = _epsilons(target_index);
1183 double a1=0, a2=0, a3=0, a4=0, a5=0;
1184 if (_curvature_poly_order > 0) {
1185 a1 = curvature_coefficients(1);
1186 a2 = curvature_coefficients(2);
1188 if (_curvature_poly_order > 1) {
1189 a3 = curvature_coefficients(3);
1190 a4 = curvature_coefficients(4);
1191 a5 = curvature_coefficients(5);
1193 double den = (h*h + a1*a1 + a2*a2);
1195 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1196 for (
int j=0; j<target_NP; ++j) {
1197 P_target_row(offset, j) = 0;
1201 if (_poly_order > 0 && _curvature_poly_order > 1) {
1202 P_target_row(offset, 1) = (-a1*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5))/den/den/(h*h);
1203 P_target_row(offset, 2) = (-a2*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5))/den/den/(h*h);
1205 if (_poly_order > 1 && _curvature_poly_order > 0) {
1206 P_target_row(offset, 3) = (h*h+a2*a2)/den/(h*h);
1207 P_target_row(offset, 4) = -2*a1*a2/den/(h*h);
1208 P_target_row(offset, 5) = (h*h+a1*a1)/den/(h*h);
1215 if (_reconstruction_space_rank == 1) {
1216 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1218 double h = _epsilons(target_index);
1219 double a1=0, a2=0, a3=0, a4=0, a5=0;
1220 if (_curvature_poly_order > 0) {
1221 a1 = curvature_coefficients(1);
1222 a2 = curvature_coefficients(2);
1224 if (_curvature_poly_order > 1) {
1225 a3 = curvature_coefficients(3);
1226 a4 = curvature_coefficients(4);
1227 a5 = curvature_coefficients(5);
1229 double den = (h*h + a1*a1 + a2*a2);
1231 for (
int j=0; j<target_NP; ++j) {
1236 if (_poly_order > 0 && _curvature_poly_order > 1) {
1237 delta(1) = (-a1*((h*h+a2*a2)*a3 - 2*a1*a2*a4+(h*h+a1*a1)*a5))/den/den/(h*h);
1238 delta(2) = (-a2*((h*h+a2*a2)*a3 - 2*a1*a2*a4+(h*h+a1*a1)*a5))/den/den/(h*h);
1240 if (_poly_order > 1 && _curvature_poly_order > 0) {
1241 delta(3) = (h*h+a2*a2)/den/(h*h);
1242 delta(4) = -2*a1*a2/den/(h*h);
1243 delta(5) = (h*h+a1*a1)/den/(h*h);
1247 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1248 for (
int j=0; j<target_NP; ++j) {
1249 P_target_row(offset, j) = delta(j);
1250 P_target_row(offset, target_NP + j) = 0;
1252 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
1253 for (
int j=0; j<target_NP; ++j) {
1254 P_target_row(offset, j) = 0;
1255 P_target_row(offset, target_NP + j) = 0;
1259 offset = getTargetOffsetIndexDevice(i, 0, 1, 0);
1260 for (
int j=0; j<target_NP; ++j) {
1261 P_target_row(offset, j) = 0;
1262 P_target_row(offset, target_NP + j) = 0;
1264 offset = getTargetOffsetIndexDevice(i, 1, 1, 0);
1265 for (
int j=0; j<target_NP; ++j) {
1266 P_target_row(offset, j) = 0;
1267 P_target_row(offset, target_NP + j) = delta(j);
1272 }
else if (_reconstruction_space_rank == 0) {
1273 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1275 double h = _epsilons(target_index);
1276 double a1=0, a2=0, a3=0, a4=0, a5=0;
1277 if (_curvature_poly_order > 0) {
1278 a1 = curvature_coefficients(1);
1279 a2 = curvature_coefficients(2);
1281 if (_curvature_poly_order > 1) {
1282 a3 = curvature_coefficients(3);
1283 a4 = curvature_coefficients(4);
1284 a5 = curvature_coefficients(5);
1286 double den = (h*h + a1*a1 + a2*a2);
1288 for (
int j=0; j<target_NP; ++j) {
1293 if (_poly_order > 0 && _curvature_poly_order > 1) {
1294 delta(1) = (-a1*((h*h+a2*a2)*a3 - 2*a1*a2*a4+(h*h+a1*a1)*a5))/den/den/(h*h);
1295 delta(2) = (-a2*((h*h+a2*a2)*a3 - 2*a1*a2*a4+(h*h+a1*a1)*a5))/den/den/(h*h);
1297 if (_poly_order > 1 && _curvature_poly_order > 0) {
1298 delta(3) = (h*h+a2*a2)/den/(h*h);
1299 delta(4) = -2*a1*a2/den/(h*h);
1300 delta(5) = (h*h+a1*a1)/den/(h*h);
1304 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1305 for (
int j=0; j<target_NP; ++j) {
1306 P_target_row(offset, j) = delta(j);
1308 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
1309 for (
int j=0; j<target_NP; ++j) {
1310 P_target_row(offset, j) = 0;
1314 offset = getTargetOffsetIndexDevice(i, 0, 1, 0);
1315 for (
int j=0; j<target_NP; ++j) {
1316 P_target_row(offset, j) = 0;
1318 offset = getTargetOffsetIndexDevice(i, 1, 1, 0);
1319 for (
int j=0; j<target_NP; ++j) {
1320 P_target_row(offset, j) = delta(j);
1325 if (_reconstruction_space_rank == 0
1326 && (_polynomial_sampling_functional ==
PointSample
1328 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1330 double h = _epsilons(target_index);
1331 double a1 = curvature_coefficients(1);
1332 double a2 = curvature_coefficients(2);
1334 double q1 = (h*h + a2*a2)/(h*h*h + h*a1*a1 + h*a2*a2);
1335 double q2 = -(a1*a2)/(h*h*h + h*a1*a1 + h*a2*a2);
1336 double q3 = (h*h + a1*a1)/(h*h*h + h*a1*a1 + h*a2*a2);
1338 double t1a = q1*1 + q2*0;
1339 double t2a = q1*0 + q2*1;
1341 double t1b = q2*1 + q3*0;
1342 double t2b = q2*0 + q3*1;
1345 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1346 for (
int j=0; j<target_NP; ++j) {
1347 P_target_row(offset, j) = 0;
1349 if (_poly_order > 0 && _curvature_poly_order > 0) {
1350 P_target_row(offset, 1) = t1a + t2a;
1351 P_target_row(offset, 2) = 0;
1354 offset = getTargetOffsetIndexDevice(i, 0, 1, 0);
1355 for (
int j=0; j<target_NP; ++j) {
1356 P_target_row(offset, j) = 0;
1358 if (_poly_order > 0 && _curvature_poly_order > 0) {
1359 P_target_row(offset, 1) = 0;
1360 P_target_row(offset, 2) = t1b + t2b;
1366 }
else if (_reconstruction_space_rank == 1
1367 && _polynomial_sampling_functional
1373 }
else if (_reconstruction_space_rank == 0
1374 && _polynomial_sampling_functional
1383 if (_reconstruction_space_rank == 1
1385 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1387 double h = _epsilons(target_index);
1388 double a1=0, a2=0, a3=0, a4=0, a5=0;
1389 if (_curvature_poly_order > 0) {
1390 a1 = curvature_coefficients(1);
1391 a2 = curvature_coefficients(2);
1393 if (_curvature_poly_order > 1) {
1394 a3 = curvature_coefficients(3);
1395 a4 = curvature_coefficients(4);
1396 a5 = curvature_coefficients(5);
1398 double den = (h*h + a1*a1 + a2*a2);
1402 double c0a = (a1*a3+a2*a4)/(h*den);
1406 double c0b = (a1*a4+a2*a5)/(h*den);
1411 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1412 for (
int j=0; j<target_NP; ++j) {
1413 P_target_row(offset, j) = 0;
1414 P_target_row(offset, target_NP + j) = 0;
1416 P_target_row(offset, 0) = c0a;
1417 P_target_row(offset, 1) = c1a;
1418 P_target_row(offset, 2) = c2a;
1421 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
1422 for (
int j=0; j<target_NP; ++j) {
1423 P_target_row(offset, j) = 0;
1424 P_target_row(offset, target_NP + j) = 0;
1426 P_target_row(offset, target_NP + 0) = c0b;
1427 P_target_row(offset, target_NP + 1) = c1b;
1428 P_target_row(offset, target_NP + 2) = c2b;
1431 }
else if (_reconstruction_space_rank == 0
1433 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1435 double h = _epsilons(target_index);
1436 double a1=0, a2=0, a3=0, a4=0, a5=0;
1437 if (_curvature_poly_order > 0) {
1438 a1 = curvature_coefficients(1);
1439 a2 = curvature_coefficients(2);
1441 if (_curvature_poly_order > 1) {
1442 a3 = curvature_coefficients(3);
1443 a4 = curvature_coefficients(4);
1444 a5 = curvature_coefficients(5);
1446 double den = (h*h + a1*a1 + a2*a2);
1450 double c0a = (a1*a3+a2*a4)/(h*den);
1454 double c0b = (a1*a4+a2*a5)/(h*den);
1459 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1460 for (
int j=0; j<target_NP; ++j) {
1461 P_target_row(offset, j) = 0;
1463 P_target_row(offset, 0) = c0a;
1464 P_target_row(offset, 1) = c1a;
1465 P_target_row(offset, 2) = c2a;
1468 offset = getTargetOffsetIndexDevice(i, 1, 0, 0);
1469 for (
int j=0; j<target_NP; ++j) {
1470 P_target_row(offset, j) = 0;
1472 P_target_row(offset, 0) = c0b;
1473 P_target_row(offset, 1) = c1b;
1474 P_target_row(offset, 2) = c2b;
1477 }
else if (_reconstruction_space_rank == 1
1478 && _polynomial_sampling_functional
1481 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1483 double h = _epsilons(target_index);
1484 double a1=0, a2=0, a3=0, a4=0, a5=0;
1485 if (_curvature_poly_order > 0) {
1486 a1 = curvature_coefficients(1);
1487 a2 = curvature_coefficients(2);
1489 if (_curvature_poly_order > 1) {
1490 a3 = curvature_coefficients(3);
1491 a4 = curvature_coefficients(4);
1492 a5 = curvature_coefficients(5);
1494 double den = (h*h + a1*a1 + a2*a2);
1498 double c0a = -a1*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5)/den/den/h;
1499 double c1a = (h*h+a2*a2)/den/h;
1500 double c2a = -a1*a2/den/h;
1502 double c0b = -a2*((h*h+a2*a2)*a3 - 2*a1*a2*a4 + (h*h+a1*a1)*a5)/den/den/h;
1503 double c1b = -a1*a2/den/h;
1504 double c2b = (h*h+a1*a1)/den/h;
1507 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1508 for (
int j=0; j<target_NP; ++j) {
1509 P_target_row(offset, j) = 0;
1510 P_target_row(offset, target_NP + j) = 0;
1512 P_target_row(offset, 0) = c0a;
1513 P_target_row(offset, 1) = c1a;
1514 P_target_row(offset, 2) = c2a;
1515 P_target_row(offset, target_NP + 0) = c0b;
1516 P_target_row(offset, target_NP + 1) = c1b;
1517 P_target_row(offset, target_NP + 2) = c2b;
1524 double h = _epsilons(target_index);
1526 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int k) {
1529 for (
int d=0; d<_dimensions-1; ++d) {
1530 relative_coord[d] = getTargetAuxiliaryCoordinate(target_index, k, d, &V);
1531 relative_coord[d] -= getTargetCoordinate(target_index, d, &V);
1534 for (
int i=0; i<3; ++i) relative_coord[i] = 0;
1537 int offset = getTargetOffsetIndexDevice(i, 0, 0, k);
1538 P_target_row(offset, 0) =
GaussianCurvature(curvature_coefficients, h, relative_coord.x, relative_coord.y);
1540 additional_evaluation_sites_handled =
true;
1542 double h = _epsilons(target_index);
1544 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, num_evaluation_sites), [=] (
const int k) {
1548 for (
int d=0; d<_dimensions-1; ++d) {
1549 relative_coord[d] = getTargetAuxiliaryCoordinate(target_index, k, d, &V);
1550 relative_coord[d] -= getTargetCoordinate(target_index, d, &V);
1553 for (
int i=0; i<3; ++i) relative_coord[i] = 0;
1556 int offset = getTargetOffsetIndexDevice(i, 0, 0, k);
1558 for (
int n = 0; n <= _poly_order; n++){
1559 for (alphay = 0; alphay <= n; alphay++){
1560 alphax = n - alphay;
1561 P_target_row(offset, index) =
SurfaceCurlOfScalar(curvature_coefficients, h, relative_coord.x, relative_coord.y, alphax, alphay, 0);
1566 offset = getTargetOffsetIndexDevice(i, 0, 1, k);
1568 for (
int n = 0; n <= _poly_order; n++){
1569 for (alphay = 0; alphay <= n; alphay++){
1570 alphax = n - alphay;
1571 P_target_row(offset, index) =
SurfaceCurlOfScalar(curvature_coefficients, h, relative_coord.x, relative_coord.y, alphax, alphay, 1);
1576 additional_evaluation_sites_handled =
true;
1579 const double factorial[15] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600, 6227020800, 87178291200};
1580 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1584 double cutoff_p = _epsilons(target_index);
1590 double triangle_coords[3*3];
1591 for (
int i=0; i<_global_dimensions*3; ++i) triangle_coords[i] = 0;
1597 for (
int j=0; j<_global_dimensions; ++j) {
1598 triangle_coords_matrix(j, 0) = midpoint(j);
1601 size_t num_vertices = _target_extra_data.extent(1) / _global_dimensions;
1602 double reference_cell_area = 0.5;
1603 double entire_cell_area = 0.0;
1604 auto T=triangle_coords_matrix;
1605 for (
size_t v=0; v<num_vertices; ++v) {
1607 int v2 = (v+1) % num_vertices;
1608 for (
int j=0; j<_global_dimensions; ++j) {
1609 triangle_coords_matrix(j,1) = _target_extra_data(target_index, v1*_global_dimensions+j) - triangle_coords_matrix(j,0);
1610 triangle_coords_matrix(j,2) = _target_extra_data(target_index, v2*_global_dimensions+j) - triangle_coords_matrix(j,0);
1613 Kokkos::subview(T, Kokkos::ALL(), 1), Kokkos::subview(T, Kokkos::ALL(), 2));
1619 for (
size_t v=0; v<num_vertices; ++v) {
1621 int v2 = (v+1) % num_vertices;
1623 for (
int j=0; j<_global_dimensions; ++j) {
1624 triangle_coords_matrix(j,1) = _target_extra_data(target_index, v1*_global_dimensions+j) - triangle_coords_matrix(j,0);
1625 triangle_coords_matrix(j,2) = _target_extra_data(target_index, v2*_global_dimensions+j) - triangle_coords_matrix(j,0);
1631 for (
int quadrature = 0; quadrature<_qm.getNumberOfQuadraturePoints(); ++quadrature) {
1632 double transformed_qp[3] = {0,0,0};
1633 for (
int j=0; j<_global_dimensions; ++j) {
1634 for (
int k=1; k<3; ++k) {
1635 transformed_qp[j] += T(j,k)*_qm.getSite(quadrature, k-1);
1637 transformed_qp[j] += T(j,0);
1642 Kokkos::subview(T, Kokkos::ALL(), 1), Kokkos::subview(T, Kokkos::ALL(), 2));
1643 double scaling_factor = sub_cell_area / reference_cell_area;
1645 XYZ qp = XYZ(transformed_qp[0], transformed_qp[1], transformed_qp[2]);
1646 for (
int j=0; j<2; ++j) {
1647 relative_coord[j] = convertGlobalToLocalCoordinate(qp,j,&V) - getTargetCoordinate(target_index,j,&V);
1648 relative_coord[2] = 0;
1652 const int start_index = 0;
1653 for (
int n = start_index; n <= _poly_order; n++){
1654 for (alphay = 0; alphay <= n; alphay++){
1655 alphax = n - alphay;
1656 alphaf = factorial[alphax]*factorial[alphay];
1657 double val_to_sum = (scaling_factor * _qm.getWeight(quadrature)
1658 * std::pow(relative_coord.x/cutoff_p,alphax)
1659 *std::pow(relative_coord.y/cutoff_p,alphay)/alphaf) / entire_cell_area;
1660 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1661 if (quadrature==0 && v==0) P_target_row(offset, k) = val_to_sum;
1662 else P_target_row(offset, k) += val_to_sum;
1673 }
else if (_dimensions==2) {
1675 Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1676 int offset = getTargetOffsetIndexDevice(i, 0, 0, 0);
1677 for (
int j=0; j<target_NP; ++j) {
1678 P_target_row(offset, j) = 0;
1679 delta(j) = (j == 1) ? std::pow(_epsilons(target_index), -1) : 0;
1681 for (
int j=0; j<target_NP; ++j) {
1682 double v1 = delta(j)*G_inv(0,0);
1683 P_target_row(offset, j) = v1;
1690 compadre_kernel_assert_release(((additional_evaluation_sites_need_handled && additional_evaluation_sites_handled) || (!additional_evaluation_sites_need_handled)) &&
"Auxiliary evaluation coordinates are specified by user, but are calling a target operation that can not handle evaluating additional sites.");
#define compadre_kernel_assert_debug(condition)
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
#define compadre_kernel_assert_release(condition)
compadre_kernel_assert_release is similar to compadre_assert_release, but is a call on the device,...
team_policy::member_type member_type
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
const SamplingFunctional _polynomial_sampling_functional
polynomial sampling functional used to construct P matrix, set at GMLS class instantiation
ReconstructionSpace _reconstruction_space
reconstruction space for GMLS problems, set at GMLS class instantiation
int _data_sampling_multiplier
effective dimension of the data sampling functional e.g.
int _initial_index_for_batch
initial index for current batch
KOKKOS_INLINE_FUNCTION void computeTargetFunctionals(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row) const
Evaluates a polynomial basis with a target functional applied to each member of the basis.
int _dimensions
dimension of the problem, set at class instantiation only
Kokkos::View< double **, layout_right > _additional_evaluation_coordinates
(OPTIONAL) user provided additional coordinates for target operation evaluation (device)
constexpr SamplingFunctional StaggeredEdgeIntegralSample
Samples consist of the result of integrals of a vector dotted with the tangent along edges between ne...
KOKKOS_INLINE_FUNCTION double GaussianCurvature(const scratch_vector_type a_, const double h, const double u1, const double u2)
Gaussian curvature K at any point in the local chart.
constexpr SamplingFunctional PointSample
Available sampling functionals.
@ LaplacianOfScalarPointEvaluation
Point evaluation of the laplacian of a scalar (could be on a manifold or not)
@ ScalarFaceAverageEvaluation
Average of values in a face of a cell using quadrature 2D in 3D problem, 1D in 2D problem.
@ GradientOfScalarPointEvaluation
Point evaluation of the gradient of a scalar.
@ CurlOfVectorPointEvaluation
Point evaluation of the curl of a vector (results in a vector)
@ PartialYOfScalarPointEvaluation
Point evaluation of the partial with respect to y of a scalar.
@ ChainedStaggeredLaplacianOfScalarPointEvaluation
Point evaluation of the chained staggered Laplacian acting on VectorTaylorPolynomial basis + Staggere...
@ GaussianCurvaturePointEvaluation
Point evaluation of Gaussian curvature.
@ CurlCurlOfVectorPointEvaluation
Point evaluation of the curl of a curl of a vector (results in a vector)
@ GradientOfVectorPointEvaluation
Point evaluation of the gradient of a vector (results in a matrix, NOT CURRENTLY IMPLEMENTED)
@ PartialZOfScalarPointEvaluation
Point evaluation of the partial with respect to z of a scalar.
@ DivergenceOfVectorPointEvaluation
Point evaluation of the divergence of a vector (results in a scalar)
@ VectorPointEvaluation
Point evaluation of a vector (reconstructs entire vector at once, requiring a ReconstructionSpace hav...
@ VectorLaplacianPointEvaluation
Point evaluation of the laplacian of each component of a vector.
@ ScalarPointEvaluation
Point evaluation of a scalar.
@ PartialXOfScalarPointEvaluation
Point evaluation of the partial with respect to x of a scalar.
constexpr SamplingFunctional StaggeredEdgeAnalyticGradientIntegralSample
Analytical integral of a gradient source vector is just a difference of the scalar source at neighbor...
KOKKOS_INLINE_FUNCTION void getMidpointFromCellVertices(const member_type &teamMember, scratch_vector_type midpoint_storage, scratch_matrix_right_type cell_coordinates, const int cell_num, const int dim=3)
constexpr SamplingFunctional ManifoldVectorPointSample
Point evaluations of the entire vector source function (but on a manifold, so it includes a transform...
constexpr SamplingFunctional VectorPointSample
Point evaluations of the entire vector source function.
@ DivergenceFreeVectorTaylorPolynomial
Divergence-free vector polynomial basis.
@ VectorTaylorPolynomial
Vector polynomial basis having # of components _dimensions, or (_dimensions-1) in the case of manifol...
@ ScalarTaylorPolynomial
Scalar polynomial basis centered at the target site and scaled by sum of basis powers e....
@ VectorOfScalarClonesTaylorPolynomial
Scalar basis reused as many times as there are components in the vector resulting in a much cheaper p...
KOKKOS_INLINE_FUNCTION double SurfaceCurlOfScalar(const scratch_vector_type a_, const double h, const double u1, const double u2, int x_pow, int y_pow, const int component)
Surface curl at any point in the local chart.
KOKKOS_INLINE_FUNCTION double getAreaFromVectors(const member_type &teamMember, view_type_1 v1, view_type_2 v2)