9 &&
"keep_coefficients is set to true, but number of batches exceeds 1.");
41 &&
"No target operations added to GMLS class before calling generatePolynomialCoefficients().");
58 &&
"Cannot solve GMLS problems with the NEUMANN_GRAD_SCALAR constraint using QR Factorization.");
67 int total_added_alphas =
_pc._target_coordinates.extent(0)*
_d_ss._added_alpha_size;
69 decltype(
_d_ss._alphas)(
"alphas", (total_neighbors +
TO_GLOBAL(total_added_alphas))
73 Kokkos::deep_copy(
_d_ss._alphas, 0.0);
74 }
catch(std::exception &e) {
75 printf(
"Insufficient memory to store alphas: \n\n%s", e.what());
82 int max_num_neighbors =
_pc._nla.getMaxNumNeighbors();
85 std::pow(2,sro.use_target_site_weights),
88 this->getNeighborLists()->getNumberOfTargets() : 1,
90 max_num_neighbors : 1,
95 }
catch(std::exception &e) {
96 printf(
"Insufficient memory to store prestencil weights: \n\n%s", e.what());
127 int team_scratch_size_a = 0;
130 int team_scratch_size_b = 0;
131 int thread_scratch_size_a = 0;
132 int thread_scratch_size_b = 0;
150 const int max_manifold_NP = (manifold_NP >
_NP) ? manifold_NP :
_NP;
159 team_scratch_size_b += scratch_vector_type::shmem_size( (
_dimensions-1)*max_num_neighbors );
161 thread_scratch_size_a += scratch_vector_type::shmem_size(this_num_cols);
162 thread_scratch_size_a += scratch_vector_type::shmem_size(
165 team_scratch_size_b += scratch_vector_type::shmem_size(max_num_neighbors);
166 team_scratch_size_b += scratch_vector_type::shmem_size(max_num_neighbors);
173 Kokkos::deep_copy(
_T, 0.0);
176 _pc._target_coordinates.extent(0)*manifold_NP);
185 thread_scratch_size_a += scratch_vector_type::shmem_size(this_num_cols);
186 thread_scratch_size_a += scratch_vector_type::shmem_size(
190 _pm.setTeamScratchSize(0, team_scratch_size_a);
191 _pm.setTeamScratchSize(1, team_scratch_size_b);
192 _pm.setThreadScratchSize(0, thread_scratch_size_a);
193 _pm.setThreadScratchSize(1, thread_scratch_size_b);
199 int RHS_dim_0, RHS_dim_1;
202 int P_dim_0, P_dim_1;
213 _w = Kokkos::View<double*>(
"w", max_batch_size*
TO_GLOBAL(max_num_rows));
214 _Z = Kokkos::View<double*>(
"Z", max_batch_size*
TO_GLOBAL(
_d_ss._total_alpha_values*
_d_ss._max_evaluation_sites_per_target*this_num_cols));
216 }
catch (std::exception &e) {
217 printf(
"Failed to allocate space for RHS, P, and w. Consider increasing number_of_batches: \n\n%s", e.what());
228 &&
"Normal vectors are required for solving GMLS problems with the NEUMANN_GRAD_SCALAR constraint.");
232 for (
int batch_num=0; batch_num<number_of_batches; ++batch_num) {
235 Kokkos::deep_copy(
_RHS, 0.0);
236 Kokkos::deep_copy(
_P, 0.0);
237 Kokkos::deep_copy(
_w, 0.0);
238 Kokkos::deep_copy(
_Z, 0.0);
245 auto tp =
_pm.TeamPolicyThreadsAndVectors(this_batch_size,
_pm._default_threads, 1);
261 Kokkos::parallel_for(
"ComputeCoarseTangentPlane", tp, functor_compute_coarse_tangent_plane);
269 Kokkos::parallel_for(
"FixTangentDirectionOrdering", tp, functor_fix_tangent_direction_ordering);
274 Kokkos::parallel_for(
"AssembleCurvaturePsqrtW", tp, functor_assemble_curvature_psqrtw);
278 Kokkos::Profiling::pushRegion(
"Curvature LU Factorization");
282 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(
_pm,
_RHS.data(), RHS_dim_0, RHS_dim_1,
_P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
283 Kokkos::Profiling::popRegion();
286 Kokkos::Profiling::pushRegion(
"Curvature QR+Pivoting Factorization");
287 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(
_pm,
_P.data(), P_dim_0, P_dim_1,
_RHS.data(), RHS_dim_0, RHS_dim_1, max_num_neighbors, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
288 Kokkos::Profiling::popRegion();
293 Kokkos::parallel_for(
"GetAccurateTangentDirections", tp, functor_get_accurate_tangent_directions);
296 Kokkos::deep_copy(
_P, 0.0);
298 if (batch_num==number_of_batches-1) {
300 _host_T = Kokkos::create_mirror_view(
_T);
308 Kokkos::parallel_for(
"AssembleCurvaturePsqrtW", tp, functor_assemble_curvature_psqrtw);
312 Kokkos::Profiling::pushRegion(
"Curvature LU Factorization");
313 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(
_pm,
_RHS.data(), RHS_dim_0, RHS_dim_1,
_P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
314 Kokkos::Profiling::popRegion();
317 Kokkos::Profiling::pushRegion(
"Curvature QR+Pivoting Factorization");
318 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(
_pm,
_P.data(), P_dim_0, P_dim_1,
_RHS.data(), RHS_dim_0, RHS_dim_1, max_num_neighbors, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
319 Kokkos::Profiling::popRegion();
324 Kokkos::parallel_for(
"ApplyCurvatureTargets", tp, functor_apply_curvature_targets);
332 Kokkos::parallel_for(
"ComputePrestencilWeights", tp, functor_compute_prestencil_weights);
335 Kokkos::deep_copy(
_P, 0.0);
339 Kokkos::parallel_for(
"AssembleManifoldPsqrtW", tp, functor_assemble_manifold_psqrtw);
343 Kokkos::Profiling::pushRegion(
"Manifold LU Factorization");
344 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(
_pm,
_RHS.data(), RHS_dim_0, RHS_dim_1,
_P.data(), P_dim_1, P_dim_0, this_num_cols, this_num_cols, max_num_rows, this_batch_size, implicit_RHS);
345 Kokkos::Profiling::popRegion();
347 Kokkos::Profiling::pushRegion(
"Manifold QR+Pivoting Factorization");
348 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(
_pm,
_P.data(), P_dim_0, P_dim_1,
_RHS.data(), RHS_dim_0, RHS_dim_1, max_num_rows, this_num_cols, max_num_rows, this_batch_size, implicit_RHS);
349 Kokkos::Profiling::popRegion();
362 Kokkos::parallel_for(
"AssembleStandardPsqrtW", tp, functor_assemble_standard_psqrtw);
367 Kokkos::Profiling::pushRegion(
"LU Factorization");
368 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(
_pm,
_RHS.data(), RHS_dim_0, RHS_dim_1,
_P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows +
_d_ss._added_alpha_size, this_batch_size, implicit_RHS);
369 Kokkos::Profiling::popRegion();
371 Kokkos::Profiling::pushRegion(
"QR+Pivoting Factorization");
373 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(
_pm,
_RHS.data(), RHS_dim_0, RHS_dim_1,
_P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows +
_d_ss._added_alpha_size, this_batch_size, implicit_RHS);
375 GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(
_pm,
_P.data(), P_dim_0, P_dim_1,
_RHS.data(), RHS_dim_0, RHS_dim_1, max_num_rows, this_num_cols, max_num_rows, this_batch_size, implicit_RHS);
377 Kokkos::Profiling::popRegion();
381 Kokkos::parallel_for(
"ComputePrestencilWeights", tp, functor_compute_prestencil_weights);
398 Kokkos::parallel_for(
"EvaluateManifoldTargets", tp, functor_evaluate_manifold_targets);
408 Kokkos::parallel_for(
"EvaluateStandardTargets", tp, functor_evaluate_standard_targets);
415 const auto work_item_property = Kokkos::Experimental::WorkItemProperty::HintLightWeight;
416 const auto tp2 = Kokkos::Experimental::require(tp, work_item_property);
417 auto functor_apply_targets =
ApplyTargets(gmls_solution_data);
419 Kokkos::parallel_for(
"ApplyTargets", tp2, functor_apply_targets);
428 _w = Kokkos::View<double*>(
"w",0);
429 _Z = Kokkos::View<double*>(
"Z",0);
430 if (number_of_batches > 1) {
431 _RHS = Kokkos::View<double*>(
"RHS",0);
432 _P = Kokkos::View<double*>(
"P",0);
436 _RHS = Kokkos::View<double*>(
"RHS", 0);
437 if (!keep_coefficients)
_P = Kokkos::View<double*>(
"P", 0);
440 _P = Kokkos::View<double*>(
"P", 0);
441 if (!keep_coefficients)
_RHS = Kokkos::View<double*>(
"RHS", 0);
443 _RHS = Kokkos::View<double*>(
"RHS", 0);
444 if (!keep_coefficients)
_P = Kokkos::View<double*>(
"P", 0);
455 this->
_d_ss._contains_valid_alphas =
true;
473 data._sampling_multiplier = gmls._sampling_multiplier;
474 data._initial_index_for_batch = gmls._initial_index_for_batch;
475 data._d_ss = gmls._d_ss;
478 const int max_num_neighbors = gmls._pc._nla.getMaxNumNeighbors();
479 const int max_num_rows = gmls._sampling_multiplier*max_num_neighbors;
487 data.this_num_cols = gmls._basis_multiplier*gmls._NP;
489 int RHS_dim_0, RHS_dim_1;
490 getRHSDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
491 gmls._dimensions, max_num_rows, data.this_num_cols, RHS_dim_0, RHS_dim_1);
492 int P_dim_0, P_dim_1;
493 getPDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
494 gmls._dimensions, max_num_rows, data.this_num_cols, P_dim_0, P_dim_1);
497 data.Coeffs_data = gmls._RHS.data();
498 data.Coeffs_dim_0 = RHS_dim_0;
499 data.Coeffs_dim_1 = RHS_dim_1;
501 data.Coeffs_data = gmls._P.data();
502 data.Coeffs_dim_0 = P_dim_1;
503 data.Coeffs_dim_1 = P_dim_0;
506 data.P_target_row_dim_0 = gmls._d_ss._total_alpha_values*gmls._d_ss._max_evaluation_sites_per_target;
507 data.P_target_row_dim_1 = data.this_num_cols;
508 data.P_target_row_data = gmls._Z.data();
510 data.operations_size = gmls._operations.size();
512 data.number_of_neighbors_list = gmls._pc._nla._number_of_neighbors_list;
513 data.additional_number_of_neighbors_list = gmls._additional_pc._nla._number_of_neighbors_list;
521 data._source_extra_data = gmls._source_extra_data;
522 data._target_extra_data = gmls._target_extra_data;
524 data._epsilons = gmls._epsilons ;
525 data._prestencil_weights = gmls._prestencil_weights ;
526 data._additional_pc = gmls._additional_pc;
527 data._poly_order = gmls._poly_order ;
528 data._curvature_poly_order = gmls._curvature_poly_order;
530 data._global_dimensions = gmls._global_dimensions;
531 data._local_dimensions = gmls._local_dimensions;
532 data._dimensions = gmls._dimensions;
533 data._reconstruction_space = gmls._reconstruction_space;
534 data._reconstruction_space_rank = gmls._reconstruction_space_rank;
535 data._dense_solver_type = gmls._dense_solver_type;
536 data._problem_type = gmls._problem_type;
537 data._constraint_type = gmls._constraint_type;
538 data._polynomial_sampling_functional = gmls._polynomial_sampling_functional;
539 data._data_sampling_functional = gmls._data_sampling_functional;
540 data._curvature_support_operations = gmls._curvature_support_operations;
541 data._operations = gmls._operations;
542 data._weighting_type = gmls._weighting_type;
543 data._curvature_weighting_type = gmls._curvature_weighting_type;
544 data._weighting_p = gmls._weighting_p;
545 data._weighting_n = gmls._weighting_n;
546 data._curvature_weighting_p = gmls._curvature_weighting_p;
547 data._curvature_weighting_n = gmls._curvature_weighting_n;
548 data._basis_multiplier = gmls._basis_multiplier;
549 data._sampling_multiplier = gmls._sampling_multiplier;
550 data._data_sampling_multiplier = gmls._data_sampling_multiplier;
551 data._initial_index_for_batch = gmls._initial_index_for_batch;
553 data._order_of_quadrature_points = gmls._order_of_quadrature_points;
554 data._dimension_of_quadrature_points = gmls._dimension_of_quadrature_points;
556 data._d_ss = gmls._d_ss;
558 data.max_num_neighbors = gmls._pc._nla.getMaxNumNeighbors();
559 data.max_num_rows = gmls._sampling_multiplier*data.max_num_neighbors;
560 int basis_powers_space_multiplier = (gmls._reconstruction_space ==
BernsteinPolynomial) ? 2 : 1;
562 data.manifold_NP =
GMLS::getNP(gmls._curvature_poly_order, gmls._dimensions-1,
564 data.max_manifold_NP = (data.manifold_NP > gmls._NP) ? data.manifold_NP : gmls._NP;
565 data.this_num_cols = gmls._basis_multiplier*data.max_manifold_NP;
566 data.max_poly_order = (gmls._poly_order > gmls._curvature_poly_order) ?
567 gmls._poly_order : gmls._curvature_poly_order;
569 data.ref_N_data = gmls._ref_N.data();
570 data.ref_N_dim = gmls._dimensions;
572 data.thread_workspace_dim = (data.max_poly_order+1)*gmls._global_dimensions*basis_powers_space_multiplier;
573 data.manifold_gradient_dim = (gmls._dimensions-1)*data.max_num_neighbors;
575 data.manifold_curvature_coefficients_data = gmls._manifold_curvature_coefficients.data();
578 data.manifold_NP = 0;
579 data.this_num_cols = gmls._basis_multiplier*gmls._NP;
580 data.thread_workspace_dim = (gmls._poly_order+1)*gmls._global_dimensions*basis_powers_space_multiplier;
581 data.manifold_gradient_dim = 0;
585 data.T_data = gmls._T.data();
587 data.P_target_row_dim_0 = gmls._d_ss._total_alpha_values*gmls._d_ss._max_evaluation_sites_per_target;
588 data.P_target_row_dim_1 = data.this_num_cols;
589 data.P_target_row_data = gmls._Z.data();
591 data.RHS_data = gmls._RHS.data();
592 getRHSDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
593 gmls._dimensions, data.max_num_rows, data.this_num_cols, data.RHS_dim_0, data.RHS_dim_1);
595 data.P_data = gmls._P.data();
596 getPDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
597 gmls._dimensions, data.max_num_rows, data.this_num_cols, data.P_dim_0, data.P_dim_1);
599 data.w_data = gmls._w.data();
602 data.Coeffs_data = gmls._RHS.data();
603 data.Coeffs_dim_0 = data.RHS_dim_0;
604 data.Coeffs_dim_1 = data.RHS_dim_1;
606 data.Coeffs_data = gmls._P.data();
607 data.Coeffs_dim_0 = data.P_dim_1;
608 data.Coeffs_dim_1 = data.P_dim_0;