73 const Teuchos::ParameterList& p)
75 std::string scatterName = p.get<std::string>(
"Scatter Name");
76 Teuchos::RCP<PHX::FieldTag> scatterHolder =
77 Teuchos::rcp(
new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(
new PHX::MDALayout<Dummy>(0))));
80 const std::vector<std::string>& names =
81 *(p.get< Teuchos::RCP< std::vector<std::string> > >(
"Dependent Names"));
83 Teuchos::RCP<PHX::DataLayout> dl =
84 p.get< Teuchos::RCP<const panzer::PureBasis> >(
"Basis")->functional;
87 for (std::size_t eq = 0; eq < names.size(); ++eq) {
88 PHX::MDField<const ScalarT,Cell,NODE> field = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
91 this->addDependentField(field.fieldTag());
95 this->addEvaluatedField(*scatterHolder);
97 this->setName(scatterName+
" Scatter Residual");
205 using Teuchos::rcp_dynamic_cast;
209 const auto& localCellIds = this->
wda(workset).cell_local_ids_k;
210 const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double> >(
blockedContainer_->get_f(),
true);
213 int currentWorksetLIDSubBlock = -1;
214 for (std::size_t fieldIndex = 0; fieldIndex <
scatterFields_.size(); fieldIndex++) {
221 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(
productVectorBlockIndex_[fieldIndex]),
true))->getTpetraVector());
222 const auto& kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::OverwriteAll);
227 const auto& fieldValues =
scatterFields_[fieldIndex].get_static_view();
229 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
230 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
231 const int lid = worksetLIDs(cell,fieldOffsets(basis));
232 Kokkos::atomic_add(&kokkosResidual(lid,0), fieldValues(cell,basis));
287 const Workset & workset_0 = (*d.worksets_)[0];
288 const std::string blockId = this->
wda(workset_0).block_id;
295 const int globalFieldNum =
globalIndexer_->getFieldNum(fieldName);
298 fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName);
300 const std::vector<int>& offsets =
globalIndexer_->getGIDFieldOffsets(blockId,globalFieldNum);
301 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
302 auto hostOffsets = Kokkos::create_mirror_view(
fieldOffsets_[fd]);
303 for (std::size_t i=0; i < offsets.size(); ++i)
304 hostOffsets(i) = offsets[i];
311 int elementBlockGIDCount = 0;
312 for (
const auto& blockDOFMgr :
globalIndexer_->getFieldDOFManagers())
313 elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
315 worksetLIDs_ = Kokkos::View<LO**, Kokkos::LayoutRight, PHX::Device>(
316 "ScatterResidual_BlockedTpetra(Jacobian):worksetLIDs_",
320 const auto& blockGlobalIndexers =
globalIndexer_->getFieldDOFManagers();
321 const int numBlocks =
static_cast<int>(
globalIndexer_->getFieldDOFManagers().size());
322 blockOffsets_ = PHX::View<LO*>(
"ScatterResidual_BlockedTpetra(Jacobian):blockOffsets_",
324 const auto hostBlockOffsets = Kokkos::create_mirror_view(
blockOffsets_);
325 for (
int blk=0;blk<numBlocks;blk++) {
327 hostBlockOffsets(blk) = blockOffset;
329 hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
334 int max_blockDerivativeSize = 0;
335 for (
int blk=0;blk<numBlocks;blk++) {
336 const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
337 if ( blockDerivativeSize > max_blockDerivativeSize )
338 max_blockDerivativeSize = blockDerivativeSize;
340 workset_vals_ = Kokkos::View<typename Sacado::ScalarType<ScalarT>::type**, Kokkos::LayoutRight, PHX::Device>(
341 "ScatterResidual_BlockedTpetra(Jacobian):workset_vals_",
368 using Teuchos::rcp_dynamic_cast;
373 const auto& localCellIds = this->
wda(workset).cell_local_ids_k;
377 const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double> >(
blockedContainer_->get_f());
378 const bool haveResidual = Teuchos::nonnull(thyraBlockResidual);
379 const RCP<BlockedLinearOpBase<double>> Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(
blockedContainer_->get_A(),
true);
385 using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>,
size_t>;
386 typename PHX::View<LocalMatrixType**>::HostMirror
387 hostJacTpetraBlocks(
"panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
389 PHX::View<int**> blockExistsInJac = PHX::View<int**>(
"blockExistsInJac_",numFieldBlocks,numFieldBlocks);
390 auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
392 for (
int row=0; row < numFieldBlocks; ++row) {
393 for (
int col=0; col < numFieldBlocks; ++col) {
394 const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),
false);
395 if (nonnull(thyraTpetraOperator)) {
411 const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),
true);
412 const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
413 const auto managedGraph = managedMatrix.graph;
416 using StaticCrsGraphType =
typename LocalMatrixType::StaticCrsGraphType;
417 StaticCrsGraphType unmanagedGraph;
418 unmanagedGraph.entries =
typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
419 unmanagedGraph.row_map =
typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
420 unmanagedGraph.row_block_offsets =
typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
422 typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
423 LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
424 new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
427 hostBlockExistsInJac(row,col) = 1;
430 hostBlockExistsInJac(row,col) = 0;
434 typename PHX::View<LocalMatrixType**>
435 jacTpetraBlocks(
"panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
436 Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
437 Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
444 const auto& globalIndexers =
globalIndexer_->getFieldDOFManagers();
445 auto blockOffsets_h = Kokkos::create_mirror_view(
blockOffsets_);
447 for (
size_t block=0; block < globalIndexers.size(); ++block) {
448 const auto subviewOfBlockLIDs = Kokkos::subview(
worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
449 globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
453 for (std::size_t fieldIndex = 0; fieldIndex <
scatterFields_.size(); fieldIndex++) {
456 typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
458 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),
true))->getTpetraVector());
459 kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::OverwriteAll);
463 const PHX::View<const int*> fieldOffsets =
fieldOffsets_[fieldIndex];
464 const PHX::View<const ScalarT**> fieldValues =
scatterFields_[fieldIndex].get_static_view();
467 const Kokkos::View<const LO**, Kokkos::LayoutRight, PHX::Device> worksetLIDs =
worksetLIDs_;
468 Kokkos::View<typename Sacado::ScalarType<ScalarT>::type**, Kokkos::LayoutRight, PHX::Device> workset_vals =
workset_vals_;
470 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
471 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
472 typedef PHX::MDField<const ScalarT,Cell,NODE>
FieldType;
473 typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basis);
474 const int rowLID = worksetLIDs(cell,fieldOffsets(basis));
477 Kokkos::atomic_add(&kokkosResidual(rowLID,0), tmpFieldVal.val());
479 for (
int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
480 if (blockExistsInJac(blockRowIndex,blockColIndex)) {
481 const int start = blockOffsets(blockColIndex);
482 const int stop = blockOffsets(blockColIndex+1);
483 const int sensSize = stop-start;
485 for (
int i=0; i < sensSize; ++i)
486 workset_vals(cell,i) = tmpFieldVal.fastAccessDx(start+i);
488 jacTpetraBlocks(blockRowIndex,blockColIndex).sumIntoValues(rowLID, &worksetLIDs(cell,start), sensSize, &workset_vals(cell,0),
true,
true);
497 for (
int row=0; row < numFieldBlocks; ++row) {
498 for (
int col=0; col < numFieldBlocks; ++col) {
499 if (hostBlockExistsInJac(row,col)) {
500 hostJacTpetraBlocks(row,col).~CrsMatrix();