437 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
438 typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
440 std::string nspName =
"Nullspace";
441 if(pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
448 RCP<RealValuedMultiVector> fineCoords;
453 RCP<Matrix> Ptentative;
456 if ( aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
457 Ptentative = Teuchos::null;
458 Set(coarseLevel,
"P", Ptentative);
461 RCP<MultiVector> coarseNullspace;
462 RCP<RealValuedMultiVector> coarseCoords;
465 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
466 GO indexBase = coarseMap->getIndexBase();
469 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null)
470 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
472 Array<GO> elementList;
473 ArrayView<const GO> elementListView;
477 elementListView = elementAList;
480 auto numElements = elementAList.size() / blkSize;
482 elementList.resize(numElements);
485 for (LO i = 0; i < Teuchos::as<LO>(numElements); i++)
486 elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
488 elementListView = elementList;
491 auto uniqueMap = fineCoords->getMap();
492 auto coarseCoordMap = MapFactory::Build(coarseMap->lib(), Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
493 elementListView, indexBase, coarseMap->getComm());
494 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordMap, fineCoords->getNumVectors());
497 RCP<RealValuedMultiVector> ghostedCoords = fineCoords;
498 if (aggregates->AggregatesCrossProcessors()) {
499 auto nonUniqueMap = aggregates->GetMap();
500 auto importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
502 ghostedCoords = RealValuedMultiVectorFactory::Build(nonUniqueMap, fineCoords->getNumVectors());
503 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
508 auto aggGraph = aggregates->GetGraph();
509 auto numAggs = aggGraph.numRows();
511 auto fineCoordsView = fineCoords ->getDeviceLocalView(Xpetra::Access::ReadOnly);
512 auto coarseCoordsView = coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll);
518 const auto dim = fineCoords->getNumVectors();
520 typename AppendTrait<
decltype(fineCoordsView), Kokkos::RandomAccess>::type fineCoordsRandomView = fineCoordsView;
521 for (
size_t j = 0; j < dim; j++) {
522 Kokkos::parallel_for(
"MueLu::TentativeP::BuildCoords", Kokkos::RangePolicy<local_ordinal_type, execution_space>(0, numAggs),
523 KOKKOS_LAMBDA(
const LO i) {
527 auto aggregate = aggGraph.rowConst(i);
529 coordinate_type sum = 0.0;
530 for (
size_t colID = 0; colID < static_cast<size_t>(aggregate.length); colID++)
531 sum += fineCoordsRandomView(aggregate(colID),j);
533 coarseCoordsView(i,j) = sum / aggregate.length;
539 if (!aggregates->AggregatesCrossProcessors()) {
540 if(Xpetra::Helpers<SC,LO,GO,NO>::isTpetraBlockCrs(A)) {
541 BuildPuncoupledBlockCrs(coarseLevel,A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,
545 BuildPuncoupled(coarseLevel, A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
549 BuildPcoupled (A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
559 if (A->IsView(
"stridedMaps") ==
true)
560 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
562 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
565 Set(coarseLevel,
"Coordinates", coarseCoords);
567 Set(coarseLevel,
"Nullspace", coarseNullspace);
568 Set(coarseLevel,
"P", Ptentative);
571 RCP<ParameterList> params = rcp(
new ParameterList());
572 params->set(
"printLoadBalancingInfo",
true);
579 BuildPuncoupled(
Level& coarseLevel, RCP<Matrix> A, RCP<Aggregates> aggregates,
580 RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
581 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative,
582 RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
583 auto rowMap = A->getRowMap();
584 auto colMap = A->getColMap();
586 const size_t numRows = rowMap->getLocalNumElements();
587 const size_t NSDim = fineNullspace->getNumVectors();
589 typedef Kokkos::ArithTraits<SC> ATS;
590 using impl_SC =
typename ATS::val_type;
591 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
592 const impl_SC zero = impl_ATS::zero(), one = impl_ATS::one();
594 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
596 typename Aggregates::local_graph_type aggGraph;
599 aggGraph = aggregates->GetGraph();
601 auto aggRows = aggGraph.row_map;
602 auto aggCols = aggGraph.entries;
614 "MueLu: TentativePFactory_kokkos: for now works only with good maps "
615 "(i.e. \"matching\" row and column maps)");
624 LO fullBlockSize, blockID, stridingOffset, stridedBlockSize;
626 amalgInfo->GetStridingInformation(fullBlockSize, blockID, stridingOffset, stridedBlockSize, indexBase);
627 GO globalOffset = amalgInfo->GlobalOffset();
630 auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadOnly);
631 auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadOnly);
632 const size_t numAggregates = aggregates->GetNumAggregates();
634 int myPID = aggregates->GetMap()->getComm()->getRank();
639 typedef typename Aggregates::aggregates_sizes_type::non_const_type AggSizeType;
640 AggSizeType aggDofSizes;
642 if (stridedBlockSize == 1) {
646 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates+1);
648 auto sizesConst = aggregates->ComputeAggregateSizes();
649 Kokkos::deep_copy(Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(1), numAggregates+1)), sizesConst);
655 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates + 1);
657 auto nodeMap = aggregates->GetMap()->getLocalMap();
658 auto dofMap = colMap->getLocalMap();
660 Kokkos::parallel_for(
"MueLu:TentativePF:Build:compute_agg_sizes",
range_type(0,numAggregates),
661 KOKKOS_LAMBDA(
const LO agg) {
662 auto aggRowView = aggGraph.rowConst(agg);
665 for (LO colID = 0; colID < aggRowView.length; colID++) {
666 GO nodeGID = nodeMap.getGlobalElement(aggRowView(colID));
668 for (LO k = 0; k < stridedBlockSize; k++) {
669 GO dofGID = (nodeGID - indexBase) * fullBlockSize + k + indexBase + globalOffset + stridingOffset;
671 if (dofMap.getLocalElement(dofGID) != INVALID)
675 aggDofSizes(agg+1) = size;
682 ReduceMaxFunctor<LO,
decltype(aggDofSizes)> reduceMax(aggDofSizes);
683 Kokkos::parallel_reduce(
"MueLu:TentativePF:Build:max_agg_size",
range_type(0, aggDofSizes.extent(0)), reduceMax, maxAggSize);
687 Kokkos::parallel_scan(
"MueLu:TentativePF:Build:aggregate_sizes:stage1_scan",
range_type(0,numAggregates+1),
688 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
689 update += aggDofSizes(i);
691 aggDofSizes(i) = update;
696 Kokkos::View<LO*, DeviceType> agg2RowMapLO(Kokkos::ViewAllocateWithoutInitializing(
"agg2row_map_LO"), numRows);
700 AggSizeType aggOffsets(Kokkos::ViewAllocateWithoutInitializing(
"aggOffsets"), numAggregates);
701 Kokkos::deep_copy(aggOffsets, Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(0), numAggregates)));
703 Kokkos::parallel_for(
"MueLu:TentativePF:Build:createAgg2RowMap",
range_type(0, vertex2AggId.extent(0)),
704 KOKKOS_LAMBDA(
const LO lnode) {
705 if (procWinner(lnode, 0) == myPID) {
707 auto aggID = vertex2AggId(lnode,0);
709 auto offset = Kokkos::atomic_fetch_add( &aggOffsets(aggID), stridedBlockSize );
713 for (LO k = 0; k < stridedBlockSize; k++)
714 agg2RowMapLO(offset + k) = lnode*stridedBlockSize + k;
721 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
724 auto fineNS = fineNullspace ->getDeviceLocalView(Xpetra::Access::ReadWrite);
725 auto coarseNS = coarseNullspace->getDeviceLocalView(Xpetra::Access::OverwriteAll);
729 typedef typename Xpetra::Matrix<SC,LO,GO,NO>::local_matrix_type local_matrix_type;
730 typedef typename local_matrix_type::row_map_type::non_const_type rows_type;
731 typedef typename local_matrix_type::index_type::non_const_type cols_type;
732 typedef typename local_matrix_type::values_type::non_const_type vals_type;
736 typedef Kokkos::View<int[10], DeviceType> status_type;
737 status_type status(
"status");
739 typename AppendTrait<
decltype(fineNS), Kokkos::RandomAccess>::type fineNSRandom = fineNS;
743 const bool& doQRStep = pL.get<
bool>(
"tentative: calculate qr");
747 GetOStream(
Warnings0) <<
"TentativePFactor : for nontrivial nullspace, this may degrade performance" << std::endl;
750 size_t nnzEstimate = numRows * NSDim;
751 rows_type rowsAux(Kokkos::ViewAllocateWithoutInitializing(
"Ptent_aux_rows"), numRows+1);
752 cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing(
"Ptent_aux_cols"), nnzEstimate);
753 vals_type valsAux(
"Ptent_aux_vals", nnzEstimate);
754 rows_type rows(
"Ptent_rows", numRows+1);
761 Kokkos::parallel_for(
"MueLu:TentativePF:BuildPuncoupled:for1",
range_type(0, numRows+1),
762 KOKKOS_LAMBDA(
const LO row) {
763 rowsAux(row) = row*NSDim;
765 Kokkos::parallel_for(
"MueLu:TentativePF:BuildUncoupled:for2",
range_type(0, nnzEstimate),
766 KOKKOS_LAMBDA(
const LO j) {
767 colsAux(j) = INVALID;
783 const Kokkos::TeamPolicy<execution_space> policy(numAggregates, 1);
786 Kokkos::parallel_for(
"MueLu:TentativePF:BuildUncoupled:main_loop", policy,
787 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type &thread) {
788 auto agg = thread.league_rank();
791 LO aggSize = aggRows(agg+1) - aggRows(agg);
796 auto norm = impl_ATS::magnitude(zero);
801 for (
decltype(aggSize) k = 0; k < aggSize; k++) {
802 auto dnorm = impl_ATS::magnitude(fineNSRandom(agg2RowMapLO(aggRows(agg)+k),0));
809 statusAtomic(1) =
true;
814 coarseNS(agg, 0) = norm;
817 for (
decltype(aggSize) k = 0; k < aggSize; k++) {
818 LO localRow = agg2RowMapLO(aggRows(agg)+k);
819 impl_SC localVal = fineNSRandom(agg2RowMapLO(aggRows(agg)+k),0) / norm;
821 rows(localRow+1) = 1;
822 colsAux(localRow) = agg;
823 valsAux(localRow) = localVal;
828 typename status_type::HostMirror statusHost = Kokkos::create_mirror_view(status);
829 Kokkos::deep_copy(statusHost, status);
830 for (
decltype(statusHost.size()) i = 0; i < statusHost.size(); i++)
832 std::ostringstream oss;
833 oss <<
"MueLu::TentativePFactory::MakeTentative: ";
835 case 0: oss <<
"!goodMap is not implemented";
break;
836 case 1: oss <<
"fine level NS part has a zero column";
break;
842 Kokkos::parallel_for(
"MueLu:TentativePF:BuildUncoupled:main_loop_noqr", policy,
843 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type &thread) {
844 auto agg = thread.league_rank();
847 LO aggSize = aggRows(agg+1) - aggRows(agg);
850 coarseNS(agg, 0) = one;
853 for (
decltype(aggSize) k = 0; k < aggSize; k++) {
854 LO localRow = agg2RowMapLO(aggRows(agg)+k);
855 impl_SC localVal = fineNSRandom(agg2RowMapLO(aggRows(agg)+k),0);
857 rows(localRow+1) = 1;
858 colsAux(localRow) = agg;
859 valsAux(localRow) = localVal;
865 Kokkos::parallel_reduce(
"MueLu:TentativeP:CountNNZ",
range_type(0, numRows+1),
866 KOKKOS_LAMBDA(
const LO i,
size_t &nnz_count) {
867 nnz_count += rows(i);
884 const Kokkos::TeamPolicy<execution_space> policy(numAggregates,1);
886 decltype(aggDofSizes ),
decltype(maxAggSize),
decltype(agg2RowMapLO),
887 decltype(statusAtomic),
decltype(rows),
decltype(rowsAux),
decltype(colsAux),
889 localQRFunctor(fineNSRandom, coarseNS, aggDofSizes, maxAggSize, agg2RowMapLO, statusAtomic,
890 rows, rowsAux, colsAux, valsAux, doQRStep);
891 Kokkos::parallel_reduce(
"MueLu:TentativePF:BuildUncoupled:main_qr_loop", policy, localQRFunctor, nnz);
894 typename status_type::HostMirror statusHost = Kokkos::create_mirror_view(status);
895 Kokkos::deep_copy(statusHost, status);
896 for (
decltype(statusHost.size()) i = 0; i < statusHost.size(); i++)
898 std::ostringstream oss;
899 oss <<
"MueLu::TentativePFactory::MakeTentative: ";
901 case 0: oss <<
"!goodMap is not implemented";
break;
902 case 1: oss <<
"fine level NS part has a zero column";
break;
915 if (nnz != nnzEstimate) {
920 Kokkos::parallel_scan(
"MueLu:TentativePF:Build:compress_rows",
range_type(0,numRows+1),
921 KOKKOS_LAMBDA(
const LO i, LO& upd,
const bool&
final) {
931 cols = cols_type(
"Ptent_cols", nnz);
932 vals = vals_type(
"Ptent_vals", nnz);
937 Kokkos::parallel_for(
"MueLu:TentativePF:Build:compress_cols_vals",
range_type(0,numRows),
938 KOKKOS_LAMBDA(
const LO i) {
939 LO rowStart = rows(i);
942 for (
auto j = rowsAux(i); j < rowsAux(i+1); j++)
943 if (colsAux(j) != INVALID) {
944 cols(rowStart+lnnz) = colsAux(j);
945 vals(rowStart+lnnz) = valsAux(j);
957 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
963 local_matrix_type lclMatrix = local_matrix_type(
"A", numRows, coarseMap->getLocalNumElements(), nnz, vals, rows, cols);
966 RCP<ParameterList> FCparams;
967 if (pL.isSublist(
"matrixmatrix: kernel params"))
968 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
970 FCparams = rcp(
new ParameterList);
973 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
974 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") +
toString(levelID));
976 auto PtentCrs = CrsMatrixFactory::Build(lclMatrix, rowMap, coarseMap, coarseMap, A->getDomainMap());
977 Ptentative = rcp(
new CrsMatrixWrap(PtentCrs));
984 BuildPuncoupledBlockCrs(
Level& coarseLevel, RCP<Matrix> A, RCP<Aggregates> aggregates,
985 RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
986 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative,
987 RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
997 RCP<const Map> rowMap = A->getRowMap();
998 RCP<const Map> rangeMap = A->getRangeMap();
999 RCP<const Map> colMap = A->getColMap();
1001 const size_t numFineBlockRows = rowMap->getLocalNumElements();
1005 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1007 typedef Kokkos::ArithTraits<SC> ATS;
1008 using impl_SC =
typename ATS::val_type;
1009 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
1010 const impl_SC one = impl_ATS::one();
1013 const size_t NSDim = fineNullspace->getNumVectors();
1014 auto aggSizes = aggregates->ComputeAggregateSizes();
1017 typename Aggregates::local_graph_type aggGraph;
1020 aggGraph = aggregates->GetGraph();
1022 auto aggRows = aggGraph.row_map;
1023 auto aggCols = aggGraph.entries;
1030 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
1031 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
1032 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1034 coarsePointMap->getIndexBase(),
1035 coarsePointMap->getComm());
1048 "MueLu: TentativePFactory_kokkos: for now works only with good maps "
1049 "(i.e. \"matching\" row and column maps)");
1058 LO fullBlockSize, blockID, stridingOffset, stridedBlockSize;
1060 amalgInfo->GetStridingInformation(fullBlockSize, blockID, stridingOffset, stridedBlockSize, indexBase);
1064 auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadOnly);
1065 auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadOnly);
1066 const size_t numAggregates = aggregates->GetNumAggregates();
1068 int myPID = aggregates->GetMap()->getComm()->getRank();
1073 typedef typename Aggregates::aggregates_sizes_type::non_const_type AggSizeType;
1074 AggSizeType aggDofSizes;
1080 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates+1);
1082 Kokkos::deep_copy(Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(1), numAggregates+1)), aggSizes);
1088 ReduceMaxFunctor<LO,
decltype(aggDofSizes)> reduceMax(aggDofSizes);
1089 Kokkos::parallel_reduce(
"MueLu:TentativePF:Build:max_agg_size",
range_type(0, aggDofSizes.extent(0)), reduceMax, maxAggSize);
1093 Kokkos::parallel_scan(
"MueLu:TentativePF:Build:aggregate_sizes:stage1_scan",
range_type(0,numAggregates+1),
1094 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
1095 update += aggDofSizes(i);
1097 aggDofSizes(i) = update;
1102 Kokkos::View<LO*, DeviceType> aggToRowMapLO(Kokkos::ViewAllocateWithoutInitializing(
"aggtorow_map_LO"), numFineBlockRows);
1106 AggSizeType aggOffsets(Kokkos::ViewAllocateWithoutInitializing(
"aggOffsets"), numAggregates);
1107 Kokkos::deep_copy(aggOffsets, Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(0), numAggregates)));
1109 Kokkos::parallel_for(
"MueLu:TentativePF:Build:createAgg2RowMap",
range_type(0, vertex2AggId.extent(0)),
1110 KOKKOS_LAMBDA(
const LO lnode) {
1111 if (procWinner(lnode, 0) == myPID) {
1113 auto aggID = vertex2AggId(lnode,0);
1115 auto offset = Kokkos::atomic_fetch_add( &aggOffsets(aggID), stridedBlockSize );
1119 for (LO k = 0; k < stridedBlockSize; k++)
1120 aggToRowMapLO(offset + k) = lnode*stridedBlockSize + k;
1127 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
1130 auto fineNS = fineNullspace ->getDeviceLocalView(Xpetra::Access::ReadWrite);
1131 auto coarseNS = coarseNullspace->getDeviceLocalView(Xpetra::Access::OverwriteAll);
1133 typedef typename Xpetra::Matrix<SC,LO,GO,NO>::local_matrix_type local_matrix_type;
1134 typedef typename local_matrix_type::row_map_type::non_const_type rows_type;
1135 typedef typename local_matrix_type::index_type::non_const_type cols_type;
1140 typedef Kokkos::View<int[10], DeviceType> status_type;
1141 status_type status(
"status");
1143 typename AppendTrait<
decltype(fineNS), Kokkos::RandomAccess>::type fineNSRandom = fineNS;
1153 rows_type ia(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_rowptr"), numFineBlockRows+1);
1154 cols_type ja(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_colind"), numFineBlockRows);
1156 Kokkos::parallel_for(
"MueLu:TentativePF:BlockCrs:graph_init",
range_type(0, numFineBlockRows),
1157 KOKKOS_LAMBDA(
const LO j) {
1161 if(j==(LO)numFineBlockRows-1)
1162 ia[numFineBlockRows] = numFineBlockRows;
1166 const Kokkos::TeamPolicy<execution_space> policy(numAggregates, 1);
1167 Kokkos::parallel_for(
"MueLu:TentativePF:BlockCrs:fillGraph", policy,
1168 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type &thread) {
1169 auto agg = thread.league_rank();
1170 Xpetra::global_size_t offset = agg;
1173 LO aggSize = aggRows(agg+1) - aggRows(agg);
1175 for (LO j = 0; j < aggSize; j++) {
1177 const LO localRow = aggToRowMapLO[aggDofSizes[agg]+j];
1178 const size_t rowStart = ia[localRow];
1179 ja[rowStart] = offset;
1189 rows_type i_temp(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_rowptr"), numFineBlockRows+1);
1191 Kokkos::parallel_scan(
"MueLu:TentativePF:BlockCrs:compress_rows",
range_type(0,numFineBlockRows),
1192 KOKKOS_LAMBDA(
const LO i, LO& upd,
const bool&
final) {
1195 for (
auto j = ia[i]; j < ia[i+1]; j++)
1196 if (ja[j] != INVALID)
1198 if(
final && i == (LO) numFineBlockRows-1)
1199 i_temp[numFineBlockRows] = upd;
1202 cols_type j_temp(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_colind"), nnz);
1205 Kokkos::parallel_for(
"MueLu:TentativePF:BlockCrs:compress_cols",
range_type(0,numFineBlockRows),
1206 KOKKOS_LAMBDA(
const LO i) {
1207 size_t rowStart = i_temp[i];
1209 for (
auto j = ia[i]; j < ia[i+1]; j++)
1210 if (ja[j] != INVALID) {
1211 j_temp[rowStart+lnnz] = ja[j];
1220 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap,coarseBlockMap,ia,ja);
1225 RCP<ParameterList> FCparams;
1226 if(pL.isSublist(
"matrixmatrix: kernel params"))
1227 FCparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1229 FCparams= rcp(
new ParameterList);
1231 FCparams->set(
"compute global constants",FCparams->get(
"compute global constants",
false));
1232 std::string levelIDs =
toString(levelID);
1233 FCparams->set(
"Timer Label",std::string(
"MueLu::TentativeP-")+levelIDs);
1234 RCP<const Export> dummy_e;
1235 RCP<const Import> dummy_i;
1236 BlockGraph->expertStaticFillComplete(coarseBlockMap,rowMap,dummy_i,dummy_e,FCparams);
1247 RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap,NSDim);
1248 RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
1249 if(P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
1250 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
1252 auto values = P_tpetra->getTpetra_BlockCrsMatrix()->getValuesDeviceNonConst();
1253 const LO stride = NSDim*NSDim;
1255 Kokkos::parallel_for(
"MueLu:TentativePF:BlockCrs:main_loop_noqr", policy,
1256 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type &thread) {
1257 auto agg = thread.league_rank();
1260 LO aggSize = aggRows(agg+1) - aggRows(agg);
1261 Xpetra::global_size_t offset = agg*NSDim;
1264 for (LO j = 0; j < aggSize; j++) {
1265 LO localBlockRow = aggToRowMapLO(aggRows(agg)+j);
1266 LO rowStart = localBlockRow * stride;
1267 for (LO r = 0; r < (LO)NSDim; r++) {
1268 LO localPointRow = localBlockRow*NSDim + r;
1269 for (LO c = 0; c < (LO)NSDim; c++) {
1270 values[rowStart + r*NSDim + c] = fineNSRandom(localPointRow,c);
1276 for(LO j=0; j<(LO)NSDim; j++)
1277 coarseNS(offset+j,j) = one;
1280 Ptentative = P_wrap;