147 using execution_space =
typename LWGraph_kokkos::execution_space;
148 using memory_space =
typename LWGraph_kokkos::memory_space;
149 using local_ordinal_type =
typename LWGraph_kokkos::local_ordinal_type;
155 if (pL.get<
int>(
"aggregation: max agg size") == -1)
156 pL.set(
"aggregation: max agg size", INT_MAX);
159 RCP<const FactoryBase> graphFact =
GetFactory(
"Graph");
170 std::string mapOnePtName = pL.get<std::string>(
"OnePt aggregate map name");
171 RCP<Map> OnePtMap = Teuchos::null;
172 if (mapOnePtName.length()) {
173 std::string mapOnePtFactName = pL.get<std::string>(
"OnePt aggregate map factory");
174 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
177 RCP<const FactoryBase> mapOnePtFact =
GetFactory(mapOnePtFactName);
178 OnePtMap = currentLevel.
Get<RCP<Map> >(mapOnePtName, mapOnePtFact.get());
185 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
186 aggregates->setObjectLabel(
"UC");
188 const LO numRows = graph->GetNodeNumVertices();
191 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type> aggStat(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"),
193 Kokkos::deep_copy(aggStat,
READY);
204 typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap();
205 Kokkos::parallel_for(
"MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
206 Kokkos::RangePolicy<local_ordinal_type, execution_space>(0, numRows),
207 KOKKOS_LAMBDA(
const local_ordinal_type nodeIdx) {
208 if (dirichletBoundaryMap(nodeIdx) ==
true) {
214 LO nDofsPerNode =
Get<LO>(currentLevel,
"DofsPerNode");
215 GO indexBase = graph->GetDomainMap()->getIndexBase();
216 if (OnePtMap != Teuchos::null) {
217 typename Kokkos::View<unsigned*,typename LWGraph_kokkos::device_type>::HostMirror aggStatHost
218 = Kokkos::create_mirror_view(aggStat);
219 Kokkos::deep_copy(aggStatHost, aggStat);
221 for (LO i = 0; i < numRows; i++) {
223 GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;
225 for (LO kr = 0; kr < nDofsPerNode; kr++)
226 if (OnePtMap->isNodeGlobalElement(grid + kr))
227 aggStatHost(i) =
ONEPT;
230 Kokkos::deep_copy(aggStat, aggStatHost);
233 const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
234 GO numGlobalRows = 0;
238 LO numNonAggregatedNodes = numRows;
239 std::string aggAlgo = pL.get<std::string>(
"aggregation: coloring algorithm");
240 if(aggAlgo ==
"mis2 coarsening" || aggAlgo ==
"mis2 aggregation")
243 using graph_t =
typename LWGraph_kokkos::local_graph_type;
244 using device_t =
typename graph_t::device_type;
245 using exec_space =
typename device_t::execution_space;
246 using rowmap_t =
typename graph_t::row_map_type;
247 using colinds_t =
typename graph_t::entries_type;
248 using lno_t =
typename colinds_t::non_const_value_type;
249 rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs();
250 colinds_t aColinds = graph->getLocalLWGraph().getEntries();
252 typename colinds_t::non_const_type labels;
254 if(aggAlgo ==
"mis2 coarsening")
257 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
259 else if(aggAlgo ==
"mis2 aggregation")
262 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
264 auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
265 auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::OverwriteAll);
266 int rank = comm->getRank();
267 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0, numRows),
268 KOKKOS_LAMBDA(lno_t i)
270 procWinner(i, 0) = rank;
271 if(aggStat(i) ==
READY)
274 vertex2AggId(i, 0) = labels(i);
277 numNonAggregatedNodes = 0;
278 aggregates->SetNumAggregates(numAggs);
290 using graph_t =
typename LWGraph_kokkos::local_graph_type;
291 using KernelHandle = KokkosKernels::Experimental::
292 KokkosKernelsHandle<
typename graph_t::row_map_type::value_type,
293 typename graph_t::entries_type::value_type,
294 typename graph_t::entries_type::value_type,
295 typename graph_t::device_type::execution_space,
296 typename graph_t::device_type::memory_space,
297 typename graph_t::device_type::memory_space>;
300 kh.create_distance2_graph_coloring_handle();
303 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
313 if(pL.get<
bool>(
"aggregation: deterministic") ==
true) {
314 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
316 }
else if(aggAlgo ==
"serial") {
317 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
319 }
else if(aggAlgo ==
"default") {
320 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT );
322 }
else if(aggAlgo ==
"vertex based") {
323 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB );
325 }
else if(aggAlgo ==
"vertex based bit set") {
326 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT );
328 }
else if(aggAlgo ==
"edge filtering") {
329 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF );
331 }
else if(aggAlgo ==
"net based bit set") {
332 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT );
335 TEUCHOS_TEST_FOR_EXCEPTION(
true,std::invalid_argument,
"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
339 typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs();
340 typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries();
344 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
347 aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
348 aggregates->SetGraphNumColors(
static_cast<LO
>(coloringHandle->get_num_colors()));
352 kh.destroy_distance2_graph_coloring_handle();
358 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
359 for (
size_t a = 0; a <
algos_.size(); a++) {
360 std::string phase =
algos_[a]->description();
364 algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
365 algos_[a]->SetProcRankVerbose(oldRank);
368 GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
369 GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
370 MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
373 double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
374 if (aggPercent > 99.99 && aggPercent < 100.00) {
381 GetOStream(
Statistics1) <<
" aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) <<
" (phase), " << std::fixed
382 << std::setprecision(2) << numGlobalAggregated <<
"/" << numGlobalRows <<
" [" << aggPercent <<
"%] (total)\n"
383 <<
" remaining : " << numGlobalRows - numGlobalAggregated <<
"\n"
384 <<
" aggregates : " << numGlobalAggs-numGlobalAggsPrev <<
" (phase), " << numGlobalAggs <<
" (total)" << std::endl;
385 numGlobalAggregatedPrev = numGlobalAggregated;
386 numGlobalAggsPrev = numGlobalAggs;
391 TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes,
Exceptions::RuntimeError,
"MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
393 aggregates->AggregatesCrossProcessors(
false);
394 aggregates->ComputeAggregateSizes(
true);
396 Set(currentLevel,
"Aggregates", aggregates);