MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_UncoupledAggregationFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
47#define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
48
49#include <climits>
50
51#include <Xpetra_Map.hpp>
52#include <Xpetra_Vector.hpp>
53#include <Xpetra_MultiVectorFactory.hpp>
54#include <Xpetra_VectorFactory.hpp>
55
57
58#include "MueLu_OnePtAggregationAlgorithm_kokkos.hpp"
59#include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos.hpp"
60
61#include "MueLu_AggregationPhase1Algorithm_kokkos.hpp"
62#include "MueLu_AggregationPhase2aAlgorithm_kokkos.hpp"
63#include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp"
64#include "MueLu_AggregationPhase3Algorithm_kokkos.hpp"
65
66#include "MueLu_Level.hpp"
67#include "MueLu_LWGraph_kokkos.hpp"
68#include "MueLu_Aggregates.hpp"
69#include "MueLu_MasterList.hpp"
70#include "MueLu_Monitor.hpp"
71
72#include "KokkosGraph_Distance2ColorHandle.hpp"
73#include "KokkosGraph_Distance2Color.hpp"
74#include "KokkosGraph_MIS2.hpp"
75
76namespace MueLu {
77
78 template <class LocalOrdinal, class GlobalOrdinal, class Node>
82
83 template <class LocalOrdinal, class GlobalOrdinal, class Node>
85 RCP<ParameterList> validParamList = rcp(new ParameterList());
86
87 // Aggregation parameters (used in aggregation algorithms)
88 // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
89
90 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
91#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
92 SET_VALID_ENTRY("aggregation: max agg size");
93 SET_VALID_ENTRY("aggregation: min agg size");
94 SET_VALID_ENTRY("aggregation: max selected neighbors");
95 SET_VALID_ENTRY("aggregation: ordering");
96 validParamList->getEntry("aggregation: ordering").setValidator(
97 rcp(new validatorType(Teuchos::tuple<std::string>("natural", "graph", "random"), "aggregation: ordering")));
98 SET_VALID_ENTRY("aggregation: deterministic");
99 SET_VALID_ENTRY("aggregation: coloring algorithm");
100 SET_VALID_ENTRY("aggregation: enable phase 1");
101 SET_VALID_ENTRY("aggregation: enable phase 2a");
102 SET_VALID_ENTRY("aggregation: enable phase 2b");
103 SET_VALID_ENTRY("aggregation: enable phase 3");
104 SET_VALID_ENTRY("aggregation: match ML phase2a");
105 SET_VALID_ENTRY("aggregation: phase3 avoid singletons");
106 SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors");
107 SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
108 SET_VALID_ENTRY("aggregation: allow user-specified singletons");
109 SET_VALID_ENTRY("aggregation: phase 1 algorithm");
110#undef SET_VALID_ENTRY
111
112 // general variables needed in AggregationFactory
113 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
114 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
115
116 // special variables necessary for OnePtAggregationAlgorithm
117 validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
118 validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
119 //validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
120
121 return validParamList;
122 }
123
124 template <class LocalOrdinal, class GlobalOrdinal, class Node>
126 Input(currentLevel, "Graph");
127 Input(currentLevel, "DofsPerNode");
128
129 const ParameterList& pL = GetParameterList();
130
131 // request special data necessary for OnePtAggregationAlgorithm
132 std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
133 if (mapOnePtName.length() > 0) {
134 std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
135 if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
136 currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
137 } else {
138 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
139 currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
140 }
141 }
142 }
143
144 template <class LocalOrdinal, class GlobalOrdinal, class Node>
146 Build(Level &currentLevel) const {
147 using execution_space = typename LWGraph_kokkos::execution_space;
148 using memory_space = typename LWGraph_kokkos::memory_space;
149 using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type;
150 FactoryMonitor m(*this, "Build", currentLevel);
151
152 ParameterList pL = GetParameterList();
153 bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
154
155 if (pL.get<int>("aggregation: max agg size") == -1)
156 pL.set("aggregation: max agg size", INT_MAX);
157
158 // define aggregation algorithms
159 RCP<const FactoryBase> graphFact = GetFactory("Graph");
160
161 // TODO Can we keep different aggregation algorithms over more Build calls?
162 algos_.clear();
163 algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact)));
164 if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact)));
165 if (pL.get<bool>("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact)));
166 if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact)));
167 if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact)));
168 if (pL.get<bool>("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact)));
169
170 std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
171 RCP<Map> OnePtMap = Teuchos::null;
172 if (mapOnePtName.length()) {
173 std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
174 if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
175 OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, NoFactory::get());
176 } else {
177 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
178 OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, mapOnePtFact.get());
179 }
180 }
181
182 RCP<const LWGraph_kokkos> graph = Get< RCP<LWGraph_kokkos> >(currentLevel, "Graph");
183
184 // Build
185 RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
186 aggregates->setObjectLabel("UC");
187
188 const LO numRows = graph->GetNodeNumVertices();
189
190 // construct aggStat information
191 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type> aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"),
192 numRows);
193 Kokkos::deep_copy(aggStat, READY);
194
195 // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map
196 // even if the map is correctly extracted from the graph, aggStat is
197 // now a Kokkos::View<unsinged*, memory_space> and filling it will
198 // require a parallel_for or to copy it to the Host which is not really
199 // good from a performance point of view.
200 // If dirichletBoundaryMap was an actual Xpetra::Map, one could call
201 // getLocalMap to have a Kokkos::View on the appropriate memory_space
202 // instead of an ArrayRCP.
203 {
204 typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap();
205 Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
206 Kokkos::RangePolicy<local_ordinal_type, execution_space>(0, numRows),
207 KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) {
208 if (dirichletBoundaryMap(nodeIdx) == true) {
209 aggStat(nodeIdx) = BOUNDARY;
210 }
211 });
212 }
213
214 LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
215 GO indexBase = graph->GetDomainMap()->getIndexBase();
216 if (OnePtMap != Teuchos::null) {
217 typename Kokkos::View<unsigned*,typename LWGraph_kokkos::device_type>::HostMirror aggStatHost
218 = Kokkos::create_mirror_view(aggStat);
219 Kokkos::deep_copy(aggStatHost, aggStat);
220
221 for (LO i = 0; i < numRows; i++) {
222 // reconstruct global row id (FIXME only works for contiguous maps)
223 GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;
224
225 for (LO kr = 0; kr < nDofsPerNode; kr++)
226 if (OnePtMap->isNodeGlobalElement(grid + kr))
227 aggStatHost(i) = ONEPT;
228 }
229
230 Kokkos::deep_copy(aggStat, aggStatHost);
231 }
232
233 const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
234 GO numGlobalRows = 0;
235 if (IsPrint(Statistics1))
236 MueLu_sumAll(comm, as<GO>(numRows), numGlobalRows);
237
238 LO numNonAggregatedNodes = numRows;
239 std::string aggAlgo = pL.get<std::string>("aggregation: coloring algorithm");
240 if(aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation")
241 {
242 SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel);
243 using graph_t = typename LWGraph_kokkos::local_graph_type;
244 using device_t = typename graph_t::device_type;
245 using exec_space = typename device_t::execution_space;
246 using rowmap_t = typename graph_t::row_map_type;
247 using colinds_t = typename graph_t::entries_type;
248 using lno_t = typename colinds_t::non_const_value_type;
249 rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs();
250 colinds_t aColinds = graph->getLocalLWGraph().getEntries();
251 lno_t numAggs = 0;
252 typename colinds_t::non_const_type labels;
253
254 if(aggAlgo == "mis2 coarsening")
255 {
256 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl;
257 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
258 }
259 else if(aggAlgo == "mis2 aggregation")
260 {
261 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl;
262 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
263 }
264 auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
265 auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::OverwriteAll);
266 int rank = comm->getRank();
267 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0, numRows),
268 KOKKOS_LAMBDA(lno_t i)
269 {
270 procWinner(i, 0) = rank;
271 if(aggStat(i) == READY)
272 {
273 aggStat(i) = AGGREGATED;
274 vertex2AggId(i, 0) = labels(i);
275 }
276 });
277 numNonAggregatedNodes = 0;
278 aggregates->SetNumAggregates(numAggs);
279 }
280 else
281 {
282 {
283 SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel);
284
285 // LBV on Sept 06 2019: the note below is a little worrisome,
286 // can we guarantee that MueLu is never used on a non-symmetric
287 // graph?
288 // note: just using colinds_view in place of scalar_view_t type
289 // (it won't be used at all by symbolic SPGEMM)
290 using graph_t = typename LWGraph_kokkos::local_graph_type;
291 using KernelHandle = KokkosKernels::Experimental::
292 KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
293 typename graph_t::entries_type::value_type,
294 typename graph_t::entries_type::value_type,
295 typename graph_t::device_type::execution_space,
296 typename graph_t::device_type::memory_space,
297 typename graph_t::device_type::memory_space>;
298 KernelHandle kh;
299 //leave gc algorithm choice as the default
300 kh.create_distance2_graph_coloring_handle();
301
302 // get the distance-2 graph coloring handle
303 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
304
305 // Set the distance-2 graph coloring algorithm to use.
306 // Options:
307 // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
308 // COLORING_D2_SERIAL - Use the legacy serial-only implementation
309 // COLORING_D2_VB - Use the parallel vertex based direct method
310 // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
311 // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
312 // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest)
313 if(pL.get<bool>("aggregation: deterministic") == true) {
314 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
315 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
316 } else if(aggAlgo == "serial") {
317 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
318 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
319 } else if(aggAlgo == "default") {
320 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT );
321 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl;
322 } else if(aggAlgo == "vertex based") {
323 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB );
324 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl;
325 } else if(aggAlgo == "vertex based bit set") {
326 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT );
327 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl;
328 } else if(aggAlgo == "edge filtering") {
329 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF );
330 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl;
331 } else if(aggAlgo == "net based bit set") {
332 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT );
333 if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl;
334 } else {
335 TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
336 }
337
338 //Create device views for graph rowptrs/colinds
339 typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs();
340 typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries();
341
342 //run d2 graph coloring
343 //graph is symmetric so row map/entries and col map/entries are the same
344 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
345
346 // extract the colors and store them in the aggregates
347 aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
348 aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
349
350
351 //clean up coloring handle
352 kh.destroy_distance2_graph_coloring_handle();
353 }
354
355 if (IsPrint(Statistics1)) {
356 GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl;
357 }
358 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
359 for (size_t a = 0; a < algos_.size(); a++) {
360 std::string phase = algos_[a]->description();
361 SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel);
362
363 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
364 algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
365 algos_[a]->SetProcRankVerbose(oldRank);
366
367 if (IsPrint(Statistics1)) {
368 GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
369 GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
370 MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
371 MueLu_sumAll(comm, numLocalAggs, numGlobalAggs);
372
373 double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
374 if (aggPercent > 99.99 && aggPercent < 100.00) {
375 // Due to round off (for instance, for 140465733/140466897), we could
376 // get 100.00% display even if there are some remaining nodes. This
377 // is bad from the users point of view. It is much better to change
378 // it to display 99.99%.
379 aggPercent = 99.99;
380 }
381 GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
382 << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
383 << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
384 << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
385 numGlobalAggregatedPrev = numGlobalAggregated;
386 numGlobalAggsPrev = numGlobalAggs;
387 }
388 }
389 }
390
391 TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
392
393 aggregates->AggregatesCrossProcessors(false);
394 aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
395
396 Set(currentLevel, "Aggregates", aggregates);
397
398 }
399
400} //namespace MueLu
401
402#endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
Container class for aggregation information.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
const RCP< const FactoryBase > GetFactory(const std::string &varName) const
Default implementation of FactoryAcceptor::GetFactory().
Class that holds all level-specific information.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput().
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
std::vector< RCP< MueLu::AggregationAlgorithmBase_kokkos< LocalOrdinal, GlobalOrdinal, Node > > > algos_
Append a new aggregation algorithm to list of aggregation algorithms.
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
int GetProcRankVerbose() const
Get proc rank used for printing. Do not use this information for any other purpose....
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.