46#ifndef MUELU_AGGREGATES_DEF_HPP
47#define MUELU_AGGREGATES_DEF_HPP
49#include <Xpetra_Map.hpp>
50#include <Xpetra_Vector.hpp>
51#include <Xpetra_MultiVectorFactory.hpp>
52#include <Xpetra_VectorFactory.hpp>
54#include "MueLu_LWGraph_kokkos.hpp"
55#include "MueLu_Graph.hpp"
62 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
79 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
85 vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
88 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
91 isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(),
false);
97 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
99 Aggregates(
const RCP<const Map>& map) {
109 isRoot_ = Teuchos::ArrayRCP<bool>(map->getLocalNumElements(),
false);
115 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
125 int myPID =
GetMap()->getComm()->getRank();
127 auto vertex2AggId =
vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
128 auto procWinner =
procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
131 Kokkos::parallel_for(
"MueLu:Aggregates:ComputeAggregateSizes:for",
range_type(0,procWinner.size()),
132 KOKKOS_LAMBDA(
const LO i) {
133 if (procWinner(i, 0) == myPID)
134 aggregateSizesAtomic(vertex2AggId(i, 0))++;
139 return aggregateSizes;
144 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
145 typename Teuchos::ArrayRCP<LocalOrdinal>
147 ComputeAggregateSizesArrayRCP(
bool forceRecompute)
const {
163 return aggregateSizesArrayRCP;
166 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
169 using row_map_type =
typename local_graph_type::row_map_type;
170 using entries_type =
typename local_graph_type::entries_type;
171 using size_type =
typename local_graph_type::size_type;
175 if (
static_cast<LO
>(
graph_.numRows()) == numAggregates)
178 auto vertex2AggId =
vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
179 auto procWinner =
procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
183 typename row_map_type::non_const_type rows(
"Agg_rows", numAggregates+1);
186 Kokkos::parallel_scan(
"MueLu:Aggregates:GetGraph:compute_rows",
range_type(0, numAggregates),
187 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
193 decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing(
"Agg_offsets"), numAggregates+1);
194 Kokkos::deep_copy(offsets, rows);
196 int myPID =
GetMap()->getComm()->getRank();
200 Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
201 typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
202 Kokkos::deep_copy(numNNZ_host, numNNZ_device);
203 numNNZ = numNNZ_host();
205 typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing(
"Agg_cols"), numNNZ);
207 Kokkos::parallel_reduce(
"MueLu:Aggregates:GetGraph:compute_cols",
range_type(0, procWinner.size()),
208 KOKKOS_LAMBDA(
const LO i,
size_t& nnz) {
209 if (procWinner(i, 0) == myPID) {
210 typedef typename std::remove_reference<
decltype( offsets(0) ) >::type atomic_incr_type;
211 auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
217 "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ <<
" != " << realnnz <<
" = realnnz");
224 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
229 auto vertex2AggId =
vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
231 LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
233 aggPtr =
LO_view(
"aggPtr",numAggs+1);
234 aggNodes =
LO_view(
"aggNodes",numNodes);
235 LO_view aggCurr(
"agg curr",numAggs+1);
238 Kokkos::parallel_scan(
"MueLu:Aggregates:ComputeNodesInAggregate:scan",
range_type(0,numAggs+1),
239 KOKKOS_LAMBDA(
const LO aggIdx, LO& aggOffset,
bool final_pass) {
242 count = aggSizes(aggIdx);
244 aggPtr(aggIdx) = aggOffset;
245 aggCurr(aggIdx) = aggOffset;
247 aggCurr(numAggs) = 0;
253 LO numUnaggregated = 0;
254 Kokkos::parallel_reduce(
"MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize",
range_type(0,numNodes),
255 KOKKOS_LAMBDA(
const LO nodeIdx, LO & count) {
256 if(vertex2AggId(nodeIdx,0)==INVALID)
259 unaggregated =
LO_view(
"unaggregated",numUnaggregated);
262 Kokkos::parallel_for(
"MueLu:Aggregates:ComputeNodesInAggregate:for",
range_type(0,numNodes),
263 KOKKOS_LAMBDA(
const LO nodeIdx) {
264 LO aggIdx = vertex2AggId(nodeIdx,0);
265 if(aggIdx != INVALID) {
267 aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx;
270 unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx;
276 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
282 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
287 if (
numGlobalAggregates_ == -1) out0 <<
"Global number of aggregates: not computed " << std::endl;
292 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
297 GO nGlobalAggregates;
304 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
305 const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Tpetra::KokkosCompat::KokkosDeviceWrapperNode<DeviceType>> >
#define MUELU_UNAGGREGATED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Aggregates(const GraphBase &graph)
Standard constructor for Aggregates structure.
typename LWGraph_kokkos::local_graph_type local_graph_type
LO numAggregates_
Number of aggregates on this processor.
RCP< LOVector > procWinner_
void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
aggregates_sizes_type::HostMirror aggregateSizesHost_
bool aggregatesIncludeGhosts_
Set to false iff aggregates do not include any DOFs belong to other processes.
std::string description() const
Return a simple one-line description of this object.
local_graph_type graph_
Aggregates represented as Kokkos graph type.
aggregates_sizes_type aggregateSizes_
Array of sizes of each local aggregate.
void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, LO_view &unaggregated) const
Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up t...
GO GetNumGlobalAggregatesComputeIfNeeded()
Get global number of aggregates.
Kokkos::View< local_ordinal_type *, device_type > LO_view
local_graph_type GetGraph() const
Teuchos::ArrayRCP< bool > isRoot_
An ArrayRCP of booleans specifying if a local entry is an aggregate root.
RCP< LOMultiVector > vertex2AggId_
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
const RCP< const Map > GetMap() const
returns (overlapping) map of aggregate/node distribution
Kokkos::View< LocalOrdinal *, device_type > aggregates_sizes_type
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
MueLu::GraphBase< LocalOrdinal, GlobalOrdinal, Node > GraphBase
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
void SetNumGlobalAggregates(GO nGlobalAggregates)
Set number of global aggregates on current processor.
GO numGlobalAggregates_
Number of global aggregates.
Container class for aggregation information.
virtual std::string description() const
Return a simple one-line description of this object.
Exception throws to report errors in the internal logical of the program.
virtual const RCP< const Map > GetImportMap() const =0
Lightweight MueLu representation of a compressed row storage graph.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.