MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_Aggregates_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATES_DEF_HPP
47#define MUELU_AGGREGATES_DEF_HPP
48
49#include <Xpetra_Map.hpp>
50#include <Xpetra_Vector.hpp>
51#include <Xpetra_MultiVectorFactory.hpp>
52#include <Xpetra_VectorFactory.hpp>
53
54#include "MueLu_LWGraph_kokkos.hpp"
55#include "MueLu_Graph.hpp"
56#include "MueLu_GraphBase.hpp"
59
60namespace MueLu {
61
// Constructor body. The signature and several statements are missing from this
// listing (the embedded line numbers skip 63-65, 68 and 76). It receives a `graph`
// object and sizes all per-vertex storage by graph.GetImportMap().
62 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
66
   // Vertex -> aggregate id storage on the import map. The second argument (1) is
   // presumably the number of columns; other methods in this file only read column 0.
67 vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
69
   // Per-vertex process id storage, filled with MUELU_UNASSIGNED.
70 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
71 procWinner_->putScalar(MUELU_UNASSIGNED);
72
   // One root flag per local element of the import map, all initially false.
73 isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);
74
75 // slow but safe, force TentativePFactory to build column map for P itself
   // NOTE(review): the statement the comment above describes (listing line 76) is not visible here.
77 }
78
// Constructor from an LWGraph_kokkos. The class-qualifier line and several
// statements are missing from this listing (the embedded line numbers skip 80,
// 82-83, 86 and 94). All per-vertex storage is sized by graph.GetImportMap().
79 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
81 Aggregates(LWGraph_kokkos graph) {
84
   // Vertex -> aggregate id storage on the import map. The second argument (1) is
   // presumably the number of columns; other methods in this file only read column 0.
85 vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
87
   // Per-vertex process id storage, filled with MUELU_UNASSIGNED.
88 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
89 procWinner_->putScalar(MUELU_UNASSIGNED);
90
   // One root flag per local element of the import map, all initially false.
91 isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);
92
93 // slow but safe, force TentativePFactory to build column map for P itself
   // NOTE(review): the statement the comment above describes (listing line 94) is not visible here.
95 }
96
// Constructor from a map. The class-qualifier line and several statements are
// missing from this listing (the embedded line numbers skip 98, 101, 104 and 112).
// All per-vertex storage is sized by `map`; the local aggregate count starts at 0.
97 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
99 Aggregates(const RCP<const Map>& map) {
100 numAggregates_ = 0;
102
    // Vertex -> aggregate id storage on `map`. The second argument (1) is presumably
    // the number of columns; other methods in this file only read column 0.
103 vertex2AggId_ = LOMultiVectorFactory::Build(map, 1);
105
    // Per-vertex process id storage, filled with MUELU_UNASSIGNED.
106 procWinner_ = LOVectorFactory::Build(map);
107 procWinner_->putScalar(MUELU_UNASSIGNED);
108
    // One root flag per local element of `map`, all initially false.
109 isRoot_ = Teuchos::ArrayRCP<bool>(map->getLocalNumElements(), false);
110
111 // slow but safe, force TentativePFactory to build column map for P itself
    // NOTE(review): the statement the comment above describes (listing line 112) is not visible here.
113 }
114
// Body of a method that uses `forceRecompute`; the signature lines (listing lines
// 116-117) are missing from this extract -- presumably
// ComputeAggregateSizes(bool forceRecompute) const.
// Returns a view with one counter per local aggregate: the number of local entries
// assigned to that aggregate whose procWinner equals this process's rank.
// The result is stored in aggregateSizes_ and reused on later calls unless
// forceRecompute is true.
115 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
    // The cache is only reused when it is non-empty, so an empty cache always takes
    // the recompute branch.
118 if (aggregateSizes_.size() && !forceRecompute) {
119 return aggregateSizes_;
120
121 } else {
122 // It is necessary to initialize this to 0
123 aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);
124
125 int myPID = GetMap()->getComm()->getRank();
126
127 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
128 auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
129
    // Second handle to the same counters carrying the Kokkos::Atomic trait, presumably
    // so that concurrent increments of one aggregate's counter do not race.
130 typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
131 Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()),
132 KOKKOS_LAMBDA(const LO i) {
    // Only entries won by this process are counted.
    // NOTE(review): assumes such entries always carry a valid aggregate id in
    // [0, numAggregates_) -- there is no bounds check on the index.
133 if (procWinner(i, 0) == myPID)
134 aggregateSizesAtomic(vertex2AggId(i, 0))++;
135 });
136
    // Update the cache so the next call without forceRecompute can return early.
137 aggregateSizes_ = aggregateSizes;
138
139 return aggregateSizes;
140 }
141
142 }
143
// Returns the aggregate sizes as a Teuchos::ArrayRCP backed by a host copy.
// The class-qualifier line (listing line 146) is missing from this extract.
//
// forceRecompute is forwarded to ComputeAggregateSizes() and also decides whether an
// already-allocated host copy is refreshed.
// The returned ArrayRCP does NOT own its memory: it points into aggregateSizesHost_
// and is only valid while that member keeps its current allocation.
144 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
145 typename Teuchos::ArrayRCP<LocalOrdinal>
147 ComputeAggregateSizesArrayRCP(bool forceRecompute) const {
148 auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute);
149
150 // if this is the first time this is called, setup the host mirror and fill it
151 if(!aggregateSizesHost_.is_allocated()) {
152 aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes);
153 Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
154 } else {
155 // otherwise, only update if we forced a recompute
    // NOTE(review): the mirror is never re-created once allocated. If the sizes view
    // was recomputed elsewhere or its extent changed since the first call, the host
    // copy may be stale or mismatched in size -- confirm callers cannot hit this.
156 if(forceRecompute)
157 Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
158 }
159
160 // put the data in an ArrayRCP, but do not give it ownership of the data
    // Arguments: raw pointer, lower offset 0, element count, has_ownership = false.
161 Teuchos::ArrayRCP<LocalOrdinal> aggregateSizesArrayRCP(aggregateSizesHost_.data(),0,aggregateSizesHost_.extent(0),false);
162
163 return aggregateSizesArrayRCP;
164 }
165
// Body of a method that builds and returns graph_, a compressed-row structure with
// one row per local aggregate whose column entries are the local indices of the
// entries in that aggregate that are won by this process. The signature lines
// (listing lines 167-168) are missing from this extract.
// The previously built graph_ is returned as-is when its row count already equals
// numAggregates_.
// Throws Exceptions::RuntimeError if the number of entries written differs from
// the total obtained from the row offsets.
166 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
169 using row_map_type = typename local_graph_type::row_map_type;
170 using entries_type = typename local_graph_type::entries_type;
171 using size_type = typename local_graph_type::size_type;
172
173 auto numAggregates = numAggregates_;
174
    // Cache check: a graph with the right number of rows is assumed to be current.
175 if (static_cast<LO>(graph_.numRows()) == numAggregates)
176 return graph_;
177
178 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
179 auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
180 auto sizes = ComputeAggregateSizes();
181
182 // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
183 typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically
184
185 // parallel_scan (exclusive)
    // The running total is stored at rows(i+1) after sizes(i) has been added, so
    // rows(i) is the start offset of aggregate i and rows(numAggregates) is the total.
186 Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
187 KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
188 update += sizes(i);
189 if (final_pass)
190 rows(i+1) = update;
191 });
192
    // offsets starts as a copy of rows and is advanced atomically below; it serves
    // as the per-aggregate insertion cursor while rows itself stays untouched.
193 decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease
194 Kokkos::deep_copy(offsets, rows);
195
196 int myPID = GetMap()->getComm()->getRank();
197
    // Read the total entry count, rows(numAggregates), back into a host scalar.
198 size_type numNNZ;
199 {
200 Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
201 typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
202 Kokkos::deep_copy(numNNZ_host, numNNZ_device);
203 numNNZ = numNNZ_host();
204 }
205 typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ);
    // Fill the column entries and count how many were written in the same pass.
    // NOTE(review): slots are claimed with atomic_fetch_add, so the order of entries
    // within a row presumably depends on the execution order of the iterations.
206 size_t realnnz = 0;
207 Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
208 KOKKOS_LAMBDA(const LO i, size_t& nnz) {
209 if (procWinner(i, 0) == myPID) {
210 typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type;
211 auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
212 cols(idx) = i;
213 nnz++;
214 }
215 }, realnnz);
    // Consistency check: entries written must match the total taken from the row offsets.
216 TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError,
217 "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz");
218
219 graph_ = local_graph_type(cols, rows);
220
221 return graph_;
222 }
223
// Body of a void method that fills three output views; the signature line (listing
// line 226) is missing from this extract -- presumably
// ComputeNodesInAggregate(LO_view& aggPtr, LO_view& aggNodes, LO_view& unaggregated) const.
//
// Outputs (all re-allocated here):
//   aggPtr       - numAggs+1 start offsets into aggNodes, one per aggregate plus the end.
//   aggNodes     - node indices grouped by aggregate; allocated with numNodes entries.
//   unaggregated - indices of the nodes whose aggregate id equals
//                  Teuchos::OrdinalTraits<LO>::invalid().
// Aggregate sizes are always recomputed (forceRecompute = true).
224 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
225 void
227 LO numAggs = GetNumAggregates();
228 LO numNodes = vertex2AggId_->getLocalLength();
229 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
230 typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true);
231 LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
232
233 aggPtr = LO_view("aggPtr",numAggs+1);
234 aggNodes = LO_view("aggNodes",numNodes);
    // aggCurr holds a moving write cursor per aggregate; its extra last entry counts
    // unaggregated nodes.
235 LO_view aggCurr("agg curr",numAggs+1);
236
237 // Construct the "rowptr" and the counter
    // The range is numAggs+1 so the final iteration (which adds a size of 0) stores
    // the grand total in aggPtr(numAggs). Offsets are written before the current size
    // is added, making aggPtr(aggIdx) the start of aggregate aggIdx.
238 Kokkos::parallel_scan("MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0,numAggs+1),
239 KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) {
240 LO count = 0;
241 if(aggIdx < numAggs)
242 count = aggSizes(aggIdx);
243 if(final_pass) {
244 aggPtr(aggIdx) = aggOffset;
245 aggCurr(aggIdx) = aggOffset;
246 if(aggIdx==numAggs)
247 aggCurr(numAggs) = 0; // use this for counting unaggregated nodes
248 }
249 aggOffset += count;
250 });
251
252 // Preallocate unaggregated to the correct size
253 LO numUnaggregated = 0;
254 Kokkos::parallel_reduce("MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0,numNodes),
255 KOKKOS_LAMBDA(const LO nodeIdx, LO & count) {
256 if(vertex2AggId(nodeIdx,0)==INVALID)
257 count++;
258 }, numUnaggregated);
259 unaggregated = LO_view("unaggregated",numUnaggregated);
260
261 // Stick the nodes in each aggregate's spot
    // NOTE(review): the sizes from ComputeAggregateSizes count only entries whose
    // procWinner equals this rank, whereas this loop places every node with a valid
    // aggregate id. If a node with a valid id is won by another process, an
    // aggregate's cursor could run past its range -- confirm this cannot occur.
262 Kokkos::parallel_for("MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0,numNodes),
263 KOKKOS_LAMBDA(const LO nodeIdx) {
264 LO aggIdx = vertex2AggId(nodeIdx,0);
265 if(aggIdx != INVALID) {
266 // atomic postincrement aggCurr(aggIdx) each time
267 aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx;
268 } else {
269 // same, but using last entry of aggCurr for unaggregated nodes
270 unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx;
271 }
272 });
273
274 }
275
// Body of a method returning a one-line string; the signature line (listing line
// 277) is missing from this extract -- presumably description() const.
// Appends the global aggregate count to BaseClass::description(); a stored value of
// -1 is reported as "not computed".
276 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
278 if (numGlobalAggregates_ == -1) return BaseClass::description() + "{nGlobalAggregates = not computed}";
279 else return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}";
280 }
281
// Prints the global aggregate count when verbLevel includes Statistics1; a stored
// value of -1 is reported as "not computed". Nothing is printed otherwise.
// NOTE(review): `out0` is not declared in the visible lines; it presumably comes
// from the statement on listing line 284, which is missing from this extract.
282 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
283 void Aggregates<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosDeviceWrapperNode<DeviceType> >::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const {
285
286 if (verbLevel & Statistics1) {
287 if (numGlobalAggregates_ == -1) out0 << "Global number of aggregates: not computed " << std::endl;
288 else out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl;
289 }
290 }
291
// Body of a method that sums the local aggregate counts over the communicator of
// vertex2AggId_'s map and stores the result through SetNumGlobalAggregates().
// The signature (listing line 293) and the statement on listing line 301 are
// missing from this extract.
292 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
294
    // NOTE(review): description() and print() treat -1 as "not computed", yet this
    // guard only runs the reduction when the value is NOT -1, i.e. when a count is
    // already stored. If the intent is to compute on demand, the test looks inverted
    // (== -1) -- confirm against the intended contract before changing.
295 if (numGlobalAggregates_ != -1) {
296 LO nAggregates = GetNumAggregates();
297 GO nGlobalAggregates;
298 MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
299 SetNumGlobalAggregates(nGlobalAggregates);
300 }
302 }
303
304 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
305 const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Tpetra::KokkosCompat::KokkosDeviceWrapperNode<DeviceType>> >
309
310} //namespace MueLu
311
312#endif // MUELU_AGGREGATES_DEF_HPP
#define MUELU_UNAGGREGATED
#define MUELU_UNASSIGNED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=verbLevel_default) const
Print the object with some verbosity level to a FancyOStream object.
bool aggregatesIncludeGhosts_
Set to false iff aggregates do not include any DOFs belonging to other processes.
void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, LO_view &unaggregated) const
Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up t...
Teuchos::ArrayRCP< bool > isRoot_
An ArrayRCP of booleans specifying if a local entry is an aggregate root.
const RCP< const Map > GetMap() const
returns (overlapping) map of aggregate/node distribution
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
void SetNumGlobalAggregates(GO nGlobalAggregates)
Set number of global aggregates on current processor.
Container class for aggregation information.
virtual std::string description() const
Return a simple one-line description of this object.
Exception thrown to report errors in the internal logic of the program.
virtual const RCP< const Map > GetImportMap() const =0
Lightweight MueLu representation of a compressed row storage graph.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.