72 const Teuchos::RCP<
const Teuchos::Comm<int> > &comm,
73 Teuchos::RCP<Tpetra::CrsMatrix<scalar_t, lno_t, gno_t, nod_t>> &M_)
75 if (comm->getRank() == 0){
76 std::cout <<
"Create matrix with " << problemType;
77 std::cout <<
" (and " << xdim;
79 std::cout <<
" x " << ydim <<
" x " << zdim <<
" ";
81 std::cout <<
" x" << ydim <<
" x 1 ";
83 std::cout <<
"x 1 x 1 ";
84 std::cout <<
" mesh)" << std::endl;
87 Teuchos::CommandLineProcessor tclp;
88 Galeri::Xpetra::Parameters<gno_t> params(tclp, xdim, ydim, zdim, problemType);
90 Teuchos::RCP<const Tpetra::Map<lno_t, gno_t> > map =
91 Teuchos::rcp(
new Tpetra::Map<lno_t, gno_t>(params.GetNumGlobalElements(), 0, comm));
94 Teuchos::RCP<Galeri::Xpetra::Problem<Tpetra::Map<lno_t, gno_t>,
95 Tpetra::CrsMatrix<scalar_t, lno_t, gno_t, nod_t>,
96 Tpetra::MultiVector<scalar_t, lno_t, gno_t> > > Pr=
97 Galeri::Xpetra::BuildProblem<scalar_t,
lno_t,
gno_t,
98 Tpetra::Map<lno_t, gno_t>,
99 Tpetra::CrsMatrix<scalar_t, lno_t, gno_t, nod_t>,
100 Tpetra::MultiVector<scalar_t, lno_t, gno_t, nod_t> >
101 (params.GetMatrixType(), map, params.GetParameterList());
103 M_ = Pr->BuildMatrix();
105 catch (std::exception &e) {
106 std::cout <<
"Error returned from Galeri " << e.what() << std::endl;
120 typedef typename adapter_type::user_t graph_type;
121 typedef typename graph_type::global_ordinal_type GO;
122 typedef typename graph_type::local_ordinal_type LO;
123 typedef typename graph_type::node_type NO;
124 typedef typename adapter_type::part_t PT;
126 using ordinal_view_t = Kokkos::View<GO*, typename NO::device_type>;
127 using part_view_t = Kokkos::View<PT*, typename NO::device_type>;
129 auto graph = adapter->getUserGraph();
130 auto rowMap = graph->getRowMap();
131 auto colMap = graph->getColMap();
133 size_t numLclRows = rowMap->getLocalNumElements();
134 size_t numGblRows = rowMap->getGlobalNumElements();
135 size_t numLclCols = colMap->getLocalNumElements();
138 ordinal_view_t colLocalToGlobal(Kokkos::view_alloc(
"colLocalToGlobal", Kokkos::WithoutInitializing), numLclCols);
139 auto colMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), colLocalToGlobal);
140 for(
size_t i = 0; i < numLclCols; ++i)
141 colMapHost[i] = colMap->getGlobalElement(i);
142 Kokkos::deep_copy (colLocalToGlobal, colMapHost);
144 ordinal_view_t rowLocalToGlobal(Kokkos::view_alloc(
"rowLocalToGlobal", Kokkos::WithoutInitializing), numLclRows);
145 auto rowMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), rowLocalToGlobal);
146 for(
size_t i = 0; i < numLclRows; ++i)
147 rowMapHost[i] = rowMap->getGlobalElement(i);
148 Kokkos::deep_copy (rowLocalToGlobal, rowMapHost);
150 part_view_t localParts(
"localParts", numGblRows);
151 part_view_t globalParts(
"globalParts", numGblRows);
152 auto localPartsHost = Kokkos::create_mirror_view(Kokkos::HostSpace(), localParts);
155 for(
size_t i = 0; i < numLclRows; i++){
156 GO gi = rowMap->getGlobalElement(i);
157 localPartsHost(gi) = parts[i];
159 Kokkos::deep_copy(localParts, localPartsHost);
161 auto comm = graph->getComm();
162 Teuchos::reduceAll<int, PT> (*comm, Teuchos::REDUCE_SUM, numGblRows, localParts.data(), globalParts.data());
164 auto rowPtr = graph->getLocalGraphHost().row_map;
165 auto colInd = graph->getLocalGraphHost().entries;
167 size_t localtotalcut = 0, totalcut = 0;
169 using execution_space =
typename NO::device_type::execution_space;
170 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
171 Kokkos::parallel_reduce(
"Compute cut", range_policy(0, numLclRows),
172 KOKKOS_LAMBDA(
const LO i,
size_t &cut){
174 const GO gRid = rowLocalToGlobal(i);
175 const PT gi = globalParts(gRid);
177 const size_t start = rowPtr(i);
178 const size_t end = rowPtr(i+1);
179 for(
size_t j = start; j < end; ++j) {
181 const GO gCid = colLocalToGlobal(colInd(j));
182 PT gj = globalParts(gCid);
188 Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM, 1, &localtotalcut, &totalcut);
191 auto rowPtr_h = Kokkos::create_mirror_view(rowPtr);
192 Kokkos::deep_copy(rowPtr_h, rowPtr);
195 size_t *partw =
new size_t[nparts];
196 size_t *partc =
new size_t[nparts];
198 size_t *gpartw =
new size_t[nparts];
199 size_t *gpartc =
new size_t[nparts];
201 for(
int i = 0; i < nparts; i++){
202 partw[i] = 0; partc[i] = 0;
203 gpartw[i] = 0; gpartc[i] = 0;
206 for(
size_t i = 0; i < numLclRows; i++){
207 partw[parts[i]] += rowPtr_h(i+1) - rowPtr_h(i) - 1;
211 Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM, nparts, partw, gpartw);
212 Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM, nparts, partc, gpartc);
214 size_t maxc = 0, totc = 0;
215 size_t maxw = 0, totw = 0;
217 for(
int i = 0; i < nparts; i++){
226 double imbw = (double)maxw/((
double)totw/nparts);
227 double imbc = (double)maxc/((
double)totc/nparts);
229 if(comm->getRank() == 0) {
231 std::cout <<
"\n\n************************************************" << std::endl;
232 std::cout <<
" EDGECUT: " << totalcut << std::endl;
233 std::cout <<
" MAX/AVG WEIGHT: " << imbw << std::endl;
234 std::cout <<
" MAX/AVG COUNT: " << imbc << std::endl;
235 std::cout <<
"************************************************\n\n" << std::endl;
245 typedef typename adapter_type::user_t graph_type;
246 typedef typename graph_type::global_ordinal_type GO;
247 typedef typename graph_type::local_ordinal_type LO;
248 typedef typename graph_type::node_type NO;
249 typedef typename adapter_type::part_t PT;
251 using ordinal_view_t = Kokkos::View<GO*, typename NO::device_type>;
252 using part_view_t = Kokkos::View<PT*, typename NO::device_type>;
254 auto graph = adapter->getUserGraph();
255 auto rowMap = graph->getRowMap();
256 auto colMap = graph->getColMap();
258 size_t numLclRows = rowMap->getNodeNumElements();
259 size_t numGblRows = rowMap->getGlobalNumElements();
260 size_t numLclCols = colMap->getNodeNumElements();
263 ordinal_view_t colLocalToGlobal(Kokkos::view_alloc(
"colLocalToGlobal", Kokkos::WithoutInitializing), numLclCols);
264 auto colMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), colLocalToGlobal);
265 for(
size_t i = 0; i < numLclCols; ++i)
266 colMapHost[i] = colMap->getGlobalElement(i);
267 Kokkos::deep_copy (colLocalToGlobal, colMapHost);
269 ordinal_view_t rowLocalToGlobal(Kokkos::view_alloc(
"rowLocalToGlobal", Kokkos::WithoutInitializing), numLclRows);
270 auto rowMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), rowLocalToGlobal);
271 for(
size_t i = 0; i < numLclRows; ++i)
272 rowMapHost[i] = rowMap->getGlobalElement(i);
273 Kokkos::deep_copy (rowLocalToGlobal, rowMapHost);
276 part_view_t localParts(Kokkos::view_alloc(
"localParts", Kokkos::WithoutInitializing), numGblRows);
277 part_view_t globalParts(
"globalParts", numGblRows);
278 auto localPartsHost = Kokkos::create_mirror_view(Kokkos::HostSpace(), localParts);
281 for(LO i = 0; i < numLclRows; i++){
283 GO gi = rowMap->getGlobalElement(i);
284 localPartsHost(gi) = parts[i];
286 Kokkos::deep_copy(localParts, localPartsHost);
288 auto comm = graph->getComm();
289 Teuchos::reduceAll<int, PT> (*comm, Teuchos::REDUCE_SUM, numGblRows, localParts.data(), globalParts.data());
291 auto rowPtr = graph->getLocalGraphHost().row_map;
292 auto colInd = graph->getLocalGraphHost().entries;
294 size_t localtotalcut = 0, totalcut = 0;
296 using execution_space =
typename NO::device_type::execution_space;
297 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
298 Kokkos::parallel_reduce(
"Compute cut", range_policy(0, numLclRows),
299 KOKKOS_LAMBDA(
const LO i,
size_t &cut){
301 const GO gRid = rowLocalToGlobal(i);
302 const PT gi = globalParts(gRid);
304 const size_t start = rowPtr(i);
305 const size_t end = rowPtr(i+1);
306 for(
size_t j = start; j < end; ++j) {
308 const GO gCid = colLocalToGlobal(colInd(j));
309 PT gj = globalParts(gCid);
315 Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM, 1, &localtotalcut, &totalcut);
317 if(comm->getRank() == 0) {
318 std::cout <<
"\n\n************************************************" << std::endl;
319 std::cout <<
" EDGECUT: " << totalcut << std::endl;
320 std::cout <<
"************************************************\n\n" << std::endl;
330 Tpetra::ScopeGuard tpetraScope (&narg, &arg);
333 const Teuchos::RCP<const Teuchos::Comm<int>> pComm= Tpetra::getDefaultComm();
335 int me = pComm->getRank();
339 int max_iters = 1000;
342 std::string matrix_file =
"";
343 std::string vector_file =
"";
344 std::string eigensolve =
"LOBPCG";
345 bool parmetis =
false;
350 std::string ptype =
"";
351 std::string prec =
"";
352 std::string init =
"";
357 for (
int i = 0; i < narg; i++){
358 std::cout << arg[i] <<
" ";
360 std::cout << std::endl;
363 Teuchos::CommandLineProcessor cmdp(
false,
true);
364 cmdp.setOption(
"matrix_file",&matrix_file,
365 "Path and filename of the matrix to be read.");
366 cmdp.setOption(
"vector_file",&vector_file,
367 "Path and filename of the vector to be read.");
368 cmdp.setOption(
"nparts",&nparts,
369 "Number of global parts desired in the resulting partition.");
370 cmdp.setOption(
"rand_seed",&rand_seed,
371 "Seed for the random multivector.");
372 cmdp.setOption(
"max_iters",&max_iters,
373 "Maximum iters (LOBPCG) or mulitplies by A (randomized).");
374 cmdp.setOption(
"block_size",&block_size,
375 "Block size (LOBPCG) or number of vectors l (randomized).");
376 cmdp.setOption(
"verbosity", &verbosity,
378 cmdp.setOption(
"parmetis",
"sphynx", &parmetis,
379 "Whether to use parmetis.");
380 cmdp.setOption(
"pulp",
"sphynx", &pulp,
381 "Whether to use pulp.");
382 cmdp.setOption(
"prec", &prec,
383 "Prec type to use.");
386 cmdp.setOption(
"prob", &ptype,
387 "Problem type to use. Options are combinatorial, normalized or generalized.");
388 cmdp.setOption(
"tol", &tol,
389 "Tolerance to use.");
390 cmdp.setOption(
"init", &init,
391 "Sphynx Initial guess. Options: random or constants. Default: random if randomized solver is used.");
393 if (cmdp.parse(narg,arg)!=Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
399 std::cout <<
"matrix file = " << matrix_file << std::endl;
400 std::cout <<
"vector file = " << vector_file << std::endl;
401 std::cout <<
"nparts = " << nparts << std::endl;
402 std::cout <<
"verbosity = " << verbosity << std::endl;
403 std::cout <<
"parmetis = " << parmetis << std::endl;
404 std::cout <<
"pulp = " << pulp << std::endl;
405 std::cout <<
"prec = " << prec << std::endl;
406 std::cout <<
"eigensolver = " << eigensolve << std::endl;
407 std::cout <<
"prob = " << ptype << std::endl;
408 std::cout <<
"tol = " << tol << std::endl;
409 std::cout <<
"init = " << init << std::endl;
412 using scalar_type = Tpetra::Details::DefaultTypes::scalar_type;
413 using local_ordinal_type = Tpetra::Details::DefaultTypes::local_ordinal_type;
414 using global_ordinal_type = Tpetra::Details::DefaultTypes::global_ordinal_type;
415 using node_type = Tpetra::Details::DefaultTypes::node_type;
417 using crs_matrix_type = Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>;
418 using map_type = Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>;
419 using mv_type = Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>;
424 std::srand(rand_seed);
427 Teuchos::RCP<adapter_type> adapter;
428 Teuchos::RCP<crs_matrix_type> tmatrix;
430 std::string mtx =
".mtx", lc =
".largestComp";
431 if(std::equal(lc.rbegin(), lc.rend(), matrix_file.rbegin())) {
434 std::cout <<
"Used reader for Largest Comp." << std::endl;
437 else if(std::equal(mtx.rbegin(), mtx.rend(), matrix_file.rbegin())) {
438 typedef Tpetra::MatrixMarket::Reader<crs_matrix_type> reader_type;
440 tmatrix = r.readSparseFile(matrix_file, pComm);
442 std::cout <<
"Used standard Matrix Market reader." << std::endl;
447 if(matrix_file ==
"200")
449 else if(matrix_file ==
"400")
452 (meshdim, meshdim, meshdim,
"Brick3D", pComm, tmatrix);
455 std::cout <<
"Generated Brick3D matrix." << std::endl;
459 std::cout <<
"Done with reading/creating the matrix." << std::endl;
462 Teuchos::RCP<const map_type> map = tmatrix->getMap();
464 Teuchos::RCP<mv_type> V;
465 if (vector_file !=
""){
466 V = Tpetra::MatrixMarket::Reader<mv_type >::readDenseFile(vector_file,pComm,map);
468 std::cout <<
"Done with reading user-provided eigenvectors." << std::endl;
471 adapter = Teuchos::rcp(
new adapter_type(tmatrix->getCrsGraph(), 1));
472 adapter->setVertexWeightIsDegree(0);
475 Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::rcp(
new Teuchos::ParameterList());
476 Teuchos::RCP<Teuchos::ParameterList> sphynxParams(
new Teuchos::ParameterList);
477 params->set(
"num_global_parts", nparts);
479 Teuchos::RCP<Teuchos::StackedTimer> stacked_timer;
480 stacked_timer = Teuchos::rcp(
new Teuchos::StackedTimer(
"SphynxDriver"));
481 Teuchos::TimeMonitor::setStackedTimer(stacked_timer);
482 if(parmetis || pulp) {
484 params->set(
"partitioning_approach",
"partition");
485 params->set(
"imbalance_tolerance", 1.01);
487 params->set(
"algorithm",
"parmetis");
488 params->set(
"imbalance_tolerance", 1.01);
491 params->set(
"algorithm",
"pulp");
492 params->set(
"pulp_vert_imbalance", 1.01);
496 Teuchos::RCP<problem_type> problem;
499 Teuchos::TimeMonitor t1(*Teuchos::TimeMonitor::getNewTimer(
"Partitioning::All"));
501 Teuchos::TimeMonitor t2(*Teuchos::TimeMonitor::getNewTimer(
"Partitioning::Problem"));
502 problem = Teuchos::rcp(
new problem_type(adapter.getRawPtr(), params.getRawPtr(), sphynxParams, Tpetra::getDefaultComm()));
505 Teuchos::TimeMonitor t3(*Teuchos::TimeMonitor::getNewTimer(
"Partitioning::Solve"));
517 sphynxParams->set(
"sphynx_verbosity", verbosity);
518 sphynxParams->set(
"sphynx_max_iterations", max_iters);
520 sphynxParams->set(
"sphynx_block_size", block_size);
522 sphynxParams->set(
"sphynx_skip_preprocessing",
true);
524 if (ptype !=
"") sphynxParams->set(
"sphynx_problem_type", ptype);
525 if (init !=
"") sphynxParams->set(
"sphynx_initial_guess", init);
526 if (prec !=
"") sphynxParams->set(
"sphynx_preconditioner_type", prec);
527 if (tol != -1) sphynxParams->set(
"sphynx_tolerance", tol);
530 Teuchos::RCP<problem_type> problem;
533 Teuchos::TimeMonitor t1b(*Teuchos::TimeMonitor::getNewTimer(
"Partitioning::All"));
535 Teuchos::TimeMonitor t2b(*Teuchos::TimeMonitor::getNewTimer(
"Partitioning::Problem"));
536 problem = Teuchos::rcp(
new problem_type(adapter.get(), params.get(), sphynxParams, Tpetra::getDefaultComm()));
539 Teuchos::TimeMonitor t3b(*Teuchos::TimeMonitor::getNewTimer(
"Partitioning::Solve"));
540 if (vector_file ==
""){
542 std::cout << eigensolve <<
"will be used to solve the partitioning problem." << std::endl;
546 std::cout <<
"Problem to be partitioned with user-provided eigenvectors." << std::endl;
547 problem->setUserEigenvectors(V);
564 stacked_timer->stopBaseTimer();
566 Teuchos::RCP<Teuchos::FancyOStream> fancy2 = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
567 Teuchos::FancyOStream& out2 = *fancy2;
568 Teuchos::StackedTimer::OutputOptions options;
569 options.output_fraction = options.output_histogram = options.output_minmax =
true;
570 stacked_timer->report(out2, pComm, options);
572 Teuchos::TimeMonitor::summarize();