Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
51
52#ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53#define AMESOS2_SUPERLUDIST_DEF_HPP
54
55#include <Teuchos_Tuple.hpp>
56#include <Teuchos_StandardParameterEntryValidators.hpp>
57#include <Teuchos_DefaultMpiComm.hpp>
58#include <Teuchos_Details_MpiTypeTraits.hpp>
59
62#include "Amesos2_Util.hpp"
63
64
65namespace Amesos2 {
66
67
68 template <class Matrix, class Vector>
69 Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
70 Teuchos::RCP<Vector> X,
71 Teuchos::RCP<const Vector> B)
72 : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
73 , bvals_()
74 , xvals_()
75 , in_grid_(false)
76 , force_symbfact_(false)
77 , is_contiguous_(true)
78 {
79 using Teuchos::Comm;
80 // It's OK to depend on MpiComm explicitly here, because
81 // SuperLU_DIST requires MPI anyway.
82 using Teuchos::MpiComm;
83 using Teuchos::outArg;
84 using Teuchos::ParameterList;
85 using Teuchos::parameterList;
86 using Teuchos::RCP;
87 using Teuchos::rcp;
88 using Teuchos::rcp_dynamic_cast;
89 using Teuchos::REDUCE_SUM;
90 using Teuchos::reduceAll;
91 typedef global_ordinal_type GO;
92 typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
93
95 // Set up the SuperLU_DIST processor grid //
97
98 RCP<const Comm<int> > comm = this->getComm ();
99 const int myRank = comm->getRank ();
100 const int numProcs = comm->getSize ();
101
102 SLUD::int_t nprow, npcol;
103 get_default_grid_size (numProcs, nprow, npcol);
104
105 {
106 // FIXME (mfh 16 Dec 2014) getComm() just returns
107 // matrixA_->getComm(), so it's not clear why we need to ask for
108 // the matrix's communicator separately here.
109 RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
110 TEUCHOS_TEST_FOR_EXCEPTION(
111 matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
112 "constructor: The matrix's communicator is null!");
113 RCP<const MpiComm<int> > matMpiComm =
114 rcp_dynamic_cast<const MpiComm<int> > (matComm);
115 // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
116 // SerialComm, we probably could just use MPI_COMM_SELF here.
117 // I'm not sure if SuperLU_DIST is smart enough to handle that
118 // case, though.
119 TEUCHOS_TEST_FOR_EXCEPTION(
120 matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
121 "constructor: The matrix's communicator is not an MpiComm!");
122 TEUCHOS_TEST_FOR_EXCEPTION(
123 matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
124 "Superlustdist constructor: The matrix's communicator claims to be a "
125 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
126 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
127 "exist, which likely implies that the Teuchos::MpiComm was constructed "
128 "incorrectly. It means something different than if the MPI_Comm were "
129 "MPI_COMM_NULL.");
130 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
131 data_.mat_comm = rawMpiComm;
132 // This looks a bit like ScaLAPACK's grid initialization (which
133 // technically takes place in the BLACS, not in ScaLAPACK
134 // proper). See http://netlib.org/scalapack/slug/node34.html.
135 // The main difference is that SuperLU_DIST depends explicitly
136 // on MPI, while the BLACS hides its communication protocol.
137 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
138 }
139
141 // Set some default parameters. //
142 // //
143 // Must do this after grid has been created in //
144 // case user specifies the nprow and npcol parameters //
146 SLUD::set_default_options_dist(&data_.options);
147
148 RCP<ParameterList> default_params =
149 parameterList (* (this->getValidParameters ()));
150 this->setParameters (default_params);
151
152 // Set some internal options
153 data_.options.Fact = SLUD::DOFACT;
154 data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
155 data_.options.SolveInitialized = SLUD::NO;
156 data_.options.RefineInitialized = SLUD::NO;
157 data_.rowequ = false;
158 data_.colequ = false;
159 data_.perm_r.resize(this->globalNumRows_);
160 data_.perm_c.resize(this->globalNumCols_);
161 data_.largediag_mc64_job = 4;
162 for (global_size_type i = 0; i < this->globalNumRows_; i++)
163 data_.perm_r[i] = i;
164 for (global_size_type i = 0; i < this->globalNumCols_; i++)
165 data_.perm_c[i] = i;
166
168 // Set up a communicator for the parallel column ordering and //
169 // parallel symbolic factorization. //
171 data_.symb_comm = MPI_COMM_NULL;
172
173 // domains is the next power of 2 less than nprow*npcol. This
174 // value will be used for creating an MPI communicator for the
175 // pre-ordering and symbolic factorization methods.
176 data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
177
178 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
179 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
180
182 // Set up a row Map that only includes processes that are in the
183 // SuperLU process grid. This will be used for redistributing A.
185
186 // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
187 // with myProcParticipates as the weight, but that costs an extra
188 // all-reduce.
189
190 // Set to 1 if I am in the grid, and I get some of the matrix rows.
191 int myProcParticipates = 0;
192 if (myRank < nprow * npcol) {
193 in_grid_ = true;
194 myProcParticipates = 1;
195 }
196
197 // Compute how many processes in the communicator belong to the
198 // process grid.
199 int numParticipatingProcs = 0;
200 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
201 outArg (numParticipatingProcs));
202 TEUCHOS_TEST_FOR_EXCEPTION(
203 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
204 std::logic_error, "Amesos2::Superludist constructor: The matrix has "
205 << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
206 "communicator want to participate in its factorization! nprow = "
207 << nprow << " and npcol = " << npcol << ".");
208
209 // Divide up the rows among the participating processes.
210 size_t myNumRows = 0;
211 {
212 const GO GNR = static_cast<GO> (this->globalNumRows_);
213 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
214 GNR / static_cast<GO> (numParticipatingProcs);
215 const GO remainder =
216 GNR - quotient * static_cast<GO> (numParticipatingProcs);
217 const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
218 (quotient + static_cast<GO> (1)) : quotient;
219 myNumRows = static_cast<size_t> (lclNumRows);
220 }
221
222 // TODO: might only need to initialize if parallel symbolic factorization is requested.
223 const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
225 rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
226
228 // Do some other initialization //
230
231 data_.A.Store = NULL;
232 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
233 SLUD::PStatInit(&(data_.stat));
234 // We do not use ScalePermstructInit because we will use our own
235 // arrays for storing perm_r and perm_c
236 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
237 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
238 }
239
240
241 template <class Matrix, class Vector>
243 {
244 /* Free SuperLU_DIST data_types
245 * - Matrices
246 * - Vectors
247 * - Stat object
248 * - ScalePerm, LUstruct, grid, and solve objects
249 *
250 * Note: the function definitions are the same regardless whether
251 * complex or real, so we arbitrarily use the D namespace
252 */
253 if ( this->status_.getNumPreOrder() > 0 ){
255#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
256 SUPERLU_FREE( data_.sizes );
257 SUPERLU_FREE( data_.fstVtxSep );
258#else
259 free( data_.sizes );
260 free( data_.fstVtxSep );
261#endif
262 }
263
264 // Cleanup old matrix store memory if it's non-NULL. Our
265 // Teuchos::Array's will destroy rowind, colptr, and nzval for us
266 if( data_.A.Store != NULL ){
267 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
268 }
269
270 // LU data is initialized in numericFactorization_impl()
271 if ( this->status_.getNumNumericFact() > 0 ){
272 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
273 }
274 function_map::LUstructFree(&(data_.lu));
275
276 // If a symbolic factorization is ever performed without a
277 // follow-up numericfactorization, there are some arrays in the
278 // Pslu_freeable struct which will never be free'd by
279 // SuperLU_DIST.
280 if ( this->status_.symbolicFactorizationDone() &&
281 !this->status_.numericFactorizationDone() ){
282 if ( data_.pslu_freeable.xlsub != NULL ){
283#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
284 SUPERLU_FREE( data_.pslu_freeable.xlsub );
285 SUPERLU_FREE( data_.pslu_freeable.lsub );
286#else
287 free( data_.pslu_freeable.xlsub );
288 free( data_.pslu_freeable.lsub );
289#endif
290 }
291 if ( data_.pslu_freeable.xusub != NULL ){
292#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
293 SUPERLU_FREE( data_.pslu_freeable.xusub );
294 SUPERLU_FREE( data_.pslu_freeable.usub );
295#else
296 free( data_.pslu_freeable.xusub );
297 free( data_.pslu_freeable.usub );
298#endif
299 }
300 if ( data_.pslu_freeable.supno_loc != NULL ){
301#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
302 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
303 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
304 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
305#else
306 free( data_.pslu_freeable.supno_loc );
307 free( data_.pslu_freeable.xsup_beg_loc );
308 free( data_.pslu_freeable.xsup_end_loc );
309#endif
310 }
311#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
312 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
313#else
314 free( data_.pslu_freeable.globToLoc );
315#endif
316 }
317
318 SLUD::PStatFree( &(data_.stat) ) ;
319
320 // Teuchos::Arrays will free R, C, perm_r, and perm_c
321 // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
322
323 if ( data_.options.SolveInitialized == SLUD::YES )
324 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
325
326 // gridexit of an older version frees SuperLU_MPI_DOUBLE_COMPLE,
327 // which could cause an issue if there are still active instances of superludist?
328 SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
329 // cases where grid
330 // wouldn't be initialized?
331
332 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
333 }
334
335 template<class Matrix, class Vector>
336 void
338 {
339 int job = data_.largediag_mc64_job;
340 if (job == 5)
341 {
342 data_.R1.resize(data_.A.nrow);
343 data_.C1.resize(data_.A.ncol);
344 }
345
346 SLUD::NCformat *GAstore = (SLUD::NCformat*) GA.Store;
347 SLUD::int_t* colptr = GAstore->colptr;
348 SLUD::int_t* rowind = GAstore->rowind;
349 SLUD::int_t nnz = GAstore->nnz;
350 slu_type *a_GA = (slu_type *) GAstore->nzval;
351 MPI_Datatype mpi_dtype = Teuchos::Details::MpiTypeTraits<magnitude_type>::getType();
352 MPI_Datatype mpi_itype = Teuchos::Details::MpiTypeTraits<SLUD::int_t>::getType();
353
354 int iinfo = 0;
355 if ( !data_.grid.iam ) { /* Process 0 finds a row permutation */
356 iinfo = function_map::ldperm_dist(job, data_.A.nrow, nnz, colptr, rowind, a_GA,
357 data_.perm_r.getRawPtr(), data_.R1.getRawPtr(), data_.C1.getRawPtr());
358
359 MPI_Bcast( &iinfo, 1, MPI_INT, 0, data_.grid.comm );
360 if ( iinfo == 0 ) {
361 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
362 if ( job == 5 && data_.options.Equil ) {
363 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
364 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
365 }
366 }
367 } else {
368 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, data_.grid.comm );
369 if ( iinfo == 0 ) {
370 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
371 if ( job == 5 && data_.options.Equil ) {
372 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
373 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
374 }
375 }
376 }
377 TEUCHOS_TEST_FOR_EXCEPTION( iinfo != 0,
378 std::runtime_error,
379 "SuperLU_DIST pre-ordering failed to compute row perm with "
380 << iinfo << std::endl);
381
382 if (job == 5)
383 {
384 for (SLUD::int_t i = 0; i < data_.A.nrow; ++i) data_.R1[i] = exp(data_.R1[i]);
385 for (SLUD::int_t i = 0; i < data_.A.ncol; ++i) data_.C1[i] = exp(data_.C1[i]);
386 }
387 }
388
389
390 template<class Matrix, class Vector>
391 int
393 {
394 if (data_.options.RowPerm == SLUD::NOROWPERM) {
395 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
396 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
397 }
398 else if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
399 if (!force_symbfact_)
400 // defer to numerical factorization because row permutation requires the matrix values
401 return (EXIT_SUCCESS + 1);
402 }
403 // loadA_impl(); // Refresh matrix values
404
405 if( in_grid_ ){
406 // If this function has been called at least once, then the
407 // sizes, and fstVtxSep arrays were allocated in
408 // get_perm_c_parmetis. Delete them before calling that
409 // function again. These arrays will also be dealloc'd in the
410 // deconstructor.
411 if( this->status_.getNumPreOrder() > 0 ){
412#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
413 SUPERLU_FREE( data_.sizes );
414 SUPERLU_FREE( data_.fstVtxSep );
415#else
416 free( data_.sizes );
417 free( data_.fstVtxSep );
418#endif
419 }
420 float info = 0.0;
421 {
422#ifdef HAVE_AMESOS2_TIMERS
423 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
424#endif
425 info = SLUD::get_perm_c_parmetis( &(data_.A),
426 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
427 data_.grid.nprow * data_.grid.npcol, data_.domains,
428 &(data_.sizes), &(data_.fstVtxSep),
429 &(data_.grid), &(data_.symb_comm) );
430 }
431 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
432 std::runtime_error,
433 "SuperLU_DIST pre-ordering ran out of memory after allocating "
434 << info << " bytes of memory" );
435 }
436
437 // Ordering will be applied directly before numeric factorization,
438 // after we have a chance to get updated coefficients from the
439 // matrix
440
441 return EXIT_SUCCESS;
442 }
443
444
445
446 template <class Matrix, class Vector>
447 int
449 {
450 // loadA_impl(); // Refresh matrix values
451 if (!force_symbfact_) {
452 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
453 // defer to numerical factorization because row permutation requires the matrix values
454 return (EXIT_SUCCESS + 1);
455 }
456 }
457
458 if( in_grid_ ){
459
460 float info = 0.0;
461 {
462#ifdef HAVE_AMESOS2_TIMERS
463 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
464#endif
465
466#if (SUPERLU_DIST_MAJOR_VERSION > 7)
467 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
468 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
469 data_.perm_r.getRawPtr(), data_.sizes,
470 data_.fstVtxSep, &(data_.pslu_freeable),
471 &(data_.grid.comm), &(data_.symb_comm),
472 &(data_.mem_usage));
473
474#else
475 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
476 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
477 data_.perm_r.getRawPtr(), data_.sizes,
478 data_.fstVtxSep, &(data_.pslu_freeable),
479 &(data_.grid.comm), &(data_.symb_comm),
480 &(data_.mem_usage));
481#endif
482 }
483 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
484 std::runtime_error,
485 "SuperLU_DIST symbolic factorization ran out of memory after"
486 " allocating " << info << " bytes of memory" );
487 }
488 same_symbolic_ = false;
489 same_solve_struct_ = false;
490
491 return EXIT_SUCCESS;
492 }
493
494
495 template <class Matrix, class Vector>
496 int
498 using Teuchos::as;
499
500 // loadA_impl(); // Refresh the matrix values
501 SLUD::SuperMatrix GA; /* Global A in NC format */
502 bool need_value = false;
503
504 if( in_grid_ ) {
505 if( data_.options.Equil == SLUD::YES ) {
506 SLUD::int_t info = 0;
507
508 // Compute scaling
509 data_.R.resize(this->globalNumRows_);
510 data_.C.resize(this->globalNumCols_);
511 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
512 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
513
514 // Apply the scalings
515 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
516 data_.rowcnd, data_.colcnd, data_.amax,
517 &(data_.equed));
518
519 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
520 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
521
522 // Compute and apply the row permutation
523 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
524 // Create a column-order copy of A
525 need_value = true;
526 SLUD::D::pdCompRow_loc_to_CompCol_global(true, &data_.A, &data_.grid, &GA);
527
528 // Compute row permutation
530
531 // Here we do symbolic factorization
532 force_symbfact_ = true;
535 force_symbfact_ = false;
536
537 // Apply row-permutation scaling for job=5
538 // Here we do it manually to bypass the threshold check in laqgs_loc
539 if (data_.largediag_mc64_job == 5)
540 {
541 SLUD::NRformat_loc *Astore = (SLUD::NRformat_loc*) data_.A.Store;
542 slu_type *a = (slu_type*) Astore->nzval;
543 SLUD::int_t m_loc = Astore->m_loc;
544 SLUD::int_t fst_row = Astore->fst_row;
545 SLUD::int_t i, j, irow = fst_row, icol;
546
547 /* Scale the distributed matrix further.
548 A <-- diag(R1)*A*diag(C1) */
549 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
550 for (j = 0; j < m_loc; ++j) {
551 for (i = rowptr_view_.data()[j]; i < rowptr_view_.data()[j+1]; ++i) {
552 icol = colind_view_.data()[i];
553 a[i] = mult_op(a[i], data_.R1[irow] * data_.C1[icol]);
554 }
555 ++irow;
556 }
557
558 /* Multiply together the scaling factors */
559 if ( data_.rowequ ) for (i = 0; i < data_.A.nrow; ++i) data_.R[i] *= data_.R1[i];
560 else for (i = 0; i < data_.A.nrow; ++i) data_.R[i] = data_.R1[i];
561 if ( data_.colequ ) for (i = 0; i < data_.A.ncol; ++i) data_.C[i] *= data_.C1[i];
562 else for (i = 0; i < data_.A.ncol; ++i) data_.C[i] = data_.C1[i];
563
564 data_.rowequ = data_.colequ = 1;
565 }
566 }
567 }
568
569 // Apply the column ordering, so that AC is the column-permuted A, and compute etree
570 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
571 for( size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
572
573 // Distribute data from the symbolic factorization
574 if( same_symbolic_ ){
575 // Note: with the SamePattern_SameRowPerm options, it does not
576 // matter that the glu_freeable member has never been
577 // initialized, because it is never accessed. It is a
578 // placeholder arg. The real work is done in data_.lu
579#if (SUPERLU_DIST_MAJOR_VERSION > 7)
580 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
581 function_map::pdistribute(&(data_.options),
582 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
583 &(data_.A), &(data_.scale_perm),
584 &(data_.glu_freeable), &(data_.lu),
585 &(data_.grid));
586#else
587 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
588 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
589 &(data_.A), &(data_.scale_perm),
590 &(data_.glu_freeable), &(data_.lu),
591 &(data_.grid));
592#endif
593 } else {
594#if (SUPERLU_DIST_MAJOR_VERSION > 7)
595 data_.options.Fact = SLUD::DOFACT;
596 function_map::dist_psymbtonum(&(data_.options),
597 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
598 &(data_.A), &(data_.scale_perm),
599 &(data_.pslu_freeable), &(data_.lu),
600 &(data_.grid));
601#else
602 function_map::dist_psymbtonum(SLUD::DOFACT,
603 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
604 &(data_.A), &(data_.scale_perm),
605 &(data_.pslu_freeable), &(data_.lu),
606 &(data_.grid));
607#endif
608 }
609
610 // Retrieve the normI of A (required by gstrf).
611 bool notran = (data_.options.Trans == SLUD::NOTRANS);
612 magnitude_type anorm = function_map::plangs((notran ? (char *)"1" : (char *)"I"), &(data_.A), &(data_.grid));
613
614 int info = 0;
615 {
616#ifdef HAVE_AMESOS2_TIMERS
617 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
618#endif
619 function_map::gstrf(&(data_.options), this->globalNumRows_,
620 this->globalNumCols_, anorm, &(data_.lu),
621 &(data_.grid), &(data_.stat), &info);
622 }
623
624 // Check output
625 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
626 std::runtime_error,
627 "L and U factors have been computed but U("
628 << info << "," << info << ") is exactly zero "
629 "(i.e. U is singular)");
630 }
631
632 if (need_value)
633 SLUD::Destroy_CompCol_Matrix_dist(&GA);
634
635 // The other option, that info_st < 0, denotes invalid parameters
636 // to the function, but we'll assume for now that that won't
637 // happen.
638
639 data_.options.Fact = SLUD::FACTORED;
640 same_symbolic_ = true;
641
642 return EXIT_SUCCESS;
643 }
644
645
646 template <class Matrix, class Vector>
647 int
649 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
650 {
651 using Teuchos::as;
652
653 // local_len_rhs is how many of the multivector rows belong to
654 // this processor in the SuperLU_DIST processor grid.
655 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
656 const global_size_type nrhs = X->getGlobalNumVectors();
657 const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
658
659 // make sure our multivector storage is sized appropriately
660 bvals_.resize(nrhs * local_len_rhs);
661 xvals_.resize(nrhs * local_len_rhs);
662
663 // We assume the global length of the two vectors have already been
664 // checked for compatibility
665
666 { // get the values from B
667#ifdef HAVE_AMESOS2_TIMERS
668 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
669#endif
670 {
671 // The input dense matrix for B should be distributed in the
672 // same manner as the superlu_dist matrix. That is, if a
673 // processor has m_loc rows of A, then it should also have
674 // m_loc rows of B (and the same rows). We accomplish this by
675 // distributing the multivector rows with the same Map that
676 // the matrix A's rows are distributed.
677#ifdef HAVE_AMESOS2_TIMERS
678 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
679#endif
680 // get grid-distributed mv data. The multivector data will be
681 // distributed across the processes in the SuperLU_DIST grid.
682 typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
683 copy_helper::do_get(B,
684 bvals_(),
685 local_len_rhs,
686 Teuchos::ptrInArg(*superlu_rowmap_));
687 }
688 } // end block for conversion time
689
690 if( in_grid_ ){
691 // if( data_.options.trans == SLUD::NOTRANS ){
692 // if( data_.rowequ ){ // row equilibration has been done on AC
693 // // scale bxvals_ by diag(R)
694 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
695 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
696 // }
697 // } else if( data_.colequ ){ // column equilibration has been done on AC
698 // // scale bxvals_ by diag(C)
699 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
700 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
701 // }
702
703 // Initialize the SOLVEstruct_t.
704 //
705 // We are able to reuse the solve struct if we have not changed
706 // the sparsity pattern of L and U since the last solve
707 if( !same_solve_struct_ ){
708 if( data_.options.SolveInitialized == SLUD::YES ){
709 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
710 }
711 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
712 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
713 &(data_.grid), &(data_.solve_struct));
714 // Flag that we can reuse this solve_struct unless another
715 // symbolicFactorization is called between here and the next
716 // solve.
717 same_solve_struct_ = true;
718 }
719
720 // Apply row-scaling if requested
721 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
722 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
723 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
724 for(global_size_type j = 0; j < nrhs; ++j) {
725 for(size_t i = 0; i < local_len_rhs; ++i) {
726 bvals_[i + j*ld] = mult_op(bvals_[i + j*ld], data_.R[first_global_row_b + i]);
727 }
728 }
729 }
730
731 // Solve
732 int ierr = 0; // returned error code
733 {
734#ifdef HAVE_AMESOS2_TIMERS
735 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
736#endif
737
738#if (SUPERLU_DIST_MAJOR_VERSION > 7)
739 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
740 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
741 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
742 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
743 &(data_.solve_struct), &(data_.stat), &ierr);
744#else
745 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
746 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
747 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
748 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
749 &(data_.solve_struct), &(data_.stat), &ierr);
750#endif
751 } // end block for solve time
752
753 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
754 std::runtime_error,
755 "Argument " << -ierr << " to gstrs had an illegal value" );
756
757 // "Un-scale" the solution so that it is a solution of the original system
758 // if( data_.options.trans == SLUD::NOTRANS ){
759 // if( data_.colequ ){ // column equilibration has been done on AC
760 // // scale bxvals_ by diag(C)
761 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
762 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
763 // }
764 // } else if( data_.rowequ ){ // row equilibration has been done on AC
765 // // scale bxvals_ by diag(R)
766 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
767 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
768 // }
769 { // permute B to a solution of the original system
770#ifdef HAVE_AMESOS2_TIMERS
771 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
772#endif
773 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
774 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
775 as<SLUD::int_t>(local_len_rhs),
776 data_.solve_struct.row_to_proc,
777 data_.solve_struct.inv_perm_c,
778 bvals_.getRawPtr(), ld,
779 xvals_.getRawPtr(), ld,
780 as<int>(nrhs),
781 &(data_.grid));
782 }
783
784 // Apply col-scaling if requested
785 if (data_.options.Equil == SLUD::YES && data_.colequ) {
786 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
787 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
788 for(global_size_type j = 0; j < nrhs; ++j) {
789 for(size_t i = 0; i < local_len_rhs; ++i) {
790 xvals_[i + j*ld] = mult_op(xvals_[i + j*ld], data_.C[first_global_row_b + i]);
791 }
792 }
793 }
794 }
795
796 /* Update X's global values */
797 {
798#ifdef HAVE_AMESOS2_TIMERS
799 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
800#endif
801 typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
802 put_helper::do_put(X,
803 xvals_(),
804 local_len_rhs,
805 Teuchos::ptrInArg(*superlu_rowmap_));
806 }
807
808 return EXIT_SUCCESS;
809 }
810
811
812 template <class Matrix, class Vector>
813 bool
815 {
816 // SuperLU_DIST requires square matrices
817 return( this->globalNumRows_ == this->globalNumCols_ );
818 }
819
820
821 template <class Matrix, class Vector>
822 void
823 Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
824 {
825 using Teuchos::as;
826 using Teuchos::RCP;
827 using Teuchos::getIntegralValue;
828 using Teuchos::ParameterEntryValidator;
829
830 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
831
832 if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
833 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
834 parameterList->isParameter("npcol")),
835 std::invalid_argument,
836 "nprow and npcol must be set together" );
837
838 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
839 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
840
841 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
842 std::invalid_argument,
843 "nprow and npcol combination invalid" );
844
845 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
846 // De-allocate the default grid that was initialized in the constructor
847 SLUD::superlu_gridexit(&(data_.grid));
848 // Create a new grid
849 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
850 } // else our grid has not changed size since the last initialization
851 }
852
853 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
854 std::invalid_argument,
855 "SuperLU_DIST does not support solving the tranpose system" );
856
857 data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
858
859 // Equilbration option
860 bool equil = parameterList->get<bool>("Equil", false);
861 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
862
863 if( parameterList->isParameter("RowPerm") ){
864 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
865 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
866
867 data_.options.RowPerm = getIntegralValue<SLUD::rowperm_t>(*parameterList, "RowPerm");
868 }
869
870 if( parameterList->isParameter("LargeDiag_MC64-Options") ){
871 data_.largediag_mc64_job = parameterList->template get<int>("LargeDiag_MC64-Options");
872 }
873
874 if( parameterList->isParameter("ColPerm") ){
875 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
876 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
877
878 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
879 }
880
881 // TODO: Uncomment when supported
882 // if( parameterList->isParameter("IterRefine") ){
883 // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
884 // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
885 // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
886 // }
887 data_.options.IterRefine = SLUD::NOREFINE;
888
889 bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
890 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
891
892 if( parameterList->isParameter("IsContiguous") ){
893 is_contiguous_ = parameterList->get<bool>("IsContiguous");
894 }
895 }
896
897
898 template <class Matrix, class Vector>
899 Teuchos::RCP<const Teuchos::ParameterList>
901 {
902 using std::string;
903 using Teuchos::tuple;
904 using Teuchos::ParameterList;
905 using Teuchos::EnhancedNumberValidator;
906 using Teuchos::setStringToIntegralParameter;
907 using Teuchos::setIntParameter;
908 using Teuchos::stringToIntegralParameterEntryValidator;
909
910 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
911
912 if( is_null(valid_params) ){
913 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
914
915 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
916 = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
917 col_row_validator->setMin(1);
918
919 pl->set("npcol", data_.grid.npcol,
920 "Number of columns in the processor grid. "
921 "Must be set with nprow", col_row_validator);
922 pl->set("nprow", data_.grid.nprow,
923 "Number of rows in the SuperLU_DIST processor grid. "
924 "Must be set together with npcol", col_row_validator);
925
926 // validator will catch any value besides NOTRANS
927 setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
928 "Solve for the transpose system or not",
929 tuple<string>("NOTRANS"),
930 tuple<string>("Do not solve with transpose"),
931 tuple<SLUD::trans_t>(SLUD::NOTRANS),
932 pl.getRawPtr());
933
934 // Equillbration
935 pl->set("Equil", false, "Whether to equilibrate the system before solve");
936
937 // TODO: uncomment when supported
938 // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
939 // "Type of iterative refinement to use",
940 // tuple<string>("NOREFINE", "DOUBLE"),
941 // tuple<string>("Do not use iterative refinement",
942 // "Do double iterative refinement"),
943 // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
944 // SLUD::DOUBLE),
945 // pl.getRawPtr());
946
947 // Tiny pivot handling
948 pl->set("ReplaceTinyPivot", true,
949 "Specifies whether to replace tiny diagonals during LU factorization");
950
951 // Row permutation
952 setStringToIntegralParameter<SLUD::rowperm_t>("RowPerm", "NOROWPERM",
953 "Specifies how to permute the rows of the "
954 "matrix for sparsity preservation",
955 tuple<string>("NOROWPERM", "LargeDiag_MC64"),
956 tuple<string>("Natural ordering",
957 "Duff/Koster algorithm"),
958 tuple<SLUD::rowperm_t>(SLUD::NOROWPERM,
959 SLUD::LargeDiag_MC64),
960 pl.getRawPtr());
961
962 setIntParameter("LargeDiag_MC64-Options", 4, "Options for RowPerm-LargeDiag_MC64", pl.getRawPtr());
963
964 // Column permutation
965 setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
966 "Specifies how to permute the columns of the "
967 "matrix for sparsity preservation",
968 tuple<string>("NATURAL", "PARMETIS"),
969 tuple<string>("Natural ordering",
970 "ParMETIS ordering on A^T + A"),
971 tuple<SLUD::colperm_t>(SLUD::NATURAL,
972 SLUD::PARMETIS),
973 pl.getRawPtr());
974
975 pl->set("IsContiguous", true, "Whether GIDs contiguous");
976
977 valid_params = pl;
978 }
979
980 return valid_params;
981 }
982
983
984 template <class Matrix, class Vector>
985 void
987 SLUD::int_t& nprow,
988 SLUD::int_t& npcol) const {
989 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
990 std::invalid_argument,
991 "Number of MPI processes must be at least 1" );
992 SLUD::int_t c, r = 1;
993 while( r*r <= nprocs ) r++;
994 nprow = npcol = --r; // fall back to square grid
995 c = nprocs / r;
996 while( (r--)*c != nprocs ){
997 c = nprocs / r; // note integer division
998 }
999 ++r;
1000 // prefer the square grid over a single row (which will only happen
1001 // in the case of a prime nprocs
1002 if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
1003 nprow = r;
1004 npcol = c;
1005 }
1006 }
1007
1008
1009 template <class Matrix, class Vector>
1010 bool
1012 // Extract the necessary information from mat and call SLU function
1013 using Teuchos::Array;
1014 using Teuchos::ArrayView;
1015 using Teuchos::ptrInArg;
1016 using Teuchos::as;
1017
1018 using SLUD::int_t;
1019
1020#ifdef HAVE_AMESOS2_TIMERS
1021 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
1022#endif
1023
1024 // Cleanup old store memory if it's non-NULL
1025 if( data_.A.Store != NULL ){
1026 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
1027 data_.A.Store = NULL;
1028 }
1029
1030 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
1031 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
1032
1033 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
1034 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
1035 l_rows = as<int_t>(redist_mat->getLocalNumRows());
1036 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
1037 g_cols = g_rows; // we deal with square matrices
1038 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
1039
1040 Kokkos::resize(nzvals_view_, l_nnz);
1041 Kokkos::resize(colind_view_, l_nnz);
1042 Kokkos::resize(rowptr_view_, l_rows + 1);
1043 int_t nnz_ret = 0;
1044 {
1045#ifdef HAVE_AMESOS2_TIMERS
1046 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1047#endif
1048
1050 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1051 redist_mat.ptr(),
1053 nnz_ret,
1054 ptrInArg(*superlu_rowmap_),
1055 ROOTED,
1056 ARBITRARY);
1057 }
1058
1059 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
1060 std::runtime_error,
1061 "Did not get the expected number of non-zero vals");
1062
1063 // Get the SLU data type for this type of matrix
1064 SLUD::Dtype_t dtype = type_map::dtype;
1065
1066 if( in_grid_ ){
1067 function_map::create_CompRowLoc_Matrix(&(data_.A),
1068 g_rows, g_cols,
1069 l_nnz, l_rows, fst_global_row,
1070 nzvals_view_.data(),
1071 colind_view_.data(),
1072 rowptr_view_.data(),
1073 SLUD::SLU_NR_loc,
1074 dtype, SLUD::SLU_GE);
1075 }
1076
1077 return true;
1078}
1079
1080
1081 template<class Matrix, class Vector>
1082 const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
1083
1084
1085} // end namespace Amesos2
1086
1087#endif // AMESOS2_SUPERLUDIST_DEF_HPP
Provides definition of SuperLU_DIST types as well as conversions and type traits.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:127
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:143
Utility functions for Amesos2.
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
Definition Amesos2_SolverCore_decl.hpp:455
SolverCore(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
global_size_type globalNumCols_
Definition Amesos2_SolverCore_decl.hpp:479
Timers timers_
Definition Amesos2_SolverCore_decl.hpp:497
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Definition Amesos2_SolverCore_decl.hpp:363
global_size_type globalNumRows_
Definition Amesos2_SolverCore_decl.hpp:476
Status status_
Definition Amesos2_SolverCore_decl.hpp:491
Control control_
Definition Amesos2_SolverCore_decl.hpp:494
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition Amesos2_Superludist_decl.hpp:343
bool in_grid_
true if this processor is in SuperLU_DISTS's 2D process grid
Definition Amesos2_Superludist_decl.hpp:335
~Superludist()
Destructor.
Definition Amesos2_Superludist_def.hpp:242
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Superludist_def.hpp:814
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition Amesos2_Superludist_def.hpp:823
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition Amesos2_Superludist_def.hpp:497
host_ordinal_type_array colind_view_
Stores the row indices of the nonzero entries.
Definition Amesos2_Superludist_decl.hpp:326
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition Amesos2_Superludist_def.hpp:1011
host_value_type_array nzvals_view_
Stores the values of the nonzero entries for SuperLU_DIST.
Definition Amesos2_Superludist_decl.hpp:324
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Superludist_def.hpp:392
Teuchos::Array< slu_type > bvals_
1D store for B values
Definition Amesos2_Superludist_decl.hpp:330
host_size_type_array rowptr_view_
Stores the location in Ai_ and Aval_ that starts row j.
Definition Amesos2_Superludist_decl.hpp:328
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition Amesos2_Superludist_def.hpp:648
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition Amesos2_Superludist_def.hpp:986
static const char * name
Name of this solver interface.
Definition Amesos2_Superludist_decl.hpp:98
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Superludist_def.hpp:69
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition Amesos2_Superludist_def.hpp:448
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Superludist_def.hpp:900
Teuchos::Array< slu_type > xvals_
1D store for X values
Definition Amesos2_Superludist_decl.hpp:332
void computeRowPermutationLargeDiagMC64(SLUD::SuperMatrix &GA)
Compute the row permutation for option LargeDiag-MC64.
Definition Amesos2_Superludist_def.hpp:337
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:65
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:176
Helper class for getting 1-D copies of multivectors.
Definition Amesos2_MultiVecAdapter_decl.hpp:267
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:663
Helper class for putting 1-D data arrays into multivectors.
Definition Amesos2_MultiVecAdapter_decl.hpp:373