167 using Ifpack2::Details::getParamTryingTypes;
168 const char prefix[] =
"Ifpack2::ILUT: ";
175 IlutImplType::Enum ilutimplType = IlutImplType::Serial;
177 static const char typeName[] =
"fact: type";
179 if ( ! params.isType<std::string>(typeName))
break;
182 Teuchos::Array<std::string> ilutimplTypeStrs;
183 Teuchos::Array<IlutImplType::Enum> ilutimplTypeEnums;
184 IlutImplType::loadPLTypeOption (ilutimplTypeStrs, ilutimplTypeEnums);
185 Teuchos::StringToIntegralParameterEntryValidator<IlutImplType::Enum>
186 s2i(ilutimplTypeStrs (), ilutimplTypeEnums (), typeName,
false);
188 ilutimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
191 if (ilutimplType == IlutImplType::PAR_ILUT) {
192 this->useKokkosKernelsParILUT_ =
true;
195 this->useKokkosKernelsParILUT_ =
false;
201 double fillLevel = LevelOfFill_;
203 const std::string paramName (
"fact: ilut level-of-fill");
204 TEUCHOS_TEST_FOR_EXCEPTION(
205 (params.isParameter(paramName) && this->useKokkosKernelsParILUT_), std::runtime_error,
206 "Ifpack2::ILUT: Parameter " << paramName <<
" is meaningless for algorithm par_ilut.");
207 getParamTryingTypes<double, double, float>
208 (fillLevel, params, paramName, prefix);
209 TEUCHOS_TEST_FOR_EXCEPTION
210 (fillLevel < 1.0, std::runtime_error,
211 "Ifpack2::ILUT: The \"" << paramName <<
"\" parameter must be >= "
212 "1.0, but you set it to " << fillLevel <<
". For ILUT, the fill level "
213 "means something different than it does for ILU(k). ILU(0) produces "
214 "factors with the same sparsity structure as the input matrix A. For "
215 "ILUT, level-of-fill = 1.0 will produce factors with nonzeros matching "
216 "the sparsity structure of A. level-of-fill > 1.0 allows for additional "
222 const std::string paramName (
"fact: absolute threshold");
223 getParamTryingTypes<magnitude_type, magnitude_type, double>
224 (absThresh, params, paramName, prefix);
229 const std::string paramName (
"fact: relative threshold");
230 getParamTryingTypes<magnitude_type, magnitude_type, double>
231 (relThresh, params, paramName, prefix);
236 const std::string paramName (
"fact: relax value");
237 getParamTryingTypes<magnitude_type, magnitude_type, double>
238 (relaxValue, params, paramName, prefix);
243 const std::string paramName (
"fact: drop tolerance");
244 getParamTryingTypes<magnitude_type, magnitude_type, double>
245 (dropTol, params, paramName, prefix);
248 int par_ilut_max_iter=20;
250 int par_ilut_team_size=0;
251 int par_ilut_vector_size=0;
252 float par_ilut_fill_in_limit=0.75;
253 bool par_ilut_verbose=
false;
254 if (this->useKokkosKernelsParILUT_) {
255 par_ilut_max_iter = par_ilut_options_.max_iter;
256 par_ilut_residual_norm_delta_stop = par_ilut_options_.residual_norm_delta_stop;
257 par_ilut_team_size = par_ilut_options_.team_size;
258 par_ilut_vector_size = par_ilut_options_.vector_size;
259 par_ilut_fill_in_limit = par_ilut_options_.fill_in_limit;
260 par_ilut_verbose = par_ilut_options_.verbose;
262 std::string par_ilut_plist_name(
"parallel ILUT options");
263 if (params.isSublist(par_ilut_plist_name)) {
264 Teuchos::ParameterList
const &par_ilut_plist = params.sublist(par_ilut_plist_name);
266 std::string paramName(
"maximum iterations");
267 getParamTryingTypes<int, int>(par_ilut_max_iter, par_ilut_plist, paramName, prefix);
269 paramName =
"residual norm delta stop";
270 getParamTryingTypes<magnitude_type, magnitude_type, double>(par_ilut_residual_norm_delta_stop, par_ilut_plist, paramName, prefix);
272 paramName =
"team size";
273 getParamTryingTypes<int, int>(par_ilut_team_size, par_ilut_plist, paramName, prefix);
275 paramName =
"vector size";
276 getParamTryingTypes<int, int>(par_ilut_vector_size, par_ilut_plist, paramName, prefix);
278 paramName =
"fill in limit";
279 getParamTryingTypes<float, float, double>(par_ilut_fill_in_limit, par_ilut_plist, paramName, prefix);
281 paramName =
"verbose";
282 getParamTryingTypes<bool, bool>(par_ilut_verbose, par_ilut_plist, paramName, prefix);
286 par_ilut_options_.max_iter = par_ilut_max_iter;
287 par_ilut_options_.residual_norm_delta_stop = par_ilut_residual_norm_delta_stop;
288 par_ilut_options_.team_size = par_ilut_team_size;
289 par_ilut_options_.vector_size = par_ilut_vector_size;
290 par_ilut_options_.fill_in_limit = par_ilut_fill_in_limit;
291 par_ilut_options_.verbose = par_ilut_verbose;
296 L_solver_->setParameters(params);
297 U_solver_->setParameters(params);
299 LevelOfFill_ = fillLevel;
300 Athresh_ = absThresh;
301 Rthresh_ = relThresh;
302 RelaxValue_ = relaxValue;
303 DropTolerance_ = dropTol;
491 using Teuchos::Array;
492 using Teuchos::rcp_const_cast;
493 Teuchos::Time timer (
"ILUT::initialize");
494 double startTime = timer.wallTime();
496 Teuchos::TimeMonitor timeMon (timer);
499 TEUCHOS_TEST_FOR_EXCEPTION(
500 A_.is_null (), std::runtime_error,
"Ifpack2::ILUT::initialize: "
501 "The matrix to precondition is null. Please call setMatrix() with a "
502 "nonnull input before calling this method.");
505 IsInitialized_ =
false;
507 A_local_ = Teuchos::null;
511 A_local_ = makeLocalFilter(A_);
512 TEUCHOS_TEST_FOR_EXCEPTION(
513 A_local_.is_null(), std::logic_error,
"Ifpack2::RILUT::initialize: "
514 "makeLocalFilter returned null; it failed to compute A_local. "
515 "Please report this bug to the Ifpack2 developers.");
517 if (this->useKokkosKernelsParILUT_) {
518 this->KernelHandle_ = Teuchos::rcp(
new kk_handle_type());
519 KernelHandle_->create_par_ilut_handle();
520 auto par_ilut_handle = KernelHandle_->get_par_ilut_handle();
521 par_ilut_handle->set_residual_norm_delta_stop(par_ilut_options_.residual_norm_delta_stop);
522 par_ilut_handle->set_team_size(par_ilut_options_.team_size);
523 par_ilut_handle->set_vector_size(par_ilut_options_.vector_size);
524 par_ilut_handle->set_fill_in_limit(par_ilut_options_.fill_in_limit);
525 par_ilut_handle->set_verbose(par_ilut_options_.verbose);
526 par_ilut_handle->set_async_update(
false);
528 RCP<const crs_matrix_type> A_local_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_local_);
529 if (A_local_crs.is_null()) {
532 Array<size_t> entriesPerRow(numRows);
534 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
536 RCP<crs_matrix_type> A_local_crs_nc =
538 A_local_->getColMap (),
541 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
542 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
544 size_t numEntries = 0;
545 A_local_->getLocalRowCopy(i, indices, values, numEntries);
546 A_local_crs_nc->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
548 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
549 A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
551 auto A_local_crs_device = A_local_crs->getLocalMatrixDevice();
554 typedef typename Kokkos::View<usize_type*, array_layout, device_type> ulno_row_view_t;
555 const int NumMyRows = A_local_crs->getRowMap()->getLocalNumElements();
556 L_rowmap_ = ulno_row_view_t(
"L_row_map", NumMyRows + 1);
557 U_rowmap_ = ulno_row_view_t(
"U_row_map", NumMyRows + 1);
559 KokkosSparse::Experimental::par_ilut_symbolic(KernelHandle_.getRawPtr(),
560 A_local_crs_device.graph.row_map, A_local_crs_device.graph.entries,
565 IsInitialized_ =
true;
568 InitializeTime_ += (timer.wallTime() - startTime);
583 using Teuchos::Array;
584 using Teuchos::ArrayRCP;
585 using Teuchos::ArrayView;
588 using Teuchos::reduceAll;
590 using Teuchos::rcp_const_cast;
597 Teuchos::Time timer (
"ILUT::compute");
598 double startTime = timer.wallTime();
600 Teuchos::TimeMonitor timeMon (timer,
true);
602 if (!this->useKokkosKernelsParILUT_)
635#ifdef IFPACK2_WRITE_ILUT_FACTORS
636 std::ofstream ofsL(
"L.ifpack2_ilut.mtx", std::ios::out);
637 std::ofstream ofsU(
"U.ifpack2_ilut.mtx", std::ios::out);
642 double local_nnz =
static_cast<double> (A_local_->getLocalNumEntries ());
643 double fill = ((
getLevelOfFill () - 1.0) * local_nnz) / (2 * myNumRows);
648 double fill_ceil=std::ceil(fill);
652 size_type fillL =
static_cast<size_type
>(fill_ceil);
653 size_type fillU =
static_cast<size_type
>(fill_ceil);
655 Array<scalar_type> InvDiagU (myNumRows, zero);
657 Array<Array<local_ordinal_type> > L_tmp_idx(myNumRows);
658 Array<Array<scalar_type> > L_tmpv(myNumRows);
659 Array<Array<local_ordinal_type> > U_tmp_idx(myNumRows);
660 Array<Array<scalar_type> > U_tmpv(myNumRows);
662 enum { UNUSED, ORIG, FILL };
665 Array<int> pattern(max_col, UNUSED);
666 Array<scalar_type> cur_row(max_col, zero);
667 Array<magnitude_type> unorm(max_col);
669 Array<local_ordinal_type> L_cols_heap;
670 Array<local_ordinal_type> U_cols;
671 Array<local_ordinal_type> L_vals_heap;
672 Array<local_ordinal_type> U_vals_heap;
677 greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
682 nonconst_local_inds_host_view_type ColIndicesARCP;
683 nonconst_values_host_view_type ColValuesARCP;
684 if (! A_local_->supportsRowViews ()) {
685 const size_t maxnz = A_local_->getLocalMaxNumRowEntries ();
686 Kokkos::resize(ColIndicesARCP,maxnz);
687 Kokkos::resize(ColValuesARCP,maxnz);
691 local_inds_host_view_type ColIndicesA;
692 values_host_view_type ColValuesA;
695 if (A_local_->supportsRowViews ()) {
696 A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA);
697 RowNnz = ColIndicesA.size ();
700 A_local_->getLocalRowCopy (row_i, ColIndicesARCP, ColValuesARCP, RowNnz);
701 ColIndicesA = Kokkos::subview(ColIndicesARCP,std::make_pair((
size_t)0, RowNnz));
702 ColValuesA = Kokkos::subview(ColValuesARCP,std::make_pair((
size_t)0, RowNnz));
707 U_cols.push_back(row_i);
708 cur_row[row_i] = zero;
709 pattern[row_i] = ORIG;
711 size_type L_cols_heaplen = 0;
712 rownorm = STM::zero ();
713 for (
size_t i = 0; i < RowNnz; ++i) {
714 if (ColIndicesA[i] < myNumRows) {
715 if (ColIndicesA[i] < row_i) {
716 add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
718 else if (ColIndicesA[i] > row_i) {
719 U_cols.push_back(ColIndicesA[i]);
722 cur_row[ColIndicesA[i]] = ColValuesA[i];
723 pattern[ColIndicesA[i]] = ORIG;
724 rownorm += scalar_mag(ColValuesA[i]);
735 size_type orig_U_len = U_cols.size();
736 RowNnz = L_cols_heap.size() + orig_U_len;
740 size_type L_vals_heaplen = 0;
741 while (L_cols_heaplen > 0) {
744 scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
745 cur_row[row_k] = multiplier;
747 if (mag_mult*unorm[row_k] < rownorm) {
748 pattern[row_k] = UNUSED;
752 if (pattern[row_k] != ORIG) {
753 if (L_vals_heaplen < fillL) {
754 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
756 else if (L_vals_heaplen==0 ||
757 mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
758 pattern[row_k] = UNUSED;
763 pattern[L_vals_heap.front()] = UNUSED;
765 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
771 ArrayView<local_ordinal_type> ColIndicesU = U_tmp_idx[row_k]();
772 ArrayView<scalar_type> ColValuesU = U_tmpv[row_k]();
773 size_type ColNnzU = ColIndicesU.size();
775 for(size_type j=0; j<ColNnzU; ++j) {
776 if (ColIndicesU[j] > row_k) {
779 if (pattern[col_j] != UNUSED) {
780 cur_row[col_j] -= tmp;
782 else if (scalar_mag(tmp) > rownorm) {
783 cur_row[col_j] = -tmp;
784 pattern[col_j] = FILL;
786 U_cols.push_back(col_j);
802 for (size_type i = 0; i < (size_type)ColIndicesA.size (); ++i) {
803 if (ColIndicesA[i] < row_i) {
804 L_tmp_idx[row_i].push_back(ColIndicesA[i]);
805 L_tmpv[row_i].push_back(cur_row[ColIndicesA[i]]);
806 pattern[ColIndicesA[i]] = UNUSED;
811 for (size_type j = 0; j < L_vals_heaplen; ++j) {
812 L_tmp_idx[row_i].push_back(L_vals_heap[j]);
813 L_tmpv[row_i].push_back(cur_row[L_vals_heap[j]]);
814 pattern[L_vals_heap[j]] = UNUSED;
822#ifdef IFPACK2_WRITE_ILUT_FACTORS
823 for (size_type ii = 0; ii < L_tmp_idx[row_i].size (); ++ii) {
824 ofsL << row_i <<
" " << L_tmp_idx[row_i][ii] <<
" "
825 << L_tmpv[row_i][ii] << std::endl;
831 if (cur_row[row_i] == zero) {
832 std::cerr <<
"Ifpack2::ILUT::Compute: zero pivot encountered! "
833 <<
"Replacing with rownorm and continuing..."
834 <<
"(You may need to set the parameter "
835 <<
"'fact: absolute threshold'.)" << std::endl;
836 cur_row[row_i] = rownorm;
838 InvDiagU[row_i] = one / cur_row[row_i];
841 U_tmp_idx[row_i].push_back(row_i);
842 U_tmpv[row_i].push_back(cur_row[row_i]);
843 unorm[row_i] = scalar_mag(cur_row[row_i]);
844 pattern[row_i] = UNUSED;
850 size_type U_vals_heaplen = 0;
851 for(size_type j=1; j<U_cols.size(); ++j) {
853 if (pattern[col] != ORIG) {
854 if (U_vals_heaplen < fillU) {
855 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
857 else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
858 scalar_mag(cur_row[U_vals_heap.front()])) {
860 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
864 U_tmp_idx[row_i].push_back(col);
865 U_tmpv[row_i].push_back(cur_row[col]);
866 unorm[row_i] += scalar_mag(cur_row[col]);
868 pattern[col] = UNUSED;
871 for(size_type j=0; j<U_vals_heaplen; ++j) {
872 U_tmp_idx[row_i].push_back(U_vals_heap[j]);
873 U_tmpv[row_i].push_back(cur_row[U_vals_heap[j]]);
874 unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
877 unorm[row_i] /= (orig_U_len + U_vals_heaplen);
879#ifdef IFPACK2_WRITE_ILUT_FACTORS
880 for(
int ii=0; ii<U_tmp_idx[row_i].size(); ++ii) {
881 ofsU <<row_i<<
" " <<U_tmp_idx[row_i][ii]<<
" "
882 <<U_tmpv[row_i][ii]<< std::endl;
893 Array<size_t> nnzPerRow(myNumRows);
899 L_solver_->setMatrix(Teuchos::null);
900 U_solver_->setMatrix(Teuchos::null);
903 nnzPerRow[row_i] = L_tmp_idx[row_i].size();
906 L_ = rcp (
new crs_matrix_type (A_local_->getRowMap(), A_local_->getColMap(),
910 L_->insertLocalValues (row_i, L_tmp_idx[row_i](), L_tmpv[row_i]());
916 nnzPerRow[row_i] = U_tmp_idx[row_i].size();
919 U_ = rcp (
new crs_matrix_type (A_local_->getRowMap(), A_local_->getColMap(),
923 U_->insertLocalValues (row_i, U_tmp_idx[row_i](), U_tmpv[row_i]());
928 L_solver_->setMatrix(L_);
929 L_solver_->initialize ();
930 L_solver_->compute ();
932 U_solver_->setMatrix(U_);
933 U_solver_->initialize ();
934 U_solver_->compute ();
938 RCP<const crs_matrix_type> A_local_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_local_);
940 if(A_local_crs.is_null()) {
942 Array<size_t> entriesPerRow(numRows);
944 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
946 RCP<crs_matrix_type> A_local_crs_nc =
948 A_local_->getColMap (),
951 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
952 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
954 size_t numEntries = 0;
955 A_local_->getLocalRowCopy(i, indices, values, numEntries);
956 A_local_crs_nc->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()),indices.data());
958 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
959 A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
961 auto lclMtx = A_local_crs->getLocalMatrixDevice();
962 A_local_rowmap_ = lclMtx.graph.row_map;
963 A_local_entries_ = lclMtx.graph.entries;
964 A_local_values_ = lclMtx.values;
968 auto par_ilut_handle = KernelHandle_->get_par_ilut_handle();
969 auto nnzL = par_ilut_handle->get_nnzL();
970 static_graph_entries_t L_entries_ = static_graph_entries_t(
"L_entries", nnzL);
971 local_matrix_values_t L_values_ = local_matrix_values_t(
"L_values", nnzL);
973 auto nnzU = par_ilut_handle->get_nnzU();
974 static_graph_entries_t U_entries_ = static_graph_entries_t(
"U_entries", nnzU);
975 local_matrix_values_t U_values_ = local_matrix_values_t(
"U_values", nnzU);
977 KokkosSparse::Experimental::par_ilut_numeric(KernelHandle_.getRawPtr(),
978 A_local_rowmap_, A_local_entries_, A_local_values_,
979 L_rowmap_, L_entries_, L_values_, U_rowmap_, U_entries_, U_values_);
981 auto L_kokkosCrsGraph = local_graph_device_type(L_entries_, L_rowmap_);
982 auto U_kokkosCrsGraph = local_graph_device_type(U_entries_, U_rowmap_);
984 local_matrix_device_type L_localCrsMatrix_device;
985 L_localCrsMatrix_device = local_matrix_device_type(
"L_Factor_localmatrix",
986 A_local_->getLocalNumRows(),
991 A_local_crs->getRowMap(),
992 A_local_crs->getColMap(),
993 A_local_crs->getDomainMap(),
994 A_local_crs->getRangeMap(),
995 A_local_crs->getGraph()->getImporter(),
996 A_local_crs->getGraph()->getExporter()));
998 local_matrix_device_type U_localCrsMatrix_device;
999 U_localCrsMatrix_device = local_matrix_device_type(
"U_Factor_localmatrix",
1000 A_local_->getLocalNumRows(),
1005 A_local_crs->getRowMap(),
1006 A_local_crs->getColMap(),
1007 A_local_crs->getDomainMap(),
1008 A_local_crs->getRangeMap(),
1009 A_local_crs->getGraph()->getImporter(),
1010 A_local_crs->getGraph()->getExporter()));
1012 L_solver_->setMatrix (L_);
1013 L_solver_->compute ();
1014 U_solver_->setMatrix (U_);
1015 U_solver_->compute ();
1020 ComputeTime_ += (timer.wallTime() - startTime);