1054 Teuchos::LAPACK<int,ScalarType> lapack;
1056 ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
1057 ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
1070 "Belos::RCGSolMgr::solve(): Linear problem is not a valid object.");
1072 "Belos::RCGSolMgr::solve(): Linear problem is not ready, setProblem() has not been called.");
1073 TEUCHOS_TEST_FOR_EXCEPTION((
problem_->getLeftPrec() != Teuchos::null)&&(
problem_->getRightPrec() != Teuchos::null),
1075 "Belos::RCGSolMgr::solve(): RCG does not support split preconditioning, only set left or right preconditioner.");
1078 Teuchos::RCP<OP> precObj;
1079 if (
problem_->getLeftPrec() != Teuchos::null) {
1080 precObj = Teuchos::rcp_const_cast<OP>(
problem_->getLeftPrec());
1082 else if (
problem_->getRightPrec() != Teuchos::null) {
1083 precObj = Teuchos::rcp_const_cast<OP>(
problem_->getRightPrec());
1088 std::vector<int> currIdx(1);
1100 "Warning! Requested Krylov subspace dimension is larger than operator dimension!" << std::endl <<
1101 " The maximum number of blocks allowed for the Krylov subspace will be adjusted to " <<
numBlocks_ << std::endl;
1108 Teuchos::ParameterList plist;
1116 bool isConverged =
true;
1127 problem_->applyOp( *Utmp, *AUtmp );
1133 if ( precObj != Teuchos::null ) {
1149 Teuchos::RCP<RCGIter<ScalarType,MV,OP> > rcg_iter;
1154#ifdef BELOS_TEUCHOS_TIME_MONITOR
1158 while ( numRHS2Solve > 0 ) {
1162 if (
existU_)
printer_->print(
Debug,
"Using recycle space generated from previous call to solve()." );
1167 rcg_iter->resetNumIters();
1187 Teuchos::SerialDenseMatrix<int,ScalarType> Utr(
recycleBlocks_,1);
1194 LUUTAUtmp.assign(UTAUtmp);
1196 lapack.GESV(
recycleBlocks_, 1, LUUTAUtmp.values(), LUUTAUtmp.stride(), &(*
ipiv_)[0], Utr.values(), Utr.stride(), &info);
1198 "Belos::RCGSolMgr::solve(): LAPACK GESV failed to compute a solution.");
1210 if ( precObj != Teuchos::null ) {
1229 lapack.GETRS(
TRANS,
recycleBlocks_, 1, LUUTAUtmp.values(), LUUTAUtmp.stride(), &(*
ipiv_)[0], mu.values(), mu.stride(), &info );
1231 "Belos::RCGSolMgr::solve(): LAPACK GETRS failed to compute a solution.");
1251 for (
int ii=0; ii<(
numBlocks_+1); ++ii) { index[ii] = ii; }
1259 newstate.
Alpha = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *
Alpha_,
numBlocks_, 1 ) );
1260 newstate.
Beta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *
Beta_,
numBlocks_, 1 ) );
1261 newstate.
D = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *
D_,
numBlocks_, 1 ) );
1273 rcg_iter->initialize(newstate);
1279 rcg_iter->iterate();
1297 isConverged =
false;
1305 else if ( rcg_iter->getCurSubspaceDim() == rcg_iter->getMaxSubspaceDim() ) {
1316 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *
D_,
numBlocks_, 1 );
1317 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *
Alpha_,
numBlocks_, 1 );
1318 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *
Beta_,
numBlocks_, 1 );
1319 Ftmp.putScalar(zero);
1320 Gtmp.putScalar(zero);
1322 Gtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii,0));
1324 Gtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1325 Gtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1327 Ftmp(ii,ii) = Dtmp(ii,0);
1336 for (
int ii=0; ii<
numBlocks_; ++ii) { index[ii] = ii; }
1347 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1349 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1353 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1355 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1359 AU1TAPtmp.putScalar(zero);
1361 ScalarType alphatmp = -1.0 / Alphatmp(
numBlocks_-1,0);
1363 AU1TAPtmp(ii,0) = Ytmp(
numBlocks_-1,ii) * alphatmp;
1378 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *
D_,
numBlocks_, 1 );
1380 AU1TAPtmp.scale(Dtmp(0,0));
1382 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *
Alpha_,
numBlocks_, 1 );
1383 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *
Beta_,
numBlocks_+1, 1 );
1385 APTAPtmp.putScalar(zero);
1387 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii+1,0));
1389 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1390 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1401 F11.assign(AU1TU1tmp);
1402 F12.putScalar(zero);
1403 F21.putScalar(zero);
1404 F22.putScalar(zero);
1406 F22(ii,ii) = Dtmp(ii,0);
1416 G11.assign(AU1TAU1tmp);
1417 G12.assign(AU1TAPtmp);
1421 G21(jj,ii) = G12(ii,jj);
1422 G22.assign(APTAPtmp);
1430 for (
int ii=0; ii<
numBlocks_; ++ii) { index[ii] = ii+1; }
1451 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1452 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1456 AU1TAPtmp.putScalar(zero);
1457 ScalarType alphatmp = -1.0 / Alphatmp(
numBlocks_-1,0);
1464 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1466 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1476 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *
Alpha_,
numBlocks_, 1 );
1477 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *
Beta_,
numBlocks_, 1 );
1478 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *
D_,
numBlocks_, 1 );
1480 APTAPtmp.putScalar(zero);
1482 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii,0));
1484 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1485 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1490 L2tmp.putScalar(zero);
1492 L2tmp(ii,ii) = 1./Alphatmp(ii,0);
1493 L2tmp(ii+1,ii) = -1./Alphatmp(ii,0);
1501 DeltaL2tmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Deltatmp,L2tmp,zero);
1502 AUTAPtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,UTAUtmp,DeltaL2tmp,zero);
1510 F11.assign(UTAUtmp);
1511 F12.putScalar(zero);
1512 F21.putScalar(zero);
1513 F22.putScalar(zero);
1515 F22(ii,ii) = Dtmp(ii,0);
1525 G11.assign(AUTAUtmp);
1526 G12.assign(AUTAPtmp);
1530 G21(jj,ii) = G12(ii,jj);
1531 G22.assign(APTAPtmp);
1542 for (
int ii=0; ii<
numBlocks_; ++ii) { index[ii] = ii; }
1563 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1565 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1569 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1571 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1575 AU1TUtmp.assign(UTAUtmp);
1588 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *
Alpha_,
numBlocks_, 1 );
1589 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *
Beta_,
numBlocks_+1, 1 );
1590 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *
D_,
numBlocks_, 1 );
1593 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii+1,0));
1595 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1596 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1602 L2tmp(ii,ii) = 1./Alphatmp(ii,0);
1603 L2tmp(ii+1,ii) = -1./Alphatmp(ii,0);
1610 DeltaL2.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Deltatmp,L2tmp,zero);
1613 AU1TUDeltaL2.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,AU1TUtmp,DeltaL2,zero);
1616 AU1TAPtmp.putScalar(zero);
1617 AU1TAPtmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Y1,AU1TUDeltaL2,zero);
1619 ScalarType val =
dold * (-Betatmp(0,0)/Alphatmp(0,0));
1626 Y1TAU1TU.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Y1,AU1TUtmp,zero);
1627 AU1TUtmp.assign(Y1TAU1TU);
1636 F11.assign(AU1TU1tmp);
1638 F22(ii,ii) = Dtmp(ii,0);
1648 G11.assign(AU1TAU1tmp);
1649 G12.assign(AU1TAPtmp);
1653 G21(jj,ii) = G12(ii,jj);
1654 G22.assign(APTAPtmp);
1662 for (
int ii=0; ii<
numBlocks_; ++ii) { index[ii] = ii+1; }
1681 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1682 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1686 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1687 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1704 index[0] = lastp-1; index[1] = lastp;
1706 index[0] = 0; index[1] = 1;
1710 (*Beta_)(0,0) = (*
Beta_)(lastBeta,0);
1714 Teuchos::SerialDenseMatrix<int,ScalarType> mu1( Teuchos::View, *
Delta_,
recycleBlocks_, 1, 0, 0 );
1720 newstate.
P = Teuchos::null;
1722 for (
int ii=0; ii<(
numBlocks_+1); ++ii) { index[ii] = ii+1; }
1725 newstate.
Beta = Teuchos::null;
1726 newstate.
Beta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *
Beta_,
numBlocks_, 1, 1, 0 ) );
1728 newstate.
Delta = Teuchos::null;
1734 rcg_iter->initialize(newstate);
1747 TEUCHOS_TEST_FOR_EXCEPTION(
true,std::logic_error,
1748 "Belos::RCGSolMgr::solve(): Invalid return from RCGIter::iterate().");
1751 catch (
const std::exception &e) {
1752 printer_->stream(
Errors) <<
"Error! Caught std::exception in RCGIter::iterate() at iteration "
1753 << rcg_iter->getNumIters() << std::endl
1754 << e.what() << std::endl;
1764 if ( numRHS2Solve > 0 ) {
1770 currIdx.resize( numRHS2Solve );
1781 if (numRHS2Solve > 0) {
1783 newstate.
P = Teuchos::null;
1784 newstate.
Ap = Teuchos::null;
1785 newstate.
r = Teuchos::null;
1786 newstate.
z = Teuchos::null;
1787 newstate.
U = Teuchos::null;
1788 newstate.
AU = Teuchos::null;
1789 newstate.
Alpha = Teuchos::null;
1790 newstate.
Beta = Teuchos::null;
1791 newstate.
D = Teuchos::null;
1792 newstate.
Delta = Teuchos::null;
1793 newstate.
LUUTAU = Teuchos::null;
1794 newstate.
ipiv = Teuchos::null;
1795 newstate.
rTz_old = Teuchos::null;
1805 problem_->applyOp( *Utmp, *AUtmp );
1811 if ( precObj != Teuchos::null ) {
1833#ifdef BELOS_TEUCHOS_TIME_MONITOR
1846 using Teuchos::rcp_dynamic_cast;
1849 const std::vector<MagnitudeType>* pTestValues =
1852 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues == NULL, std::logic_error,
1853 "Belos::RCGSolMgr::solve(): The convergence test's getTestValue() "
1854 "method returned NULL. Please report this bug to the Belos developers.");
1856 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues->size() < 1, std::logic_error,
1857 "Belos::RCGSolMgr::solve(): The convergence test's getTestValue() "
1858 "method returned a vector of length zero. Please report this bug to the "
1859 "Belos developers.");
1864 achievedTol_ = *std::max_element (pTestValues->begin(), pTestValues->end());