/* MIT License * * Copyright (c) 2016--2017 Felix Lenders * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "trlib_private.h" #include "trlib.h" #include "_c99compat.h" trlib_int_t trlib_tri_factor_min( trlib_int_t nirblk, trlib_int_t *irblk, trlib_flt_t *diag, trlib_flt_t *offdiag, trlib_flt_t *neglin, trlib_flt_t radius, trlib_int_t itmax, trlib_flt_t tol_rel, trlib_flt_t tol_newton_tiny, trlib_int_t pos_def, trlib_int_t equality, trlib_int_t *warm0, trlib_flt_t *lam0, trlib_int_t *warm, trlib_flt_t *lam, trlib_int_t *warm_leftmost, trlib_int_t *ileftmost, trlib_flt_t *leftmost, trlib_int_t *warm_fac0, trlib_flt_t *diag_fac0, trlib_flt_t *offdiag_fac0, trlib_int_t *warm_fac, trlib_flt_t *diag_fac, trlib_flt_t *offdiag_fac, trlib_flt_t *sol0, trlib_flt_t *sol, trlib_flt_t *ones, trlib_flt_t *fwork, trlib_int_t refine, trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout, trlib_int_t *timing, trlib_flt_t *obj, trlib_int_t *iter_newton, trlib_int_t *sub_fail) { // use notation of Gould paper // h = h(lam) denotes solution of (T+lam I) * h = -lin trlib_int_t *leftmost_timing = NULL; trlib_int_t *eigen_timing = NULL; // local variables #if TRLIB_MEASURE_TIME struct timespec verystart, start, end; TRLIB_TIC(verystart) leftmost_timing = timing + 1 + TRLIB_SIZE_TIMING_LINALG; eigen_timing = timing + 1 + TRLIB_SIZE_TIMING_LINALG + trlib_leftmost_timing_size(); #endif /* this is based on Theorem 5.8 in Gould paper, * the data for the first block has a 0 suffix, * the data for the \ell block has a l suffix */ trlib_int_t n0 = irblk[1]; // dimension of first block trlib_int_t nl; // dimension of block corresponding to leftmost trlib_int_t nm0 = irblk[1]-1; // length of offdiagonal of first block trlib_int_t info_fac = 0; // factorization information trlib_int_t ret = 0; // return code trlib_flt_t lam_pert = 0.0; // perturbation of leftmost eigenvalue as starting value for lam trlib_flt_t norm_sol0 = 0.0; // norm of h_0(lam) trlib_int_t jj = 0; // local iteration counter trlib_flt_t dlam = 0.0; // increment in newton iteration trlib_int_t inc = 1; // increment in vector storage trlib_flt_t *w = fwork; // auxiliary vector to be used in newton iteration trlib_flt_t *diag_lam = fwork+(irblk[nirblk]); // vector that holds diag + lam, could be saved if we would implement iterative refinement ourselves trlib_flt_t *work = fwork+2*(irblk[nirblk]); // workspace for iterative refinement trlib_flt_t ferr = 0.0; // forward error bound from iterative refinement trlib_flt_t berr = 0.0; // backward error bound from iterative refinement trlib_flt_t pert_low, pert_up; // lower and upper bound on perturbation of lambda trlib_flt_t dot = 0.0, dot2 = 0.0; // save dot products trlib_flt_t invD_norm_w_sq = 0.0; // || w ||_{D^-1}^2 *iter_newton = 0; // newton iteration counter // FIXME: ensure diverse warmstarts work as expected // initialization: *sub_fail = 0; // set sol to 0 as a safeguard memset(sol, 0, irblk[nirblk]*sizeof(trlib_flt_t)); // first make sure that lam0, h_0 is accurate TRLIB_PRINTLN_1("Solving trust region problem, radius %e; starting on first irreducible block", radius) // if only blocks changed that differ from the first, then there is nothing to do if (nirblk > 1 && *warm0) { TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc) if (unicode) { TRLIB_PRINTLN_1("Solution provided via warmstart, \u03bb\u2080 = %e, \u2016h\u2080\u2016 = %e", *lam0, norm_sol0) } else { TRLIB_PRINTLN_1("Solution provided via warmstart, lam0 = %e, ||h0|| = %e", *lam0, norm_sol0) } if (norm_sol0-radius < 0.0) { if (unicode) { TRLIB_PRINTLN_1(" violates \u2016h\u2080\u2016 - radius \u2265 0, but is %e, switch to coldstart", norm_sol0-radius) } else { TRLIB_PRINTLN_1(" violates ||h0|| - radius >= 0, but is %e, switch to coldstart", norm_sol0-radius) } *warm0 = 0; } } if (nirblk == 1 || !*warm0) { // seek for lam0, h_0 with (T0+lam0*I) pos def and ||h_0(lam_0)|| = radius /* as a first step to initialize the newton iteration, * find such a pair with the loosened requirement ||h_0(lam_0)|| >= radius */ if(*warm0) { if(!*warm_fac0) { // factorize T + lam0 I TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac0 <-- diag_lam TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac0 <-- offdiag TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization if (info_fac != 0) { *warm0 = 0; } // factorization failed, switch to coldstart else { *warm_fac0 = 1; } } if(*warm_fac0) { // solve for h0(lam0) and compute norm TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol <-- (T+lam0 I)^-1 sol if(info_fac!=0) { if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon initialization") } else { TRLIB_PRINTLN_2("Failure on computing h0 upon initialization") } TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc) if (norm_sol0 >= radius) { *warm0 = 1; } else { *warm0 = 0; } } } if(!*warm0) { *lam0 = 0.0; if (unicode) { TRLIB_PRINTLN_1("Coldstart. Seeking suitable initial \u03bb\u2080, starting with 0") } else { TRLIB_PRINTLN_1("Coldstart. Seeking suitable initial lam0, starting with 0") } TRLIB_DCOPY(&n0, diag, &inc, diag_fac0, &inc) // diag_fac0 <-- diag0 TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac0 <-- offdiag0 TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin0 TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization if (info_fac == 0) { // test if lam0 = 0 is suitable TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol0 <-- T0^-1 sol0 if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon initialization") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc) if (norm_sol0 get leftmost ev of first block!") } *sub_fail = trlib_leftmost_irreducible(irblk[1], diag, offdiag, *warm_leftmost, *leftmost, 1000, TRLIB_EPS_POW_75, verbose-2, unicode, " LM ", fout, leftmost_timing, leftmost, &jj); // ferr can safely be overwritten by computed leftmost for the moment as can jj with the number of rp iterations // if (*sub_fail != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_LM) } failure of leftmost: may lead to inefficiency, since what we are doing may be slow... // T - leftmost*I is singular, so do bisection to catch factorization and find suitable initial lambda pert_low = - TRLIB_EPS_POW_5 * fabs(*leftmost); // lower bound on allowed perturbation pert_up = 1.0/TRLIB_EPS; // upper bound on allowed perturbation jj = 0; // counter on number of tries lam_pert = 0.0; TRLIB_PRINTLN_1(" ") if (unicode) { TRLIB_PRINTLN_1(" perturb \u03bb\u2080 by safeguarded bisection to find suitable initial value") } else { TRLIB_PRINTLN_1(" perturb lam0 by safeguarded bisection to find suitable initial value") } while( jj < 50 ) { if( jj % 20 == 0 ) { if (unicode) { TRLIB_PRINTLN_2(" %2s%14s%14s%14s%3s%14s", "it", "low ", "pert ", "up ", "pd", " \u2016h\u2080\u2016 - radius") } else { TRLIB_PRINTLN_2(" %2s%14s%14s%14s%3s%14s", "it", "low ", "pert ", "up ", "pd", " ||h0|| - radius") } } *lam0 = -(*leftmost) + lam_pert; // factorize T + lam I TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization if(info_fac == 0) { pert_up = lam_pert; // as this ensures a factorization, it provides a upper bound TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol0 <-- T0^-1 sol0 if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on computing h\u2080 in safeguarded initialization iteration") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc) TRLIB_PRINTLN_2(" %2ld%14e%14e%14e%3s%14e", jj, pert_low, lam_pert, pert_up, " +", norm_sol0 - radius) if(norm_sol0 >= radius) { break; } else { // lam_pert has to be decreased since we caught factorization but got solution that was too small lam_pert = .5*(pert_low+pert_up); } } else { TRLIB_PRINTLN_2(" %2ld%14e%14e%14e%3s", jj, pert_low, lam_pert, pert_up, " -") pert_low = lam_pert; // as factorization fails, it provides a upper bound // now increase perturbation, either by bisection if there is a useful upper bound, // otherwise by a small increment if( pert_up == 1.0/TRLIB_EPS ) { if( lam_pert == 0.0 ) { lam_pert = (1.0+fabs(*leftmost))*TRLIB_EPS_POW_75; } else { lam_pert = 2.0*lam_pert; } } else { lam_pert = .5*(pert_low+pert_up); } } ++jj; } // ensure that we get a factorization and compute solution with it if(info_fac != 0) { lam_pert = pert_up; *lam0 = -(*leftmost) + lam_pert; TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization if(info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR) } TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol0 <-- T0^-1 sol0 if (info_fac != 0) { if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon after factorization was ensured") } else { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon after factorization was ensured") } TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc) } } } } if (norm_sol0 >= radius) { // perform newton iteration if possible /* now a suitable pair lam0, h0 has been found. * perform a newton iteration on 0 = 1/||h0(lam0)|| - 1/radius */ if (unicode) { TRLIB_PRINTLN_1("Starting Newton iteration for \u03bb\u2080 with initial choice %e", *lam0) } else { TRLIB_PRINTLN_1("Starting Newton iteration for lam0 with initial choice %e", *lam0) } while (1) { /* compute newton correction to lam, by (1) Factoring T0 + lam0 I = LDL^T (2) Solving (T0+lam0 I)*h0 = -lin (3) L*w = h0/||h0|| (4) compute increment (||h0||-Delta)/Delta/||w||_{D^-1}^2 */ // steps (1) and (2) have already been performed on initializaton or previous iteration /* step (3) L*w = h/||h|| compute ||w||_{D^-1}^2 in same loop */ ferr = 1.0/norm_sol0; TRLIB_DCOPY(&n0, sol0, &inc, w, &inc) TRLIB_DSCAL(&n0, &ferr, w, &inc) // w <-- sol/||sol|| invD_norm_w_sq = w[0]*w[0]/diag_fac0[0]; for( jj = 1; jj < n0; ++jj ) { w[jj] = w[jj] - offdiag_fac0[jj-1]*w[jj-1]; invD_norm_w_sq += w[jj]*w[jj]/diag_fac0[jj]; } // step (4) compute increment (||h||-Delta)/Delta/||w||_{D^-1}^2 dlam = (norm_sol0-radius)/(radius*invD_norm_w_sq); // iteration completed *iter_newton += 1; // test if dlam is not tiny or newton limit exceeded, return eventually if ( *lam0 + dlam == *lam0 || fabs(dlam) <= tol_newton_tiny * fmax(1.0, fabs(*lam0)) || *iter_newton > itmax) { if (unicode) { TRLIB_PRINTLN_1("%s%e%s%e", "Newton breakdown, d\u03bb = ", dlam, " \u03bb = ", *lam0) } else { TRLIB_PRINTLN_1("%s%e%s%e", "Newton breakdown, dlam = ", dlam, " \u03bb = ", *lam0) } if(*iter_newton > itmax) { ret = TRLIB_TTR_ITMAX; break; } ret = TRLIB_TTR_NEWTON_BREAK; break; } // prepare next iteration // update lam *lam0 += dlam; // step (1) Factoring T0 + lam0 I = LDL^T TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization if (info_fac != 0) { if (unicode) { TRLIB_PRINTLN_2("Fail on factorization, \u03bb = %e, d\u03bb = %e! Exiting Newton Iteration", *lam0, dlam) } else {TRLIB_PRINTLN_2("Fail on factorization, lam = %e, dlam = %e! Exiting Newton Iteration", *lam0, dlam) } TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR) } // step (2) Solving (T+lam I)*h = -lin TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol <-- neglin TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol <-- (T+lam I)^-1 sol if (info_fac != 0) { if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 in newton iteration") } else { TRLIB_PRINTLN_2("Failure on computing h0 in newton iteration") } TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } if (refine) { TRLIB_DPTRFS(&n0, &inc, diag_lam, offdiag, diag_fac0, offdiag_fac0, neglin, &n0, sol0, &n0, &ferr, &berr, work, &info_fac) } if (info_fac != 0) { if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 in refinement in newton iteration") } else { TRLIB_PRINTLN_2("Failure on computing h\u2080 in refinement in newton iteration") } TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc) if (*iter_newton % 20 == 1) { if (unicode) { TRLIB_PRINTLN_1("%6s%14s%14s%14s", " iter ", " \u03bb ", " d\u03bb ", " \u2016h\u2080(\u03bb)\u2016-radius") } else { TRLIB_PRINTLN_1("%6s%14s%14s%14s", " iter ", " lam ", " dlam ", " tr resdidual") } } TRLIB_PRINTLN_1("%6ld%14e%14e%14e", *iter_newton, *lam0, dlam, norm_sol0-radius) // test for convergence if (norm_sol0 - radius <= tol_rel * radius) { // what if norm_sol < radius in a significant way? // theory tells this should not happen... ret = TRLIB_TTR_CONV_BOUND; break; } } } *warm0 = 1; // test if we trust region problem is solved on first irreducible with sufficient accuracy, // otherwise build linear combination with eigenvector to leftmost that solves trust region constraint if ( fabs(radius - norm_sol0) >= TRLIB_EPS_POW_5*radius ) { if(*lam0 == 0.0 && !equality) { ret = TRLIB_TTR_CONV_INTERIOR; } else { if (unicode) { TRLIB_PRINTLN_1(" Found \u03bb\u2080 with tr residual %e! Bail out with h\u2080 + \u03b1 eig", radius - norm_sol0) } else { TRLIB_PRINTLN_1(" Found lam0 with tr residual %e! Bail out with h0 + alpha eig", radius - norm_sol0) } *sub_fail = trlib_eigen_inverse(n0, diag, offdiag, *leftmost, 10, TRLIB_EPS_POW_5, ones, diag_fac, offdiag_fac, sol, verbose-2, unicode, " EI", fout, eigen_timing, &ferr, &berr, &jj); // can safely overwrite ferr, berr, jj with results. only interesting: eigenvector if (*sub_fail != 0 && *sub_fail != -1) { TRLIB_PRINTLN_2("Failure in eigenvector computation: %ld", *sub_fail) TRLIB_RETURN(TRLIB_TTR_FAIL_EIG) } if (*sub_fail == -1) { TRLIB_PRINTLN_2("In eigenvector computation itmax reached, continue with approximate eigenvector") } // compute solution as linear combination of h0 and eigenvector // ||h0 + t*eig||^2 = ||h_0||^2 + t * + t^2 = radius^2 TRLIB_DDOT(dot, &n0, sol0, &inc, sol, &inc); // dot = trlib_quadratic_zero( norm_sol0*norm_sol0 - radius*radius, 2.0*dot, TRLIB_EPS_POW_75, verbose - 3, unicode, prefix, fout, &ferr, &berr); // select solution that corresponds to smaller objective // quadratic as a function of t without offset // q(t) = 1/2 * leftmost * t^2 + (leftmost * + ) * t TRLIB_DDOT(dot2, &n0, sol, &inc, neglin, &inc) // dot2 = - if( .5*(*leftmost)*ferr*ferr + ((*leftmost)*dot - dot2)*ferr <= .5*(*leftmost)*berr*berr + ((*leftmost)*dot - dot2)*berr) { TRLIB_DAXPY(&n0, &ferr, sol, &inc, sol0, &inc) } else { TRLIB_DAXPY(&n0, &berr, sol, &inc, sol0, &inc) } ret = TRLIB_TTR_HARD_INIT_LAM; } } /* now in a situation were accurate lam0, h_0 exists to first irreducible block * invoke Theorem 5.8: * (i) if lam0 >= -leftmost the pair lam0, h_0 solves the problem * (ii) if lam0 < -leftmost a solution has to be constructed to lam = -leftmost */ // quick exit: only one irreducible block if (nirblk == 1) { *lam = *lam0; *warm = 1; TRLIB_DCOPY(&n0, sol0, &inc, sol, &inc) // sol <== sol0 // compute objective. first store 2*gradient in w, then compute obj = .5*(sol, w) TRLIB_DCOPY(&n0, neglin, &inc, w, &inc) ferr = -2.0; TRLIB_DSCAL(&n0, &ferr, w, &inc) ferr = 1.0; // w <-- -2 neglin TRLIB_DLAGTM("N", &n0, &inc, &ferr, offdiag, diag, offdiag, sol, &n0, &ferr, w, &n0) // w <-- T*sol + w TRLIB_DDOT(dot, &n0, sol, &inc, w, &inc) *obj = 0.5*dot; // obj = .5*(sol, w) TRLIB_RETURN(ret) } // now that we have accurate lam, h_0 invoke Theorem 5.8 // check if lam <= leftmost --> in that case the first block information describes everything if (unicode) { TRLIB_PRINTLN_1("\nCheck if \u03bb\u2080 provides global solution, get leftmost ev for irred blocks") } else { TRLIB_PRINTLN_1("\nCheck if lam0 provides global solution, get leftmost ev for irred blocks") } if(!*warm_leftmost) { *sub_fail = trlib_leftmost(nirblk, irblk, diag, offdiag, 0, leftmost[nirblk-1], 1000, TRLIB_EPS_POW_75, verbose-2, unicode, " LM ", fout, leftmost_timing, ileftmost, leftmost); *warm_leftmost = 1; } TRLIB_PRINTLN_1(" leftmost = %e (block %ld)", leftmost[*ileftmost], *ileftmost) if(*lam0 >= -leftmost[*ileftmost]) { if (unicode) { TRLIB_PRINTLN_1(" \u03bb\u2080 \u2265 -leftmost \u21d2 \u03bb = \u03bb\u2080, exit: h\u2080(\u03bb\u2080)") } else { TRLIB_PRINTLN_1(" lam0 >= -leftmost => lam = lam0, exit: h0(lam0)") } *lam = *lam0; *warm = 1; TRLIB_DCOPY(&n0, sol0, &inc, sol, &inc) // sol <== sol0 // compute objective. first store 2*gradient in w, then compute obj = .5*(sol, w) TRLIB_DCOPY(&n0, neglin, &inc, w, &inc) ferr = -2.0; TRLIB_DSCAL(&n0, &ferr, w, &inc) ferr = 1.0; // w <-- -2 neglin TRLIB_DLAGTM("N", &n0, &inc, &ferr, offdiag, diag, offdiag, sol, &n0, &ferr, w, &n0) // w <-- T*sol + w TRLIB_DDOT(dot, &n0, sol, &inc, w, &inc) *obj = 0.5*dot; // obj = .5*(sol, w) TRLIB_RETURN(ret) } else { if (unicode) { TRLIB_PRINTLN_1(" -leftmost > \u03bb\u2080 \u21d2 \u03bb = -leftmost, exit: h\u2080(-leftmost) + \u03b1 u") } else { TRLIB_PRINTLN_1(" -leftmost > lam0 => lam = -leftmost, exit: h0(-leftmost) + alpha u") } // Compute solution of (T0 - leftmost*I)*h0 = neglin *lam = -leftmost[*ileftmost]; *warm = 1; TRLIB_DCOPY(&n0, neglin, &inc, sol, &inc) // neglin <-- sol if(!*warm_fac){ TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n0, lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n0, diag_fac, offdiag_fac, &info_fac) // compute factorization if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR) } } *warm_fac = 1; TRLIB_DPTTRS(&n0, &inc, diag_fac, offdiag_fac, sol, &n0, &info_fac) // sol <-- (T+lam I)^-1 sol if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on computing h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } if (refine) { TRLIB_DPTRFS(&n0, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n0, sol, &n0, &ferr, &berr, work, &info_fac) } if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on refining h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(norm_sol0, &n0, sol, &inc) // compute normalized eigenvector u corresponding to leftmost of block ileftmost nl = irblk[*ileftmost+1]-irblk[*ileftmost]; *sub_fail = trlib_eigen_inverse(nl, diag+irblk[*ileftmost], offdiag+irblk[*ileftmost], leftmost[*ileftmost], 10, TRLIB_EPS_POW_5, ones, diag_fac+irblk[*ileftmost], offdiag_fac+irblk[*ileftmost], sol+irblk[*ileftmost], verbose-2, unicode, " EI", fout, eigen_timing, &ferr, &berr, &jj); // can safely overwrite ferr, berr, jj with results. only interesting: eigenvector if (*sub_fail != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_EIG) } // solution is of form [h,0,...,0,alpha*u,0,...,0] // alpha = sqrt( radius^2 - ||h||^2 ) ferr = sqrt( radius*radius - norm_sol0*norm_sol0 ); TRLIB_DSCAL(&nl, &ferr, sol+irblk[*ileftmost], &inc) if (unicode) { TRLIB_PRINTLN_1(" with \u2016h\u2080(-leftmost)\u2016 = %e, \u03b1 = %e", norm_sol0, ferr) } else { TRLIB_PRINTLN_1(" with ||h0(-leftmost)|| = %e, alpha = %e", norm_sol0, ferr) } ret = TRLIB_TTR_HARD; // compute objective. first store 2*gradient in w, then compute obj = .5*(sol, w) *obj = 0.5*leftmost[*ileftmost]*ferr*ferr; TRLIB_DCOPY(&n0, neglin, &inc, w, &inc) ferr = -2.0; TRLIB_DSCAL(&n0, &ferr, w, &inc) ferr = 1.0; // w <-- -2 neglin TRLIB_DLAGTM("N", &n0, &inc, &ferr, offdiag, diag, offdiag, sol, &n0, &ferr, w, &n0) // w <-- T*sol + w TRLIB_DDOT(dot, &n0, sol, &inc, w, &inc) *obj = *obj+0.5*dot; // obj = .5*(sol, w) TRLIB_RETURN(ret); } } trlib_int_t trlib_tri_factor_regularized_umin( trlib_int_t n, trlib_flt_t *diag, trlib_flt_t *offdiag, trlib_flt_t *neglin, trlib_flt_t lam, trlib_flt_t *sol, trlib_flt_t *ones, trlib_flt_t *fwork, trlib_int_t refine, trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout, trlib_int_t *timing, trlib_flt_t *norm_sol, trlib_int_t *sub_fail) { // local variables #if TRLIB_MEASURE_TIME struct timespec verystart, start, end; TRLIB_TIC(verystart) #endif trlib_flt_t *diag_lam = fwork; // vector that holds diag + lam, could be saved if we would implement iterative refinement ourselves trlib_flt_t *diag_fac = fwork+n; // vector that holds diagonal of factor of diag + lam trlib_flt_t *offdiag_fac = fwork+2*n; // vector that holds offdiagonal of factor of diag + lam trlib_flt_t *work = fwork+3*n; // workspace for iterative refinement trlib_flt_t ferr = 0.0; // forward error bound from iterative refinement trlib_flt_t berr = 0.0; // backward error bound from iterative refinement trlib_int_t inc = 1; // vector increment trlib_int_t info_fac = 0; // LAPACK return code trlib_int_t nm = n-1; // factorize T + lam0 I TRLIB_DCOPY(&n, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n, &lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam TRLIB_DCOPY(&n, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n, diag_fac, offdiag_fac, &info_fac) // compute factorization if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR); } // factorization failed, switch to coldastart TRLIB_DCOPY(&n, neglin, &inc, sol, &inc) // sol <-- neglin TRLIB_DPTTRS(&n, &inc, diag_fac, offdiag_fac, sol, &n, &info_fac) // sol <-- (T+lam I)^-1 sol if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on backsolve for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } if (refine) { TRLIB_DPTRFS(&n, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n, sol, &n, &ferr, &berr, work, &info_fac) } if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on iterative refinement for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(*norm_sol, &n, sol, &inc) TRLIB_RETURN(TRLIB_TTR_CONV_INTERIOR); } trlib_int_t trlib_tri_factor_get_regularization( trlib_int_t n, trlib_flt_t *diag, trlib_flt_t *offdiag, trlib_flt_t *neglin, trlib_flt_t *lam, trlib_flt_t sigma, trlib_flt_t sigma_l, trlib_flt_t sigma_u, trlib_flt_t *sol, trlib_flt_t *ones, trlib_flt_t *fwork, trlib_int_t refine, trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout, trlib_int_t *timing, trlib_flt_t *norm_sol, trlib_int_t *sub_fail) { // local variables #if TRLIB_MEASURE_TIME struct timespec verystart, start, end; TRLIB_TIC(verystart) #endif trlib_flt_t *diag_lam = fwork; // vector that holds diag + lam, could be saved if we would implement iterative refinement ourselves trlib_flt_t *diag_fac = fwork+n; // vector that holds diagonal of factor of diag + lam trlib_flt_t *offdiag_fac = fwork+2*n; // vector that holds offdiagonal of factor of diag + lam trlib_flt_t *work = fwork+3*n; // workspace for iterative refinement trlib_flt_t *aux = fwork+5*n; // auxiliary vector ds/n trlib_flt_t ferr = 0.0; // forward error bound from iterative refinement trlib_flt_t berr = 0.0; // backward error bound from iterative refinement trlib_int_t inc = 1; // vector increment trlib_int_t info_fac; // LAPACK return code trlib_int_t nm = n-1; trlib_flt_t lambda_l = 0.0; // lower bound on lambda trlib_flt_t lambda_u = 1e20; // upper bound on lambda trlib_int_t jj = 0; // local loop variable trlib_flt_t dlam = 0.0; // step in lambda trlib_flt_t dn = 0.0; // derivative of norm // get suitable lambda for which factorization exists info_fac = 1; while(info_fac != 0 && jj < 500) { // factorize T + lam0 I TRLIB_DCOPY(&n, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n, lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam TRLIB_DCOPY(&n, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n, diag_fac, offdiag_fac, &info_fac) // compute factorization if(info_fac == 0) { break; } if(*lam > lambda_u) { break; } lambda_l = *lam; //if (*lam == 0.0) { *lam = 1.0; } *lam = 2.0 * (*lam); jj++; } if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR); } // factorization failed TRLIB_PRINTLN_1("Initial Regularization Factor that allows Cholesky: %e", *lam); TRLIB_DCOPY(&n, neglin, &inc, sol, &inc) // sol <-- neglin TRLIB_DPTTRS(&n, &inc, diag_fac, offdiag_fac, sol, &n, &info_fac) // sol <-- (T+lam I)^-1 sol if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on backsolve for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } if (refine) { TRLIB_DPTRFS(&n, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n, sol, &n, &ferr, &berr, work, &info_fac) } if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on iterative refinement for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(*norm_sol, &n, sol, &inc) jj = 0; TRLIB_PRINTLN_2("%ld\t Reg %e\t Reg/Norm %e\t lb %e ub %e", jj, *lam, *lam/(*norm_sol), sigma_l, sigma_u); // check if accetable if( *norm_sol * sigma_l <= *lam && *lam <= *norm_sol * sigma_u ) { TRLIB_PRINTLN_1("Exit with Regularization Factor %e and Reg/Norm %e", *lam, *lam/(*norm_sol)) TRLIB_RETURN(TRLIB_TTR_CONV_INTERIOR); } else { /* do safeguarded newton iteration on f(lam) = lam/n(lam) with n(lam) = ||s(lam)|| * then dn = 1/n with - (H+lam I) ds = s * thus get - f/df = (n*lam - n*n*sigma) / ( lam dn - n) with dn = and (H + lam I) ds/n = - s/n * note that f is increasing for lam such that (H+lam I) is spd */ jj = 0; while(jj < 500) { // first get the vector ds/n TRLIB_DCOPY(&n, sol, &inc, aux, &inc) // aux <-- sol dn = -1.0/(*norm_sol); // scaling for right hand side TRLIB_DSCAL(&n, &dn, aux, &inc) // aux <-- -sol/||norm_sol|| TRLIB_DDOT(dn, &n, sol, &inc, aux, &inc) // dn = // compute step correction dlam = (*lam*(*norm_sol)-*norm_sol*(*norm_sol)*sigma) / (*lam*dn - *norm_sol); // check feasibility of step if (*lam + dlam <= lambda_u && lambda_l <= *lam + dlam) { *lam = *lam + dlam; } else { *lam = .5*( lambda_l + lambda_u); } // compute next function value // factorize T + lam0 I TRLIB_DCOPY(&n, diag, &inc, diag_lam, &inc) // diag_lam <-- diag TRLIB_DAXPY(&n, lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam TRLIB_DCOPY(&n, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam TRLIB_DCOPY(&nm, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag TRLIB_DPTTRF(&n, diag_fac, offdiag_fac, &info_fac) // compute factorization if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR); } // factorization failed TRLIB_DCOPY(&n, neglin, &inc, sol, &inc) // sol <-- neglin TRLIB_DPTTRS(&n, &inc, diag_fac, offdiag_fac, sol, &n, &info_fac) // sol <-- (T+lam I)^-1 sol if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on backsolve for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } if (refine) { TRLIB_DPTRFS(&n, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n, sol, &n, &ferr, &berr, work, &info_fac) } if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on iterative refinement for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) } TRLIB_DNRM2(*norm_sol, &n, sol, &inc) jj++; TRLIB_PRINTLN_2("%ld\t Reg %e\t Reg/Norm %e\t lb %e ub %e", jj, *lam, *lam/(*norm_sol), sigma_l, sigma_u); // check if accetable if( *norm_sol * sigma_l <= *lam && *lam <= *norm_sol * sigma_u ) { TRLIB_PRINTLN_1("Exit with Regularization Factor %e and Reg/Norm %e", *lam, *lam/(*norm_sol)) TRLIB_RETURN(TRLIB_TTR_CONV_INTERIOR); } else { // contract bounds if(*lam > *norm_sol * sigma_u) { lambda_u = *lam; } if(*lam < *norm_sol * sigma_l) { lambda_l = *lam; } } } TRLIB_PRINTLN_1("Failure: no convergence to determine regularaization factor, last iterate %e", *lam) TRLIB_RETURN(TRLIB_TTR_NEWTON_BREAK) } } trlib_int_t trlib_tri_factor_regularize_posdef( trlib_int_t n, trlib_flt_t *diag, trlib_flt_t *offdiag, trlib_flt_t tol_away, trlib_flt_t security_step, trlib_flt_t *regdiag) { /* modify diagonal to be able to factorize Cholesky recurrence for diagonal is diag_fac[0] = diag[0] diag_fac[i+1] = diag[i+1] - offdiag[i]*offdiag[i] / diag_fac[i] we have to ensure diag_fac > 0 */ trlib_flt_t diag_fac = 0.0; trlib_int_t pivot = 0; regdiag[0] = 0.0; if (diag[0] <= tol_away) { regdiag[0] = security_step*tol_away; } diag_fac = diag[0] + regdiag[0]; for(pivot = 0; pivot < n-1; ++pivot) { regdiag[pivot+1] = 0.0; if ( diag[pivot+1] - offdiag[pivot]*offdiag[pivot]/diag_fac <= tol_away * diag_fac ) { regdiag[pivot+1] = security_step * fabs(offdiag[pivot]*offdiag[pivot]/diag_fac - diag[pivot+1]); } diag_fac = diag[pivot+1] + regdiag[pivot+1] - offdiag[pivot]*offdiag[pivot]/diag_fac; } return 0; } trlib_int_t trlib_tri_timing_size() { #if TRLIB_MEASURE_TIME return 1+TRLIB_SIZE_TIMING_LINALG+trlib_leftmost_timing_size()+trlib_eigen_timing_size(); #endif return 0; } trlib_int_t trlib_tri_factor_memory_size(trlib_int_t n) { return 6*n; }