001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.optimization.general; 018 019 import java.util.Arrays; 020 021 import org.apache.commons.math.FunctionEvaluationException; 022 import org.apache.commons.math.optimization.OptimizationException; 023 import org.apache.commons.math.optimization.VectorialPointValuePair; 024 025 026 /** 027 * This class solves a least squares problem using the Levenberg-Marquardt algorithm. 028 * 029 * <p>This implementation <em>should</em> work even for over-determined systems 030 * (i.e. systems having more point than equations). Over-determined systems 031 * are solved by ignoring the point which have the smallest impact according 032 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 033 * are changed to implement this.</p> 034 * 035 * <p>The resolution engine is a simple translation of the MINPACK <a 036 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 037 * changes. The changes include the over-determined resolution and the Q.R. 038 * decomposition which has been rewritten following the algorithm described in the 039 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 040 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 041 * <p>The authors of the original fortran version are: 042 * <ul> 043 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 044 * <li>Burton S. Garbow</li> 045 * <li>Kenneth E. Hillstrom</li> 046 * <li>Jorge J. More</li> 047 * </ul> 048 * The redistribution policy for MINPACK is available <a 049 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 050 * is reproduced below.</p> 051 * 052 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> 053 * <tr><td> 054 * Minpack Copyright Notice (1999) University of Chicago. 055 * All rights reserved 056 * </td></tr> 057 * <tr><td> 058 * Redistribution and use in source and binary forms, with or without 059 * modification, are permitted provided that the following conditions 060 * are met: 061 * <ol> 062 * <li>Redistributions of source code must retain the above copyright 063 * notice, this list of conditions and the following disclaimer.</li> 064 * <li>Redistributions in binary form must reproduce the above 065 * copyright notice, this list of conditions and the following 066 * disclaimer in the documentation and/or other materials provided 067 * with the distribution.</li> 068 * <li>The end-user documentation included with the redistribution, if any, 069 * must include the following acknowledgment: 070 * <code>This product includes software developed by the University of 071 * Chicago, as Operator of Argonne National Laboratory.</code> 072 * Alternately, this acknowledgment may appear in the software itself, 073 * if and wherever such third-party acknowledgments normally appear.</li> 074 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 075 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 076 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 077 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 078 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 079 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 080 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 081 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 082 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 083 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 084 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 085 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 086 * BE CORRECTED.</strong></li> 087 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 088 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 089 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 090 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 091 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 092 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 093 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 094 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 095 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 096 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 097 * <ol></td></tr> 098 * </table> 099 * @version $Revision: 825919 $ $Date: 2009-10-16 10:51:55 -0400 (Fri, 16 Oct 2009) $ 100 * @since 2.0 101 * 102 */ 103 public class LevenbergMarquardtOptimizer extends AbstractLeastSquaresOptimizer { 104 105 /** Number of solved point. */ 106 private int solvedCols; 107 108 /** Diagonal elements of the R matrix in the Q.R. decomposition. */ 109 private double[] diagR; 110 111 /** Norms of the columns of the jacobian matrix. */ 112 private double[] jacNorm; 113 114 /** Coefficients of the Householder transforms vectors. */ 115 private double[] beta; 116 117 /** Columns permutation array. */ 118 private int[] permutation; 119 120 /** Rank of the jacobian matrix. */ 121 private int rank; 122 123 /** Levenberg-Marquardt parameter. */ 124 private double lmPar; 125 126 /** Parameters evolution direction associated with lmPar. */ 127 private double[] lmDir; 128 129 /** Positive input variable used in determining the initial step bound. */ 130 private double initialStepBoundFactor; 131 132 /** Desired relative error in the sum of squares. */ 133 private double costRelativeTolerance; 134 135 /** Desired relative error in the approximate solution parameters. */ 136 private double parRelativeTolerance; 137 138 /** Desired max cosine on the orthogonality between the function vector 139 * and the columns of the jacobian. */ 140 private double orthoTolerance; 141 142 /** 143 * Build an optimizer for least squares problems. 144 * <p>The default values for the algorithm settings are: 145 * <ul> 146 * <li>{@link #setInitialStepBoundFactor initial step bound factor}: 100.0</li> 147 * <li>{@link #setMaxIterations maximal iterations}: 1000</li> 148 * <li>{@link #setCostRelativeTolerance cost relative tolerance}: 1.0e-10</li> 149 * <li>{@link #setParRelativeTolerance parameters relative tolerance}: 1.0e-10</li> 150 * <li>{@link #setOrthoTolerance orthogonality tolerance}: 1.0e-10</li> 151 * </ul> 152 * </p> 153 */ 154 public LevenbergMarquardtOptimizer() { 155 156 // set up the superclass with a default max cost evaluations setting 157 setMaxIterations(1000); 158 159 // default values for the tuning parameters 160 setInitialStepBoundFactor(100.0); 161 setCostRelativeTolerance(1.0e-10); 162 setParRelativeTolerance(1.0e-10); 163 setOrthoTolerance(1.0e-10); 164 165 } 166 167 /** 168 * Set the positive input variable used in determining the initial step bound. 169 * This bound is set to the product of initialStepBoundFactor and the euclidean 170 * norm of diag*x if nonzero, or else to initialStepBoundFactor itself. In most 171 * cases factor should lie in the interval (0.1, 100.0). 100.0 is a generally 172 * recommended value. 173 * 174 * @param initialStepBoundFactor initial step bound factor 175 */ 176 public void setInitialStepBoundFactor(double initialStepBoundFactor) { 177 this.initialStepBoundFactor = initialStepBoundFactor; 178 } 179 180 /** 181 * Set the desired relative error in the sum of squares. 182 * 183 * @param costRelativeTolerance desired relative error in the sum of squares 184 */ 185 public void setCostRelativeTolerance(double costRelativeTolerance) { 186 this.costRelativeTolerance = costRelativeTolerance; 187 } 188 189 /** 190 * Set the desired relative error in the approximate solution parameters. 191 * 192 * @param parRelativeTolerance desired relative error 193 * in the approximate solution parameters 194 */ 195 public void setParRelativeTolerance(double parRelativeTolerance) { 196 this.parRelativeTolerance = parRelativeTolerance; 197 } 198 199 /** 200 * Set the desired max cosine on the orthogonality. 201 * 202 * @param orthoTolerance desired max cosine on the orthogonality 203 * between the function vector and the columns of the jacobian 204 */ 205 public void setOrthoTolerance(double orthoTolerance) { 206 this.orthoTolerance = orthoTolerance; 207 } 208 209 /** {@inheritDoc} */ 210 @Override 211 protected VectorialPointValuePair doOptimize() 212 throws FunctionEvaluationException, OptimizationException, IllegalArgumentException { 213 214 // arrays shared with the other private methods 215 solvedCols = Math.min(rows, cols); 216 diagR = new double[cols]; 217 jacNorm = new double[cols]; 218 beta = new double[cols]; 219 permutation = new int[cols]; 220 lmDir = new double[cols]; 221 222 // local point 223 double delta = 0; 224 double xNorm = 0; 225 double[] diag = new double[cols]; 226 double[] oldX = new double[cols]; 227 double[] oldRes = new double[rows]; 228 double[] work1 = new double[cols]; 229 double[] work2 = new double[cols]; 230 double[] work3 = new double[cols]; 231 232 // evaluate the function at the starting point and calculate its norm 233 updateResidualsAndCost(); 234 235 // outer loop 236 lmPar = 0; 237 boolean firstIteration = true; 238 while (true) { 239 240 incrementIterationsCounter(); 241 242 // compute the Q.R. decomposition of the jacobian matrix 243 updateJacobian(); 244 qrDecomposition(); 245 246 // compute Qt.res 247 qTy(residuals); 248 249 // now we don't need Q anymore, 250 // so let jacobian contain the R matrix with its diagonal elements 251 for (int k = 0; k < solvedCols; ++k) { 252 int pk = permutation[k]; 253 jacobian[k][pk] = diagR[pk]; 254 } 255 256 if (firstIteration) { 257 258 // scale the point according to the norms of the columns 259 // of the initial jacobian 260 xNorm = 0; 261 for (int k = 0; k < cols; ++k) { 262 double dk = jacNorm[k]; 263 if (dk == 0) { 264 dk = 1.0; 265 } 266 double xk = dk * point[k]; 267 xNorm += xk * xk; 268 diag[k] = dk; 269 } 270 xNorm = Math.sqrt(xNorm); 271 272 // initialize the step bound delta 273 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 274 275 } 276 277 // check orthogonality between function vector and jacobian columns 278 double maxCosine = 0; 279 if (cost != 0) { 280 for (int j = 0; j < solvedCols; ++j) { 281 int pj = permutation[j]; 282 double s = jacNorm[pj]; 283 if (s != 0) { 284 double sum = 0; 285 for (int i = 0; i <= j; ++i) { 286 sum += jacobian[i][pj] * residuals[i]; 287 } 288 maxCosine = Math.max(maxCosine, Math.abs(sum) / (s * cost)); 289 } 290 } 291 } 292 if (maxCosine <= orthoTolerance) { 293 // convergence has been reached 294 return new VectorialPointValuePair(point, objective); 295 } 296 297 // rescale if necessary 298 for (int j = 0; j < cols; ++j) { 299 diag[j] = Math.max(diag[j], jacNorm[j]); 300 } 301 302 // inner loop 303 for (double ratio = 0; ratio < 1.0e-4;) { 304 305 // save the state 306 for (int j = 0; j < solvedCols; ++j) { 307 int pj = permutation[j]; 308 oldX[pj] = point[pj]; 309 } 310 double previousCost = cost; 311 double[] tmpVec = residuals; 312 residuals = oldRes; 313 oldRes = tmpVec; 314 315 // determine the Levenberg-Marquardt parameter 316 determineLMParameter(oldRes, delta, diag, work1, work2, work3); 317 318 // compute the new point and the norm of the evolution direction 319 double lmNorm = 0; 320 for (int j = 0; j < solvedCols; ++j) { 321 int pj = permutation[j]; 322 lmDir[pj] = -lmDir[pj]; 323 point[pj] = oldX[pj] + lmDir[pj]; 324 double s = diag[pj] * lmDir[pj]; 325 lmNorm += s * s; 326 } 327 lmNorm = Math.sqrt(lmNorm); 328 329 // on the first iteration, adjust the initial step bound. 330 if (firstIteration) { 331 delta = Math.min(delta, lmNorm); 332 } 333 334 // evaluate the function at x + p and calculate its norm 335 updateResidualsAndCost(); 336 337 // compute the scaled actual reduction 338 double actRed = -1.0; 339 if (0.1 * cost < previousCost) { 340 double r = cost / previousCost; 341 actRed = 1.0 - r * r; 342 } 343 344 // compute the scaled predicted reduction 345 // and the scaled directional derivative 346 for (int j = 0; j < solvedCols; ++j) { 347 int pj = permutation[j]; 348 double dirJ = lmDir[pj]; 349 work1[j] = 0; 350 for (int i = 0; i <= j; ++i) { 351 work1[i] += jacobian[i][pj] * dirJ; 352 } 353 } 354 double coeff1 = 0; 355 for (int j = 0; j < solvedCols; ++j) { 356 coeff1 += work1[j] * work1[j]; 357 } 358 double pc2 = previousCost * previousCost; 359 coeff1 = coeff1 / pc2; 360 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 361 double preRed = coeff1 + 2 * coeff2; 362 double dirDer = -(coeff1 + coeff2); 363 364 // ratio of the actual to the predicted reduction 365 ratio = (preRed == 0) ? 0 : (actRed / preRed); 366 367 // update the step bound 368 if (ratio <= 0.25) { 369 double tmp = 370 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 371 if ((0.1 * cost >= previousCost) || (tmp < 0.1)) { 372 tmp = 0.1; 373 } 374 delta = tmp * Math.min(delta, 10.0 * lmNorm); 375 lmPar /= tmp; 376 } else if ((lmPar == 0) || (ratio >= 0.75)) { 377 delta = 2 * lmNorm; 378 lmPar *= 0.5; 379 } 380 381 // test for successful iteration. 382 if (ratio >= 1.0e-4) { 383 // successful iteration, update the norm 384 firstIteration = false; 385 xNorm = 0; 386 for (int k = 0; k < cols; ++k) { 387 double xK = diag[k] * point[k]; 388 xNorm += xK * xK; 389 } 390 xNorm = Math.sqrt(xNorm); 391 } else { 392 // failed iteration, reset the previous values 393 cost = previousCost; 394 for (int j = 0; j < solvedCols; ++j) { 395 int pj = permutation[j]; 396 point[pj] = oldX[pj]; 397 } 398 tmpVec = residuals; 399 residuals = oldRes; 400 oldRes = tmpVec; 401 } 402 403 // tests for convergence. 404 if (((Math.abs(actRed) <= costRelativeTolerance) && 405 (preRed <= costRelativeTolerance) && 406 (ratio <= 2.0)) || 407 (delta <= parRelativeTolerance * xNorm)) { 408 return new VectorialPointValuePair(point, objective); 409 } 410 411 // tests for termination and stringent tolerances 412 // (2.2204e-16 is the machine epsilon for IEEE754) 413 if ((Math.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) && (ratio <= 2.0)) { 414 throw new OptimizationException("cost relative tolerance is too small ({0})," + 415 " no further reduction in the" + 416 " sum of squares is possible", 417 costRelativeTolerance); 418 } else if (delta <= 2.2204e-16 * xNorm) { 419 throw new OptimizationException("parameters relative tolerance is too small" + 420 " ({0}), no further improvement in" + 421 " the approximate solution is possible", 422 parRelativeTolerance); 423 } else if (maxCosine <= 2.2204e-16) { 424 throw new OptimizationException("orthogonality tolerance is too small ({0})," + 425 " solution is orthogonal to the jacobian", 426 orthoTolerance); 427 } 428 429 } 430 431 } 432 433 } 434 435 /** 436 * Determine the Levenberg-Marquardt parameter. 437 * <p>This implementation is a translation in Java of the MINPACK 438 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 439 * routine.</p> 440 * <p>This method sets the lmPar and lmDir attributes.</p> 441 * <p>The authors of the original fortran function are:</p> 442 * <ul> 443 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 444 * <li>Burton S. Garbow</li> 445 * <li>Kenneth E. Hillstrom</li> 446 * <li>Jorge J. More</li> 447 * </ul> 448 * <p>Luc Maisonobe did the Java translation.</p> 449 * 450 * @param qy array containing qTy 451 * @param delta upper bound on the euclidean norm of diagR * lmDir 452 * @param diag diagonal matrix 453 * @param work1 work array 454 * @param work2 work array 455 * @param work3 work array 456 */ 457 private void determineLMParameter(double[] qy, double delta, double[] diag, 458 double[] work1, double[] work2, double[] work3) { 459 460 // compute and store in x the gauss-newton direction, if the 461 // jacobian is rank-deficient, obtain a least squares solution 462 for (int j = 0; j < rank; ++j) { 463 lmDir[permutation[j]] = qy[j]; 464 } 465 for (int j = rank; j < cols; ++j) { 466 lmDir[permutation[j]] = 0; 467 } 468 for (int k = rank - 1; k >= 0; --k) { 469 int pk = permutation[k]; 470 double ypk = lmDir[pk] / diagR[pk]; 471 for (int i = 0; i < k; ++i) { 472 lmDir[permutation[i]] -= ypk * jacobian[i][pk]; 473 } 474 lmDir[pk] = ypk; 475 } 476 477 // evaluate the function at the origin, and test 478 // for acceptance of the Gauss-Newton direction 479 double dxNorm = 0; 480 for (int j = 0; j < solvedCols; ++j) { 481 int pj = permutation[j]; 482 double s = diag[pj] * lmDir[pj]; 483 work1[pj] = s; 484 dxNorm += s * s; 485 } 486 dxNorm = Math.sqrt(dxNorm); 487 double fp = dxNorm - delta; 488 if (fp <= 0.1 * delta) { 489 lmPar = 0; 490 return; 491 } 492 493 // if the jacobian is not rank deficient, the Newton step provides 494 // a lower bound, parl, for the zero of the function, 495 // otherwise set this bound to zero 496 double sum2; 497 double parl = 0; 498 if (rank == solvedCols) { 499 for (int j = 0; j < solvedCols; ++j) { 500 int pj = permutation[j]; 501 work1[pj] *= diag[pj] / dxNorm; 502 } 503 sum2 = 0; 504 for (int j = 0; j < solvedCols; ++j) { 505 int pj = permutation[j]; 506 double sum = 0; 507 for (int i = 0; i < j; ++i) { 508 sum += jacobian[i][pj] * work1[permutation[i]]; 509 } 510 double s = (work1[pj] - sum) / diagR[pj]; 511 work1[pj] = s; 512 sum2 += s * s; 513 } 514 parl = fp / (delta * sum2); 515 } 516 517 // calculate an upper bound, paru, for the zero of the function 518 sum2 = 0; 519 for (int j = 0; j < solvedCols; ++j) { 520 int pj = permutation[j]; 521 double sum = 0; 522 for (int i = 0; i <= j; ++i) { 523 sum += jacobian[i][pj] * qy[i]; 524 } 525 sum /= diag[pj]; 526 sum2 += sum * sum; 527 } 528 double gNorm = Math.sqrt(sum2); 529 double paru = gNorm / delta; 530 if (paru == 0) { 531 // 2.2251e-308 is the smallest positive real for IEE754 532 paru = 2.2251e-308 / Math.min(delta, 0.1); 533 } 534 535 // if the input par lies outside of the interval (parl,paru), 536 // set par to the closer endpoint 537 lmPar = Math.min(paru, Math.max(lmPar, parl)); 538 if (lmPar == 0) { 539 lmPar = gNorm / dxNorm; 540 } 541 542 for (int countdown = 10; countdown >= 0; --countdown) { 543 544 // evaluate the function at the current value of lmPar 545 if (lmPar == 0) { 546 lmPar = Math.max(2.2251e-308, 0.001 * paru); 547 } 548 double sPar = Math.sqrt(lmPar); 549 for (int j = 0; j < solvedCols; ++j) { 550 int pj = permutation[j]; 551 work1[pj] = sPar * diag[pj]; 552 } 553 determineLMDirection(qy, work1, work2, work3); 554 555 dxNorm = 0; 556 for (int j = 0; j < solvedCols; ++j) { 557 int pj = permutation[j]; 558 double s = diag[pj] * lmDir[pj]; 559 work3[pj] = s; 560 dxNorm += s * s; 561 } 562 dxNorm = Math.sqrt(dxNorm); 563 double previousFP = fp; 564 fp = dxNorm - delta; 565 566 // if the function is small enough, accept the current value 567 // of lmPar, also test for the exceptional cases where parl is zero 568 if ((Math.abs(fp) <= 0.1 * delta) || 569 ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) { 570 return; 571 } 572 573 // compute the Newton correction 574 for (int j = 0; j < solvedCols; ++j) { 575 int pj = permutation[j]; 576 work1[pj] = work3[pj] * diag[pj] / dxNorm; 577 } 578 for (int j = 0; j < solvedCols; ++j) { 579 int pj = permutation[j]; 580 work1[pj] /= work2[j]; 581 double tmp = work1[pj]; 582 for (int i = j + 1; i < solvedCols; ++i) { 583 work1[permutation[i]] -= jacobian[i][pj] * tmp; 584 } 585 } 586 sum2 = 0; 587 for (int j = 0; j < solvedCols; ++j) { 588 double s = work1[permutation[j]]; 589 sum2 += s * s; 590 } 591 double correction = fp / (delta * sum2); 592 593 // depending on the sign of the function, update parl or paru. 594 if (fp > 0) { 595 parl = Math.max(parl, lmPar); 596 } else if (fp < 0) { 597 paru = Math.min(paru, lmPar); 598 } 599 600 // compute an improved estimate for lmPar 601 lmPar = Math.max(parl, lmPar + correction); 602 603 } 604 } 605 606 /** 607 * Solve a*x = b and d*x = 0 in the least squares sense. 608 * <p>This implementation is a translation in Java of the MINPACK 609 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 610 * routine.</p> 611 * <p>This method sets the lmDir and lmDiag attributes.</p> 612 * <p>The authors of the original fortran function are:</p> 613 * <ul> 614 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 615 * <li>Burton S. Garbow</li> 616 * <li>Kenneth E. Hillstrom</li> 617 * <li>Jorge J. More</li> 618 * </ul> 619 * <p>Luc Maisonobe did the Java translation.</p> 620 * 621 * @param qy array containing qTy 622 * @param diag diagonal matrix 623 * @param lmDiag diagonal elements associated with lmDir 624 * @param work work array 625 */ 626 private void determineLMDirection(double[] qy, double[] diag, 627 double[] lmDiag, double[] work) { 628 629 // copy R and Qty to preserve input and initialize s 630 // in particular, save the diagonal elements of R in lmDir 631 for (int j = 0; j < solvedCols; ++j) { 632 int pj = permutation[j]; 633 for (int i = j + 1; i < solvedCols; ++i) { 634 jacobian[i][pj] = jacobian[j][permutation[i]]; 635 } 636 lmDir[j] = diagR[pj]; 637 work[j] = qy[j]; 638 } 639 640 // eliminate the diagonal matrix d using a Givens rotation 641 for (int j = 0; j < solvedCols; ++j) { 642 643 // prepare the row of d to be eliminated, locating the 644 // diagonal element using p from the Q.R. factorization 645 int pj = permutation[j]; 646 double dpj = diag[pj]; 647 if (dpj != 0) { 648 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 649 } 650 lmDiag[j] = dpj; 651 652 // the transformations to eliminate the row of d 653 // modify only a single element of Qty 654 // beyond the first n, which is initially zero. 655 double qtbpj = 0; 656 for (int k = j; k < solvedCols; ++k) { 657 int pk = permutation[k]; 658 659 // determine a Givens rotation which eliminates the 660 // appropriate element in the current row of d 661 if (lmDiag[k] != 0) { 662 663 final double sin; 664 final double cos; 665 double rkk = jacobian[k][pk]; 666 if (Math.abs(rkk) < Math.abs(lmDiag[k])) { 667 final double cotan = rkk / lmDiag[k]; 668 sin = 1.0 / Math.sqrt(1.0 + cotan * cotan); 669 cos = sin * cotan; 670 } else { 671 final double tan = lmDiag[k] / rkk; 672 cos = 1.0 / Math.sqrt(1.0 + tan * tan); 673 sin = cos * tan; 674 } 675 676 // compute the modified diagonal element of R and 677 // the modified element of (Qty,0) 678 jacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 679 final double temp = cos * work[k] + sin * qtbpj; 680 qtbpj = -sin * work[k] + cos * qtbpj; 681 work[k] = temp; 682 683 // accumulate the tranformation in the row of s 684 for (int i = k + 1; i < solvedCols; ++i) { 685 double rik = jacobian[i][pk]; 686 final double temp2 = cos * rik + sin * lmDiag[i]; 687 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 688 jacobian[i][pk] = temp2; 689 } 690 691 } 692 } 693 694 // store the diagonal element of s and restore 695 // the corresponding diagonal element of R 696 lmDiag[j] = jacobian[j][permutation[j]]; 697 jacobian[j][permutation[j]] = lmDir[j]; 698 699 } 700 701 // solve the triangular system for z, if the system is 702 // singular, then obtain a least squares solution 703 int nSing = solvedCols; 704 for (int j = 0; j < solvedCols; ++j) { 705 if ((lmDiag[j] == 0) && (nSing == solvedCols)) { 706 nSing = j; 707 } 708 if (nSing < solvedCols) { 709 work[j] = 0; 710 } 711 } 712 if (nSing > 0) { 713 for (int j = nSing - 1; j >= 0; --j) { 714 int pj = permutation[j]; 715 double sum = 0; 716 for (int i = j + 1; i < nSing; ++i) { 717 sum += jacobian[i][pj] * work[i]; 718 } 719 work[j] = (work[j] - sum) / lmDiag[j]; 720 } 721 } 722 723 // permute the components of z back to components of lmDir 724 for (int j = 0; j < lmDir.length; ++j) { 725 lmDir[permutation[j]] = work[j]; 726 } 727 728 } 729 730 /** 731 * Decompose a matrix A as A.P = Q.R using Householder transforms. 732 * <p>As suggested in the P. Lascaux and R. Theodor book 733 * <i>Analyse numérique matricielle appliquée à 734 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 735 * the Householder transforms with u<sub>k</sub> unit vectors such that: 736 * <pre> 737 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 738 * </pre> 739 * we use <sub>k</sub> non-unit vectors such that: 740 * <pre> 741 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 742 * </pre> 743 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 744 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 745 * them from the v<sub>k</sub> vectors would be costly.</p> 746 * <p>This decomposition handles rank deficient cases since the tranformations 747 * are performed in non-increasing columns norms order thanks to columns 748 * pivoting. The diagonal elements of the R matrix are therefore also in 749 * non-increasing absolute values order.</p> 750 * @exception OptimizationException if the decomposition cannot be performed 751 */ 752 private void qrDecomposition() throws OptimizationException { 753 754 // initializations 755 for (int k = 0; k < cols; ++k) { 756 permutation[k] = k; 757 double norm2 = 0; 758 for (int i = 0; i < jacobian.length; ++i) { 759 double akk = jacobian[i][k]; 760 norm2 += akk * akk; 761 } 762 jacNorm[k] = Math.sqrt(norm2); 763 } 764 765 // transform the matrix column after column 766 for (int k = 0; k < cols; ++k) { 767 768 // select the column with the greatest norm on active components 769 int nextColumn = -1; 770 double ak2 = Double.NEGATIVE_INFINITY; 771 for (int i = k; i < cols; ++i) { 772 double norm2 = 0; 773 for (int j = k; j < jacobian.length; ++j) { 774 double aki = jacobian[j][permutation[i]]; 775 norm2 += aki * aki; 776 } 777 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 778 throw new OptimizationException( 779 "unable to perform Q.R decomposition on the {0}x{1} jacobian matrix", 780 rows, cols); 781 } 782 if (norm2 > ak2) { 783 nextColumn = i; 784 ak2 = norm2; 785 } 786 } 787 if (ak2 == 0) { 788 rank = k; 789 return; 790 } 791 int pk = permutation[nextColumn]; 792 permutation[nextColumn] = permutation[k]; 793 permutation[k] = pk; 794 795 // choose alpha such that Hk.u = alpha ek 796 double akk = jacobian[k][pk]; 797 double alpha = (akk > 0) ? -Math.sqrt(ak2) : Math.sqrt(ak2); 798 double betak = 1.0 / (ak2 - akk * alpha); 799 beta[pk] = betak; 800 801 // transform the current column 802 diagR[pk] = alpha; 803 jacobian[k][pk] -= alpha; 804 805 // transform the remaining columns 806 for (int dk = cols - 1 - k; dk > 0; --dk) { 807 double gamma = 0; 808 for (int j = k; j < jacobian.length; ++j) { 809 gamma += jacobian[j][pk] * jacobian[j][permutation[k + dk]]; 810 } 811 gamma *= betak; 812 for (int j = k; j < jacobian.length; ++j) { 813 jacobian[j][permutation[k + dk]] -= gamma * jacobian[j][pk]; 814 } 815 } 816 817 } 818 819 rank = solvedCols; 820 821 } 822 823 /** 824 * Compute the product Qt.y for some Q.R. decomposition. 825 * 826 * @param y vector to multiply (will be overwritten with the result) 827 */ 828 private void qTy(double[] y) { 829 for (int k = 0; k < cols; ++k) { 830 int pk = permutation[k]; 831 double gamma = 0; 832 for (int i = k; i < rows; ++i) { 833 gamma += jacobian[i][pk] * y[i]; 834 } 835 gamma *= beta[pk]; 836 for (int i = k; i < rows; ++i) { 837 y[i] -= gamma * jacobian[i][pk]; 838 } 839 } 840 } 841 842 }