001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.analysis.interpolation; 018 019 import java.io.Serializable; 020 import java.util.Arrays; 021 022 import org.apache.commons.math.MathException; 023 import org.apache.commons.math.analysis.polynomials.PolynomialSplineFunction; 024 025 /** 026 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression"> 027 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of 028 * real univariate functions. 029 * <p/> 030 * For reference, see 031 * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf"> 032 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing 033 * Scatterplots</a> 034 * <p/> 035 * This class implements both the loess method and serves as an interpolation 036 * adapter to it, allowing to build a spline on the obtained loess fit. 037 * 038 * @version $Revision: 925812 $ $Date: 2010-03-21 11:49:31 -0400 (Sun, 21 Mar 2010) $ 039 * @since 2.0 040 */ 041 public class LoessInterpolator 042 implements UnivariateRealInterpolator, Serializable { 043 044 /** Default value of the bandwidth parameter. */ 045 public static final double DEFAULT_BANDWIDTH = 0.3; 046 047 /** Default value of the number of robustness iterations. */ 048 public static final int DEFAULT_ROBUSTNESS_ITERS = 2; 049 050 /** 051 * Default value for accuracy. 052 * @since 2.1 053 */ 054 public static final double DEFAULT_ACCURACY = 1e-12; 055 056 /** serializable version identifier. */ 057 private static final long serialVersionUID = 5204927143605193821L; 058 059 /** 060 * The bandwidth parameter: when computing the loess fit at 061 * a particular point, this fraction of source points closest 062 * to the current point is taken into account for computing 063 * a least-squares regression. 064 * <p/> 065 * A sensible value is usually 0.25 to 0.5. 066 */ 067 private final double bandwidth; 068 069 /** 070 * The number of robustness iterations parameter: this many 071 * robustness iterations are done. 072 * <p/> 073 * A sensible value is usually 0 (just the initial fit without any 074 * robustness iterations) to 4. 075 */ 076 private final int robustnessIters; 077 078 /** 079 * If the median residual at a certain robustness iteration 080 * is less than this amount, no more iterations are done. 081 */ 082 private final double accuracy; 083 084 /** 085 * Constructs a new {@link LoessInterpolator} 086 * with a bandwidth of {@link #DEFAULT_BANDWIDTH}, 087 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations 088 * and an accuracy of {#link #DEFAULT_ACCURACY}. 089 * See {@link #LoessInterpolator(double, int, double)} for an explanation of 090 * the parameters. 091 */ 092 public LoessInterpolator() { 093 this.bandwidth = DEFAULT_BANDWIDTH; 094 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS; 095 this.accuracy = DEFAULT_ACCURACY; 096 } 097 098 /** 099 * Constructs a new {@link LoessInterpolator} 100 * with given bandwidth and number of robustness iterations. 101 * <p> 102 * Calling this constructor is equivalent to calling {link {@link 103 * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth, 104 * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)} 105 * </p> 106 * 107 * @param bandwidth when computing the loess fit at 108 * a particular point, this fraction of source points closest 109 * to the current point is taken into account for computing 110 * a least-squares regression.</br> 111 * A sensible value is usually 0.25 to 0.5, the default value is 112 * {@link #DEFAULT_BANDWIDTH}. 113 * @param robustnessIters This many robustness iterations are done.</br> 114 * A sensible value is usually 0 (just the initial fit without any 115 * robustness iterations) to 4, the default value is 116 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 117 * @throws MathException if bandwidth does not lie in the interval [0,1] 118 * or if robustnessIters is negative. 119 * @see #LoessInterpolator(double, int, double) 120 */ 121 public LoessInterpolator(double bandwidth, int robustnessIters) throws MathException { 122 this(bandwidth, robustnessIters, DEFAULT_ACCURACY); 123 } 124 125 /** 126 * Constructs a new {@link LoessInterpolator} 127 * with given bandwidth, number of robustness iterations and accuracy. 128 * 129 * @param bandwidth when computing the loess fit at 130 * a particular point, this fraction of source points closest 131 * to the current point is taken into account for computing 132 * a least-squares regression.</br> 133 * A sensible value is usually 0.25 to 0.5, the default value is 134 * {@link #DEFAULT_BANDWIDTH}. 135 * @param robustnessIters This many robustness iterations are done.</br> 136 * A sensible value is usually 0 (just the initial fit without any 137 * robustness iterations) to 4, the default value is 138 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 139 * @param accuracy If the median residual at a certain robustness iteration 140 * is less than this amount, no more iterations are done. 141 * @throws MathException if bandwidth does not lie in the interval [0,1] 142 * or if robustnessIters is negative. 143 * @see #LoessInterpolator(double, int) 144 * @since 2.1 145 */ 146 public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy) throws MathException { 147 if (bandwidth < 0 || bandwidth > 1) { 148 throw new MathException("bandwidth must be in the interval [0,1], but got {0}", 149 bandwidth); 150 } 151 this.bandwidth = bandwidth; 152 if (robustnessIters < 0) { 153 throw new MathException("the number of robustness iterations must " + 154 "be non-negative, but got {0}", 155 robustnessIters); 156 } 157 this.robustnessIters = robustnessIters; 158 this.accuracy = accuracy; 159 } 160 161 /** 162 * Compute an interpolating function by performing a loess fit 163 * on the data at the original abscissae and then building a cubic spline 164 * with a 165 * {@link org.apache.commons.math.analysis.interpolation.SplineInterpolator} 166 * on the resulting fit. 167 * 168 * @param xval the arguments for the interpolation points 169 * @param yval the values for the interpolation points 170 * @return A cubic spline built upon a loess fit to the data at the original abscissae 171 * @throws MathException if some of the following conditions are false: 172 * <ul> 173 * <li> Arguments and values are of the same size that is greater than zero</li> 174 * <li> The arguments are in a strictly increasing order</li> 175 * <li> All arguments and values are finite real numbers</li> 176 * </ul> 177 */ 178 public final PolynomialSplineFunction interpolate( 179 final double[] xval, final double[] yval) throws MathException { 180 return new SplineInterpolator().interpolate(xval, smooth(xval, yval)); 181 } 182 183 /** 184 * Compute a weighted loess fit on the data at the original abscissae. 185 * 186 * @param xval the arguments for the interpolation points 187 * @param yval the values for the interpolation points 188 * @param weights point weights: coefficients by which the robustness weight of a point is multiplied 189 * @return values of the loess fit at corresponding original abscissae 190 * @throws MathException if some of the following conditions are false: 191 * <ul> 192 * <li> Arguments and values are of the same size that is greater than zero</li> 193 * <li> The arguments are in a strictly increasing order</li> 194 * <li> All arguments and values are finite real numbers</li> 195 * </ul> 196 * @since 2.1 197 */ 198 public final double[] smooth(final double[] xval, final double[] yval, final double[] weights) 199 throws MathException { 200 if (xval.length != yval.length) { 201 throw new MathException( 202 "Loess expects the abscissa and ordinate arrays " + 203 "to be of the same size, " + 204 "but got {0} abscissae and {1} ordinatae", 205 xval.length, yval.length); 206 } 207 208 final int n = xval.length; 209 210 if (n == 0) { 211 throw new MathException("Loess expects at least 1 point"); 212 } 213 214 checkAllFiniteReal(xval, "all abscissae must be finite real numbers, but {0}-th is {1}"); 215 checkAllFiniteReal(yval, "all ordinatae must be finite real numbers, but {0}-th is {1}"); 216 checkAllFiniteReal(weights, "all weights must be finite real numbers, but {0}-th is {1}"); 217 218 checkStrictlyIncreasing(xval); 219 220 if (n == 1) { 221 return new double[]{yval[0]}; 222 } 223 224 if (n == 2) { 225 return new double[]{yval[0], yval[1]}; 226 } 227 228 int bandwidthInPoints = (int) (bandwidth * n); 229 230 if (bandwidthInPoints < 2) { 231 throw new MathException( 232 "the bandwidth must be large enough to " + 233 "accomodate at least 2 points. There are {0} " + 234 " data points, and bandwidth must be at least {1} " + 235 " but it is only {2}", 236 n, 2.0 / n, bandwidth); 237 } 238 239 final double[] res = new double[n]; 240 241 final double[] residuals = new double[n]; 242 final double[] sortedResiduals = new double[n]; 243 244 final double[] robustnessWeights = new double[n]; 245 246 // Do an initial fit and 'robustnessIters' robustness iterations. 247 // This is equivalent to doing 'robustnessIters+1' robustness iterations 248 // starting with all robustness weights set to 1. 249 Arrays.fill(robustnessWeights, 1); 250 251 for (int iter = 0; iter <= robustnessIters; ++iter) { 252 final int[] bandwidthInterval = {0, bandwidthInPoints - 1}; 253 // At each x, compute a local weighted linear regression 254 for (int i = 0; i < n; ++i) { 255 final double x = xval[i]; 256 257 // Find out the interval of source points on which 258 // a regression is to be made. 259 if (i > 0) { 260 updateBandwidthInterval(xval, weights, i, bandwidthInterval); 261 } 262 263 final int ileft = bandwidthInterval[0]; 264 final int iright = bandwidthInterval[1]; 265 266 // Compute the point of the bandwidth interval that is 267 // farthest from x 268 final int edge; 269 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) { 270 edge = ileft; 271 } else { 272 edge = iright; 273 } 274 275 // Compute a least-squares linear fit weighted by 276 // the product of robustness weights and the tricube 277 // weight function. 278 // See http://en.wikipedia.org/wiki/Linear_regression 279 // (section "Univariate linear case") 280 // and http://en.wikipedia.org/wiki/Weighted_least_squares 281 // (section "Weighted least squares") 282 double sumWeights = 0; 283 double sumX = 0; 284 double sumXSquared = 0; 285 double sumY = 0; 286 double sumXY = 0; 287 double denom = Math.abs(1.0 / (xval[edge] - x)); 288 for (int k = ileft; k <= iright; ++k) { 289 final double xk = xval[k]; 290 final double yk = yval[k]; 291 final double dist = (k < i) ? x - xk : xk - x; 292 final double w = tricube(dist * denom) * robustnessWeights[k] * weights[k]; 293 final double xkw = xk * w; 294 sumWeights += w; 295 sumX += xkw; 296 sumXSquared += xk * xkw; 297 sumY += yk * w; 298 sumXY += yk * xkw; 299 } 300 301 final double meanX = sumX / sumWeights; 302 final double meanY = sumY / sumWeights; 303 final double meanXY = sumXY / sumWeights; 304 final double meanXSquared = sumXSquared / sumWeights; 305 306 final double beta; 307 if (Math.sqrt(Math.abs(meanXSquared - meanX * meanX)) < accuracy) { 308 beta = 0; 309 } else { 310 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX); 311 } 312 313 final double alpha = meanY - beta * meanX; 314 315 res[i] = beta * x + alpha; 316 residuals[i] = Math.abs(yval[i] - res[i]); 317 } 318 319 // No need to recompute the robustness weights at the last 320 // iteration, they won't be needed anymore 321 if (iter == robustnessIters) { 322 break; 323 } 324 325 // Recompute the robustness weights. 326 327 // Find the median residual. 328 // An arraycopy and a sort are completely tractable here, 329 // because the preceding loop is a lot more expensive 330 System.arraycopy(residuals, 0, sortedResiduals, 0, n); 331 Arrays.sort(sortedResiduals); 332 final double medianResidual = sortedResiduals[n / 2]; 333 334 if (Math.abs(medianResidual) < accuracy) { 335 break; 336 } 337 338 for (int i = 0; i < n; ++i) { 339 final double arg = residuals[i] / (6 * medianResidual); 340 if (arg >= 1) { 341 robustnessWeights[i] = 0; 342 } else { 343 final double w = 1 - arg * arg; 344 robustnessWeights[i] = w * w; 345 } 346 } 347 } 348 349 return res; 350 } 351 352 /** 353 * Compute a loess fit on the data at the original abscissae. 354 * 355 * @param xval the arguments for the interpolation points 356 * @param yval the values for the interpolation points 357 * @return values of the loess fit at corresponding original abscissae 358 * @throws MathException if some of the following conditions are false: 359 * <ul> 360 * <li> Arguments and values are of the same size that is greater than zero</li> 361 * <li> The arguments are in a strictly increasing order</li> 362 * <li> All arguments and values are finite real numbers</li> 363 * </ul> 364 */ 365 public final double[] smooth(final double[] xval, final double[] yval) 366 throws MathException { 367 if (xval.length != yval.length) { 368 throw new MathException( 369 "Loess expects the abscissa and ordinate arrays " + 370 "to be of the same size, " + 371 "but got {0} abscissae and {1} ordinatae", 372 xval.length, yval.length); 373 } 374 375 final double[] unitWeights = new double[xval.length]; 376 Arrays.fill(unitWeights, 1.0); 377 378 return smooth(xval, yval, unitWeights); 379 } 380 381 /** 382 * Given an index interval into xval that embraces a certain number of 383 * points closest to xval[i-1], update the interval so that it embraces 384 * the same number of points closest to xval[i], ignoring zero weights. 385 * 386 * @param xval arguments array 387 * @param weights weights array 388 * @param i the index around which the new interval should be computed 389 * @param bandwidthInterval a two-element array {left, right} such that: <p/> 390 * <tt>(left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])</tt> 391 * <p/> and also <p/> 392 * <tt>(right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])</tt>. 393 * The array will be updated. 394 */ 395 private static void updateBandwidthInterval(final double[] xval, final double[] weights, 396 final int i, 397 final int[] bandwidthInterval) { 398 final int left = bandwidthInterval[0]; 399 final int right = bandwidthInterval[1]; 400 401 // The right edge should be adjusted if the next point to the right 402 // is closer to xval[i] than the leftmost point of the current interval 403 int nextRight = nextNonzero(weights, right); 404 if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) { 405 int nextLeft = nextNonzero(weights, bandwidthInterval[0]); 406 bandwidthInterval[0] = nextLeft; 407 bandwidthInterval[1] = nextRight; 408 } 409 } 410 411 /** 412 * Returns the smallest index j such that j > i && (j==weights.length || weights[j] != 0) 413 * @param weights weights array 414 * @param i the index from which to start search; must be < weights.length 415 * @return the smallest index j such that j > i && (j==weights.length || weights[j] != 0) 416 */ 417 private static int nextNonzero(final double[] weights, final int i) { 418 int j = i + 1; 419 while(j < weights.length && weights[j] == 0) { 420 j++; 421 } 422 return j; 423 } 424 425 /** 426 * Compute the 427 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a> 428 * weight function 429 * 430 * @param x the argument 431 * @return (1-|x|^3)^3 432 */ 433 private static double tricube(final double x) { 434 final double tmp = 1 - x * x * x; 435 return tmp * tmp * tmp; 436 } 437 438 /** 439 * Check that all elements of an array are finite real numbers. 440 * 441 * @param values the values array 442 * @param pattern pattern of the error message 443 * @throws MathException if one of the values is not a finite real number 444 */ 445 private static void checkAllFiniteReal(final double[] values, final String pattern) 446 throws MathException { 447 for (int i = 0; i < values.length; i++) { 448 final double x = values[i]; 449 if (Double.isInfinite(x) || Double.isNaN(x)) { 450 throw new MathException(pattern, i, x); 451 } 452 } 453 } 454 455 /** 456 * Check that elements of the abscissae array are in a strictly 457 * increasing order. 458 * 459 * @param xval the abscissae array 460 * @throws MathException if the abscissae array 461 * is not in a strictly increasing order 462 */ 463 private static void checkStrictlyIncreasing(final double[] xval) 464 throws MathException { 465 for (int i = 0; i < xval.length; ++i) { 466 if (i >= 1 && xval[i - 1] >= xval[i]) { 467 throw new MathException( 468 "the abscissae array must be sorted in a strictly " + 469 "increasing order, but the {0}-th element is {1} " + 470 "whereas {2}-th is {3}", 471 i - 1, xval[i - 1], i, xval[i]); 472 } 473 } 474 } 475 }