001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive; 018 019 import java.io.Serializable; 020 import java.lang.reflect.InvocationTargetException; 021 import java.util.Arrays; 022 023 import org.apache.commons.math.MathRuntimeException; 024 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 025 import org.apache.commons.math.stat.descriptive.moment.Kurtosis; 026 import org.apache.commons.math.stat.descriptive.moment.Mean; 027 import org.apache.commons.math.stat.descriptive.moment.Skewness; 028 import org.apache.commons.math.stat.descriptive.moment.Variance; 029 import org.apache.commons.math.stat.descriptive.rank.Max; 030 import org.apache.commons.math.stat.descriptive.rank.Min; 031 import org.apache.commons.math.stat.descriptive.rank.Percentile; 032 import org.apache.commons.math.stat.descriptive.summary.Sum; 033 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 034 import org.apache.commons.math.util.ResizableDoubleArray; 035 036 037 /** 038 * Maintains a dataset of values of a single variable and computes descriptive 039 * statistics based on stored data. The {@link #getWindowSize() windowSize} 040 * property sets a limit on the number of values that can be stored in the 041 * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of 042 * the dataset. This value should be used with caution, as the backing store 043 * will grow without bound in this case. For very large datasets, 044 * {@link SummaryStatistics}, which does not store the dataset, should be used 045 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and 046 * more values are added than can be stored in the dataset, new values are 047 * added in a "rolling" manner, with new values replacing the "oldest" values 048 * in the dataset. 049 * 050 * <p>Note: this class is not threadsafe. Use 051 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple 052 * threads is required.</p> 053 * 054 * @version $Revision: 885278 $ $Date: 2009-11-29 16:47:51 -0500 (Sun, 29 Nov 2009) $ 055 */ 056 public class DescriptiveStatistics implements StatisticalSummary, Serializable { 057 058 /** 059 * Represents an infinite window size. When the {@link #getWindowSize()} 060 * returns this value, there is no limit to the number of data values 061 * that can be stored in the dataset. 062 */ 063 public static final int INFINITE_WINDOW = -1; 064 065 /** Serialization UID */ 066 private static final long serialVersionUID = 4133067267405273064L; 067 068 /** Name of the setQuantile method. */ 069 private static final String SET_QUANTILE_METHOD_NAME = "setQuantile"; 070 071 /** Message for unsupported setQuantile. */ 072 private static final String UNSUPPORTED_METHOD_MESSAGE = 073 "percentile implementation {0} does not support {1}"; 074 075 /** Message for illegal accesson setquantile. */ 076 private static final String ILLEGAL_ACCESS_MESSAGE = 077 "cannot access {0} method in percentile implementation {1}"; 078 079 /** hold the window size **/ 080 protected int windowSize = INFINITE_WINDOW; 081 082 /** 083 * Stored data values 084 */ 085 protected ResizableDoubleArray eDA = new ResizableDoubleArray(); 086 087 /** Mean statistic implementation - can be reset by setter. */ 088 private UnivariateStatistic meanImpl = new Mean(); 089 090 /** Geometric mean statistic implementation - can be reset by setter. */ 091 private UnivariateStatistic geometricMeanImpl = new GeometricMean(); 092 093 /** Kurtosis statistic implementation - can be reset by setter. */ 094 private UnivariateStatistic kurtosisImpl = new Kurtosis(); 095 096 /** Maximum statistic implementation - can be reset by setter. */ 097 private UnivariateStatistic maxImpl = new Max(); 098 099 /** Minimum statistic implementation - can be reset by setter. */ 100 private UnivariateStatistic minImpl = new Min(); 101 102 /** Percentile statistic implementation - can be reset by setter. */ 103 private UnivariateStatistic percentileImpl = new Percentile(); 104 105 /** Skewness statistic implementation - can be reset by setter. */ 106 private UnivariateStatistic skewnessImpl = new Skewness(); 107 108 /** Variance statistic implementation - can be reset by setter. */ 109 private UnivariateStatistic varianceImpl = new Variance(); 110 111 /** Sum of squares statistic implementation - can be reset by setter. */ 112 private UnivariateStatistic sumsqImpl = new SumOfSquares(); 113 114 /** Sum statistic implementation - can be reset by setter. */ 115 private UnivariateStatistic sumImpl = new Sum(); 116 117 /** 118 * Construct a DescriptiveStatistics instance with an infinite window 119 */ 120 public DescriptiveStatistics() { 121 } 122 123 /** 124 * Construct a DescriptiveStatistics instance with the specified window 125 * 126 * @param window the window size. 127 */ 128 public DescriptiveStatistics(int window) { 129 setWindowSize(window); 130 } 131 132 /** 133 * Copy constructor. Construct a new DescriptiveStatistics instance that 134 * is a copy of original. 135 * 136 * @param original DescriptiveStatistics instance to copy 137 */ 138 public DescriptiveStatistics(DescriptiveStatistics original) { 139 copy(original, this); 140 } 141 142 /** 143 * Adds the value to the dataset. If the dataset is at the maximum size 144 * (i.e., the number of stored elements equals the currently configured 145 * windowSize), the first (oldest) element in the dataset is discarded 146 * to make room for the new value. 147 * 148 * @param v the value to be added 149 */ 150 public void addValue(double v) { 151 if (windowSize != INFINITE_WINDOW) { 152 if (getN() == windowSize) { 153 eDA.addElementRolling(v); 154 } else if (getN() < windowSize) { 155 eDA.addElement(v); 156 } 157 } else { 158 eDA.addElement(v); 159 } 160 } 161 162 /** 163 * Removes the most recent value from the dataset. 164 */ 165 public void removeMostRecentValue() { 166 eDA.discardMostRecentElements(1); 167 } 168 169 /** 170 * Replaces the most recently stored value with the given value. 171 * There must be at least one element stored to call this method. 172 * 173 * @param v the value to replace the most recent stored value 174 * @return replaced value 175 */ 176 public double replaceMostRecentValue(double v) { 177 return eDA.substituteMostRecentElement(v); 178 } 179 180 /** 181 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> 182 * arithmetic mean </a> of the available values 183 * @return The mean or Double.NaN if no values have been added. 184 */ 185 public double getMean() { 186 return apply(meanImpl); 187 } 188 189 /** 190 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> 191 * geometric mean </a> of the available values 192 * @return The geometricMean, Double.NaN if no values have been added, 193 * or if the product of the available values is less than or equal to 0. 194 */ 195 public double getGeometricMean() { 196 return apply(geometricMeanImpl); 197 } 198 199 /** 200 * Returns the variance of the available values. 201 * @return The variance, Double.NaN if no values have been added 202 * or 0.0 for a single value set. 203 */ 204 public double getVariance() { 205 return apply(varianceImpl); 206 } 207 208 /** 209 * Returns the standard deviation of the available values. 210 * @return The standard deviation, Double.NaN if no values have been added 211 * or 0.0 for a single value set. 212 */ 213 public double getStandardDeviation() { 214 double stdDev = Double.NaN; 215 if (getN() > 0) { 216 if (getN() > 1) { 217 stdDev = Math.sqrt(getVariance()); 218 } else { 219 stdDev = 0.0; 220 } 221 } 222 return stdDev; 223 } 224 225 /** 226 * Returns the skewness of the available values. Skewness is a 227 * measure of the asymmetry of a given distribution. 228 * @return The skewness, Double.NaN if no values have been added 229 * or 0.0 for a value set <=2. 230 */ 231 public double getSkewness() { 232 return apply(skewnessImpl); 233 } 234 235 /** 236 * Returns the Kurtosis of the available values. Kurtosis is a 237 * measure of the "peakedness" of a distribution 238 * @return The kurtosis, Double.NaN if no values have been added, or 0.0 239 * for a value set <=3. 240 */ 241 public double getKurtosis() { 242 return apply(kurtosisImpl); 243 } 244 245 /** 246 * Returns the maximum of the available values 247 * @return The max or Double.NaN if no values have been added. 248 */ 249 public double getMax() { 250 return apply(maxImpl); 251 } 252 253 /** 254 * Returns the minimum of the available values 255 * @return The min or Double.NaN if no values have been added. 256 */ 257 public double getMin() { 258 return apply(minImpl); 259 } 260 261 /** 262 * Returns the number of available values 263 * @return The number of available values 264 */ 265 public long getN() { 266 return eDA.getNumElements(); 267 } 268 269 /** 270 * Returns the sum of the values that have been added to Univariate. 271 * @return The sum or Double.NaN if no values have been added 272 */ 273 public double getSum() { 274 return apply(sumImpl); 275 } 276 277 /** 278 * Returns the sum of the squares of the available values. 279 * @return The sum of the squares or Double.NaN if no 280 * values have been added. 281 */ 282 public double getSumsq() { 283 return apply(sumsqImpl); 284 } 285 286 /** 287 * Resets all statistics and storage 288 */ 289 public void clear() { 290 eDA.clear(); 291 } 292 293 294 /** 295 * Returns the maximum number of values that can be stored in the 296 * dataset, or INFINITE_WINDOW (-1) if there is no limit. 297 * 298 * @return The current window size or -1 if its Infinite. 299 */ 300 public int getWindowSize() { 301 return windowSize; 302 } 303 304 /** 305 * WindowSize controls the number of values which contribute 306 * to the reported statistics. For example, if 307 * windowSize is set to 3 and the values {1,2,3,4,5} 308 * have been added <strong> in that order</strong> 309 * then the <i>available values</i> are {3,4,5} and all 310 * reported statistics will be based on these values 311 * @param windowSize sets the size of the window. 312 */ 313 public void setWindowSize(int windowSize) { 314 if (windowSize < 1) { 315 if (windowSize != INFINITE_WINDOW) { 316 throw MathRuntimeException.createIllegalArgumentException( 317 "window size must be positive ({0})", windowSize); 318 } 319 } 320 321 this.windowSize = windowSize; 322 323 // We need to check to see if we need to discard elements 324 // from the front of the array. If the windowSize is less than 325 // the current number of elements. 326 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) { 327 eDA.discardFrontElements(eDA.getNumElements() - windowSize); 328 } 329 } 330 331 /** 332 * Returns the current set of values in an array of double primitives. 333 * The order of addition is preserved. The returned array is a fresh 334 * copy of the underlying data -- i.e., it is not a reference to the 335 * stored data. 336 * 337 * @return returns the current set of numbers in the order in which they 338 * were added to this set 339 */ 340 public double[] getValues() { 341 return eDA.getElements(); 342 } 343 344 /** 345 * Returns the current set of values in an array of double primitives, 346 * sorted in ascending order. The returned array is a fresh 347 * copy of the underlying data -- i.e., it is not a reference to the 348 * stored data. 349 * @return returns the current set of 350 * numbers sorted in ascending order 351 */ 352 public double[] getSortedValues() { 353 double[] sort = getValues(); 354 Arrays.sort(sort); 355 return sort; 356 } 357 358 /** 359 * Returns the element at the specified index 360 * @param index The Index of the element 361 * @return return the element at the specified index 362 */ 363 public double getElement(int index) { 364 return eDA.getElement(index); 365 } 366 367 /** 368 * Returns an estimate for the pth percentile of the stored values. 369 * <p> 370 * The implementation provided here follows the first estimation procedure presented 371 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> 372 * </p><p> 373 * <strong>Preconditions</strong>:<ul> 374 * <li><code>0 < p ≤ 100</code> (otherwise an 375 * <code>IllegalArgumentException</code> is thrown)</li> 376 * <li>at least one value must be stored (returns <code>Double.NaN 377 * </code> otherwise)</li> 378 * </ul></p> 379 * 380 * @param p the requested percentile (scaled from 0 - 100) 381 * @return An estimate for the pth percentile of the stored data 382 * @throws IllegalStateException if percentile implementation has been 383 * overridden and the supplied implementation does not support setQuantile 384 * values 385 */ 386 public double getPercentile(double p) { 387 if (percentileImpl instanceof Percentile) { 388 ((Percentile) percentileImpl).setQuantile(p); 389 } else { 390 try { 391 percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, 392 new Class[] {Double.TYPE}).invoke(percentileImpl, 393 new Object[] {Double.valueOf(p)}); 394 } catch (NoSuchMethodException e1) { // Setter guard should prevent 395 throw MathRuntimeException.createIllegalArgumentException( 396 UNSUPPORTED_METHOD_MESSAGE, 397 percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); 398 } catch (IllegalAccessException e2) { 399 throw MathRuntimeException.createIllegalArgumentException( 400 ILLEGAL_ACCESS_MESSAGE, 401 SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); 402 } catch (InvocationTargetException e3) { 403 throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 404 } 405 } 406 return apply(percentileImpl); 407 } 408 409 /** 410 * Generates a text report displaying univariate statistics from values 411 * that have been added. Each statistic is displayed on a separate 412 * line. 413 * 414 * @return String with line feeds displaying statistics 415 */ 416 @Override 417 public String toString() { 418 StringBuffer outBuffer = new StringBuffer(); 419 String endl = "\n"; 420 outBuffer.append("DescriptiveStatistics:").append(endl); 421 outBuffer.append("n: ").append(getN()).append(endl); 422 outBuffer.append("min: ").append(getMin()).append(endl); 423 outBuffer.append("max: ").append(getMax()).append(endl); 424 outBuffer.append("mean: ").append(getMean()).append(endl); 425 outBuffer.append("std dev: ").append(getStandardDeviation()) 426 .append(endl); 427 outBuffer.append("median: ").append(getPercentile(50)).append(endl); 428 outBuffer.append("skewness: ").append(getSkewness()).append(endl); 429 outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl); 430 return outBuffer.toString(); 431 } 432 433 /** 434 * Apply the given statistic to the data associated with this set of statistics. 435 * @param stat the statistic to apply 436 * @return the computed value of the statistic. 437 */ 438 public double apply(UnivariateStatistic stat) { 439 return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements()); 440 } 441 442 // Implementation getters and setter 443 444 /** 445 * Returns the currently configured mean implementation. 446 * 447 * @return the UnivariateStatistic implementing the mean 448 * @since 1.2 449 */ 450 public synchronized UnivariateStatistic getMeanImpl() { 451 return meanImpl; 452 } 453 454 /** 455 * <p>Sets the implementation for the mean.</p> 456 * 457 * @param meanImpl the UnivariateStatistic instance to use 458 * for computing the mean 459 * @since 1.2 460 */ 461 public synchronized void setMeanImpl(UnivariateStatistic meanImpl) { 462 this.meanImpl = meanImpl; 463 } 464 465 /** 466 * Returns the currently configured geometric mean implementation. 467 * 468 * @return the UnivariateStatistic implementing the geometric mean 469 * @since 1.2 470 */ 471 public synchronized UnivariateStatistic getGeometricMeanImpl() { 472 return geometricMeanImpl; 473 } 474 475 /** 476 * <p>Sets the implementation for the gemoetric mean.</p> 477 * 478 * @param geometricMeanImpl the UnivariateStatistic instance to use 479 * for computing the geometric mean 480 * @since 1.2 481 */ 482 public synchronized void setGeometricMeanImpl( 483 UnivariateStatistic geometricMeanImpl) { 484 this.geometricMeanImpl = geometricMeanImpl; 485 } 486 487 /** 488 * Returns the currently configured kurtosis implementation. 489 * 490 * @return the UnivariateStatistic implementing the kurtosis 491 * @since 1.2 492 */ 493 public synchronized UnivariateStatistic getKurtosisImpl() { 494 return kurtosisImpl; 495 } 496 497 /** 498 * <p>Sets the implementation for the kurtosis.</p> 499 * 500 * @param kurtosisImpl the UnivariateStatistic instance to use 501 * for computing the kurtosis 502 * @since 1.2 503 */ 504 public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) { 505 this.kurtosisImpl = kurtosisImpl; 506 } 507 508 /** 509 * Returns the currently configured maximum implementation. 510 * 511 * @return the UnivariateStatistic implementing the maximum 512 * @since 1.2 513 */ 514 public synchronized UnivariateStatistic getMaxImpl() { 515 return maxImpl; 516 } 517 518 /** 519 * <p>Sets the implementation for the maximum.</p> 520 * 521 * @param maxImpl the UnivariateStatistic instance to use 522 * for computing the maximum 523 * @since 1.2 524 */ 525 public synchronized void setMaxImpl(UnivariateStatistic maxImpl) { 526 this.maxImpl = maxImpl; 527 } 528 529 /** 530 * Returns the currently configured minimum implementation. 531 * 532 * @return the UnivariateStatistic implementing the minimum 533 * @since 1.2 534 */ 535 public synchronized UnivariateStatistic getMinImpl() { 536 return minImpl; 537 } 538 539 /** 540 * <p>Sets the implementation for the minimum.</p> 541 * 542 * @param minImpl the UnivariateStatistic instance to use 543 * for computing the minimum 544 * @since 1.2 545 */ 546 public synchronized void setMinImpl(UnivariateStatistic minImpl) { 547 this.minImpl = minImpl; 548 } 549 550 /** 551 * Returns the currently configured percentile implementation. 552 * 553 * @return the UnivariateStatistic implementing the percentile 554 * @since 1.2 555 */ 556 public synchronized UnivariateStatistic getPercentileImpl() { 557 return percentileImpl; 558 } 559 560 /** 561 * Sets the implementation to be used by {@link #getPercentile(double)}. 562 * The supplied <code>UnivariateStatistic</code> must provide a 563 * <code>setQuantile(double)</code> method; otherwise 564 * <code>IllegalArgumentException</code> is thrown. 565 * 566 * @param percentileImpl the percentileImpl to set 567 * @throws IllegalArgumentException if the supplied implementation does not 568 * provide a <code>setQuantile</code> method 569 * @since 1.2 570 */ 571 public synchronized void setPercentileImpl( 572 UnivariateStatistic percentileImpl) { 573 try { 574 percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, 575 new Class[] {Double.TYPE}).invoke(percentileImpl, 576 new Object[] {Double.valueOf(50.0d)}); 577 } catch (NoSuchMethodException e1) { 578 throw MathRuntimeException.createIllegalArgumentException( 579 "percentile implementation {0} does not support setQuantile", 580 percentileImpl.getClass().getName()); 581 } catch (IllegalAccessException e2) { 582 throw MathRuntimeException.createIllegalArgumentException( 583 ILLEGAL_ACCESS_MESSAGE, 584 SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); 585 } catch (InvocationTargetException e3) { 586 throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 587 } 588 this.percentileImpl = percentileImpl; 589 } 590 591 /** 592 * Returns the currently configured skewness implementation. 593 * 594 * @return the UnivariateStatistic implementing the skewness 595 * @since 1.2 596 */ 597 public synchronized UnivariateStatistic getSkewnessImpl() { 598 return skewnessImpl; 599 } 600 601 /** 602 * <p>Sets the implementation for the skewness.</p> 603 * 604 * @param skewnessImpl the UnivariateStatistic instance to use 605 * for computing the skewness 606 * @since 1.2 607 */ 608 public synchronized void setSkewnessImpl( 609 UnivariateStatistic skewnessImpl) { 610 this.skewnessImpl = skewnessImpl; 611 } 612 613 /** 614 * Returns the currently configured variance implementation. 615 * 616 * @return the UnivariateStatistic implementing the variance 617 * @since 1.2 618 */ 619 public synchronized UnivariateStatistic getVarianceImpl() { 620 return varianceImpl; 621 } 622 623 /** 624 * <p>Sets the implementation for the variance.</p> 625 * 626 * @param varianceImpl the UnivariateStatistic instance to use 627 * for computing the variance 628 * @since 1.2 629 */ 630 public synchronized void setVarianceImpl( 631 UnivariateStatistic varianceImpl) { 632 this.varianceImpl = varianceImpl; 633 } 634 635 /** 636 * Returns the currently configured sum of squares implementation. 637 * 638 * @return the UnivariateStatistic implementing the sum of squares 639 * @since 1.2 640 */ 641 public synchronized UnivariateStatistic getSumsqImpl() { 642 return sumsqImpl; 643 } 644 645 /** 646 * <p>Sets the implementation for the sum of squares.</p> 647 * 648 * @param sumsqImpl the UnivariateStatistic instance to use 649 * for computing the sum of squares 650 * @since 1.2 651 */ 652 public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) { 653 this.sumsqImpl = sumsqImpl; 654 } 655 656 /** 657 * Returns the currently configured sum implementation. 658 * 659 * @return the UnivariateStatistic implementing the sum 660 * @since 1.2 661 */ 662 public synchronized UnivariateStatistic getSumImpl() { 663 return sumImpl; 664 } 665 666 /** 667 * <p>Sets the implementation for the sum.</p> 668 * 669 * @param sumImpl the UnivariateStatistic instance to use 670 * for computing the sum 671 * @since 1.2 672 */ 673 public synchronized void setSumImpl(UnivariateStatistic sumImpl) { 674 this.sumImpl = sumImpl; 675 } 676 677 /** 678 * Returns a copy of this DescriptiveStatistics instance with the same internal state. 679 * 680 * @return a copy of this 681 */ 682 public DescriptiveStatistics copy() { 683 DescriptiveStatistics result = new DescriptiveStatistics(); 684 copy(this, result); 685 return result; 686 } 687 688 /** 689 * Copies source to dest. 690 * <p>Neither source nor dest can be null.</p> 691 * 692 * @param source DescriptiveStatistics to copy 693 * @param dest DescriptiveStatistics to copy to 694 * @throws NullPointerException if either source or dest is null 695 */ 696 public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) { 697 // Copy data and window size 698 dest.eDA = source.eDA.copy(); 699 dest.windowSize = source.windowSize; 700 701 // Copy implementations 702 dest.maxImpl = source.maxImpl.copy(); 703 dest.meanImpl = source.meanImpl.copy(); 704 dest.minImpl = source.minImpl.copy(); 705 dest.sumImpl = source.sumImpl.copy(); 706 dest.varianceImpl = source.varianceImpl.copy(); 707 dest.sumsqImpl = source.sumsqImpl.copy(); 708 dest.geometricMeanImpl = source.geometricMeanImpl.copy(); 709 dest.kurtosisImpl = source.kurtosisImpl; 710 dest.skewnessImpl = source.skewnessImpl; 711 dest.percentileImpl = source.percentileImpl; 712 } 713 }