001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020 import java.lang.reflect.InvocationTargetException;
021 import java.util.Arrays;
022
023 import org.apache.commons.math.MathRuntimeException;
024 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
025 import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
026 import org.apache.commons.math.stat.descriptive.moment.Mean;
027 import org.apache.commons.math.stat.descriptive.moment.Skewness;
028 import org.apache.commons.math.stat.descriptive.moment.Variance;
029 import org.apache.commons.math.stat.descriptive.rank.Max;
030 import org.apache.commons.math.stat.descriptive.rank.Min;
031 import org.apache.commons.math.stat.descriptive.rank.Percentile;
032 import org.apache.commons.math.stat.descriptive.summary.Sum;
033 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
034 import org.apache.commons.math.util.ResizableDoubleArray;
035
036
037 /**
038 * Maintains a dataset of values of a single variable and computes descriptive
039 * statistics based on stored data. The {@link #getWindowSize() windowSize}
040 * property sets a limit on the number of values that can be stored in the
041 * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of
042 * the dataset. This value should be used with caution, as the backing store
043 * will grow without bound in this case. For very large datasets,
044 * {@link SummaryStatistics}, which does not store the dataset, should be used
045 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
046 * more values are added than can be stored in the dataset, new values are
047 * added in a "rolling" manner, with new values replacing the "oldest" values
048 * in the dataset.
049 *
050 * <p>Note: this class is not threadsafe. Use
051 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
052 * threads is required.</p>
053 *
054 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
055 */
056 public class DescriptiveStatistics implements StatisticalSummary, Serializable {
057
058 /** Serialization UID */
059 private static final long serialVersionUID = 4133067267405273064L;
060
061 /** hold the window size **/
062 protected int windowSize = INFINITE_WINDOW;
063
064 /**
065 * Stored data values
066 */
067 protected ResizableDoubleArray eDA = new ResizableDoubleArray();
068
069 /** Mean statistic implementation - can be reset by setter. */
070 private UnivariateStatistic meanImpl = new Mean();
071
072 /** Geometric mean statistic implementation - can be reset by setter. */
073 private UnivariateStatistic geometricMeanImpl = new GeometricMean();
074
075 /** Kurtosis statistic implementation - can be reset by setter. */
076 private UnivariateStatistic kurtosisImpl = new Kurtosis();
077
078 /** Maximum statistic implementation - can be reset by setter. */
079 private UnivariateStatistic maxImpl = new Max();
080
081 /** Minimum statistic implementation - can be reset by setter. */
082 private UnivariateStatistic minImpl = new Min();
083
084 /** Percentile statistic implementation - can be reset by setter. */
085 private UnivariateStatistic percentileImpl = new Percentile();
086
087 /** Skewness statistic implementation - can be reset by setter. */
088 private UnivariateStatistic skewnessImpl = new Skewness();
089
090 /** Variance statistic implementation - can be reset by setter. */
091 private UnivariateStatistic varianceImpl = new Variance();
092
093 /** Sum of squares statistic implementation - can be reset by setter. */
094 private UnivariateStatistic sumsqImpl = new SumOfSquares();
095
096 /** Sum statistic implementation - can be reset by setter. */
097 private UnivariateStatistic sumImpl = new Sum();
098
099 /**
100 * Construct a DescriptiveStatistics instance with an infinite window
101 */
102 public DescriptiveStatistics() {
103 }
104
105 /**
106 * Construct a DescriptiveStatistics instance with the specified window
107 *
108 * @param window the window size.
109 */
110 public DescriptiveStatistics(int window) {
111 setWindowSize(window);
112 }
113
114 /**
115 * Copy constructor. Construct a new DescriptiveStatistics instance that
116 * is a copy of original.
117 *
118 * @param original DescriptiveStatistics instance to copy
119 */
120 public DescriptiveStatistics(DescriptiveStatistics original) {
121 copy(original, this);
122 }
123
124 /**
125 * Represents an infinite window size. When the {@link #getWindowSize()}
126 * returns this value, there is no limit to the number of data values
127 * that can be stored in the dataset.
128 */
129 public static final int INFINITE_WINDOW = -1;
130
131 /**
132 * Adds the value to the dataset. If the dataset is at the maximum size
133 * (i.e., the number of stored elements equals the currently configured
134 * windowSize), the first (oldest) element in the dataset is discarded
135 * to make room for the new value.
136 *
137 * @param v the value to be added
138 */
139 public void addValue(double v) {
140 if (windowSize != INFINITE_WINDOW) {
141 if (getN() == windowSize) {
142 eDA.addElementRolling(v);
143 } else if (getN() < windowSize) {
144 eDA.addElement(v);
145 }
146 } else {
147 eDA.addElement(v);
148 }
149 }
150
151 /**
152 * Removes the most recent value from the dataset.
153 */
154 public void removeMostRecentValue() {
155 eDA.discardMostRecentElements(1);
156 }
157
158 /**
159 * Replaces the most recently stored value with the given value.
160 * There must be at least one element stored to call this method.
161 *
162 * @param v the value to replace the most recent stored value
163 * @return replaced value
164 */
165 public double replaceMostRecentValue(double v) {
166 return eDA.substituteMostRecentElement(v);
167 }
168
169 /**
170 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
171 * arithmetic mean </a> of the available values
172 * @return The mean or Double.NaN if no values have been added.
173 */
174 public double getMean() {
175 return apply(meanImpl);
176 }
177
178 /**
179 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
180 * geometric mean </a> of the available values
181 * @return The geometricMean, Double.NaN if no values have been added,
182 * or if the product of the available values is less than or equal to 0.
183 */
184 public double getGeometricMean() {
185 return apply(geometricMeanImpl);
186 }
187
188 /**
189 * Returns the variance of the available values.
190 * @return The variance, Double.NaN if no values have been added
191 * or 0.0 for a single value set.
192 */
193 public double getVariance() {
194 return apply(varianceImpl);
195 }
196
197 /**
198 * Returns the standard deviation of the available values.
199 * @return The standard deviation, Double.NaN if no values have been added
200 * or 0.0 for a single value set.
201 */
202 public double getStandardDeviation() {
203 double stdDev = Double.NaN;
204 if (getN() > 0) {
205 if (getN() > 1) {
206 stdDev = Math.sqrt(getVariance());
207 } else {
208 stdDev = 0.0;
209 }
210 }
211 return (stdDev);
212 }
213
214 /**
215 * Returns the skewness of the available values. Skewness is a
216 * measure of the asymmetry of a given distribution.
217 * @return The skewness, Double.NaN if no values have been added
218 * or 0.0 for a value set <=2.
219 */
220 public double getSkewness() {
221 return apply(skewnessImpl);
222 }
223
224 /**
225 * Returns the Kurtosis of the available values. Kurtosis is a
226 * measure of the "peakedness" of a distribution
227 * @return The kurtosis, Double.NaN if no values have been added, or 0.0
228 * for a value set <=3.
229 */
230 public double getKurtosis() {
231 return apply(kurtosisImpl);
232 }
233
234 /**
235 * Returns the maximum of the available values
236 * @return The max or Double.NaN if no values have been added.
237 */
238 public double getMax() {
239 return apply(maxImpl);
240 }
241
242 /**
243 * Returns the minimum of the available values
244 * @return The min or Double.NaN if no values have been added.
245 */
246 public double getMin() {
247 return apply(minImpl);
248 }
249
250 /**
251 * Returns the number of available values
252 * @return The number of available values
253 */
254 public long getN() {
255 return eDA.getNumElements();
256 }
257
258 /**
259 * Returns the sum of the values that have been added to Univariate.
260 * @return The sum or Double.NaN if no values have been added
261 */
262 public double getSum() {
263 return apply(sumImpl);
264 }
265
266 /**
267 * Returns the sum of the squares of the available values.
268 * @return The sum of the squares or Double.NaN if no
269 * values have been added.
270 */
271 public double getSumsq() {
272 return apply(sumsqImpl);
273 }
274
275 /**
276 * Resets all statistics and storage
277 */
278 public void clear() {
279 eDA.clear();
280 }
281
282
283 /**
284 * Returns the maximum number of values that can be stored in the
285 * dataset, or INFINITE_WINDOW (-1) if there is no limit.
286 *
287 * @return The current window size or -1 if its Infinite.
288 */
289 public int getWindowSize() {
290 return windowSize;
291 }
292
293 /**
294 * WindowSize controls the number of values which contribute
295 * to the reported statistics. For example, if
296 * windowSize is set to 3 and the values {1,2,3,4,5}
297 * have been added <strong> in that order</strong>
298 * then the <i>available values</i> are {3,4,5} and all
299 * reported statistics will be based on these values
300 * @param windowSize sets the size of the window.
301 */
302 public void setWindowSize(int windowSize) {
303 if (windowSize < 1) {
304 if (windowSize != INFINITE_WINDOW) {
305 throw MathRuntimeException.createIllegalArgumentException(
306 "window size must be positive ({0})", windowSize);
307 }
308 }
309
310 this.windowSize = windowSize;
311
312 // We need to check to see if we need to discard elements
313 // from the front of the array. If the windowSize is less than
314 // the current number of elements.
315 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
316 eDA.discardFrontElements(eDA.getNumElements() - windowSize);
317 }
318 }
319
320 /**
321 * Returns the current set of values in an array of double primitives.
322 * The order of addition is preserved. The returned array is a fresh
323 * copy of the underlying data -- i.e., it is not a reference to the
324 * stored data.
325 *
326 * @return returns the current set of numbers in the order in which they
327 * were added to this set
328 */
329 public double[] getValues() {
330 return eDA.getElements();
331 }
332
333 /**
334 * Returns the current set of values in an array of double primitives,
335 * sorted in ascending order. The returned array is a fresh
336 * copy of the underlying data -- i.e., it is not a reference to the
337 * stored data.
338 * @return returns the current set of
339 * numbers sorted in ascending order
340 */
341 public double[] getSortedValues() {
342 double[] sort = getValues();
343 Arrays.sort(sort);
344 return sort;
345 }
346
347 /**
348 * Returns the element at the specified index
349 * @param index The Index of the element
350 * @return return the element at the specified index
351 */
352 public double getElement(int index) {
353 return eDA.getElement(index);
354 }
355
356 /**
357 * Returns an estimate for the pth percentile of the stored values.
358 * <p>
359 * The implementation provided here follows the first estimation procedure presented
360 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
361 * </p><p>
362 * <strong>Preconditions</strong>:<ul>
363 * <li><code>0 < p ≤ 100</code> (otherwise an
364 * <code>IllegalArgumentException</code> is thrown)</li>
365 * <li>at least one value must be stored (returns <code>Double.NaN
366 * </code> otherwise)</li>
367 * </ul></p>
368 *
369 * @param p the requested percentile (scaled from 0 - 100)
370 * @return An estimate for the pth percentile of the stored data
371 * @throws IllegalStateException if percentile implementation has been
372 * overridden and the supplied implementation does not support setQuantile
373 * values
374 */
375 public double getPercentile(double p) {
376 if (percentileImpl instanceof Percentile) {
377 ((Percentile) percentileImpl).setQuantile(p);
378 } else {
379 try {
380 percentileImpl.getClass().getMethod("setQuantile",
381 new Class[] {Double.TYPE}).invoke(percentileImpl,
382 new Object[] {Double.valueOf(p)});
383 } catch (NoSuchMethodException e1) { // Setter guard should prevent
384 throw MathRuntimeException.createIllegalArgumentException(
385 "percentile implementation {0} does not support setQuantile",
386 percentileImpl.getClass().getName());
387 } catch (IllegalAccessException e2) {
388 throw MathRuntimeException.createIllegalArgumentException(
389 "cannot access setQuantile method in percentile implementation {0}",
390 percentileImpl.getClass().getName());
391 } catch (InvocationTargetException e3) {
392 throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
393 }
394 }
395 return apply(percentileImpl);
396 }
397
398 /**
399 * Generates a text report displaying univariate statistics from values
400 * that have been added. Each statistic is displayed on a separate
401 * line.
402 *
403 * @return String with line feeds displaying statistics
404 */
405 @Override
406 public String toString() {
407 StringBuffer outBuffer = new StringBuffer();
408 String endl = "\n";
409 outBuffer.append("DescriptiveStatistics:").append(endl);
410 outBuffer.append("n: ").append(getN()).append(endl);
411 outBuffer.append("min: ").append(getMin()).append(endl);
412 outBuffer.append("max: ").append(getMax()).append(endl);
413 outBuffer.append("mean: ").append(getMean()).append(endl);
414 outBuffer.append("std dev: ").append(getStandardDeviation())
415 .append(endl);
416 outBuffer.append("median: ").append(getPercentile(50)).append(endl);
417 outBuffer.append("skewness: ").append(getSkewness()).append(endl);
418 outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl);
419 return outBuffer.toString();
420 }
421
422 /**
423 * Apply the given statistic to the data associated with this set of statistics.
424 * @param stat the statistic to apply
425 * @return the computed value of the statistic.
426 */
427 public double apply(UnivariateStatistic stat) {
428 return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements());
429 }
430
431 // Implementation getters and setter
432
433 /**
434 * Returns the currently configured mean implementation.
435 *
436 * @return the UnivariateStatistic implementing the mean
437 * @since 1.2
438 */
439 public synchronized UnivariateStatistic getMeanImpl() {
440 return meanImpl;
441 }
442
443 /**
444 * <p>Sets the implementation for the mean.</p>
445 *
446 * @param meanImpl the UnivariateStatistic instance to use
447 * for computing the mean
448 * @since 1.2
449 */
450 public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
451 this.meanImpl = meanImpl;
452 }
453
454 /**
455 * Returns the currently configured geometric mean implementation.
456 *
457 * @return the UnivariateStatistic implementing the geometric mean
458 * @since 1.2
459 */
460 public synchronized UnivariateStatistic getGeometricMeanImpl() {
461 return geometricMeanImpl;
462 }
463
464 /**
465 * <p>Sets the implementation for the gemoetric mean.</p>
466 *
467 * @param geometricMeanImpl the UnivariateStatistic instance to use
468 * for computing the geometric mean
469 * @since 1.2
470 */
471 public synchronized void setGeometricMeanImpl(
472 UnivariateStatistic geometricMeanImpl) {
473 this.geometricMeanImpl = geometricMeanImpl;
474 }
475
476 /**
477 * Returns the currently configured kurtosis implementation.
478 *
479 * @return the UnivariateStatistic implementing the kurtosis
480 * @since 1.2
481 */
482 public synchronized UnivariateStatistic getKurtosisImpl() {
483 return kurtosisImpl;
484 }
485
486 /**
487 * <p>Sets the implementation for the kurtosis.</p>
488 *
489 * @param kurtosisImpl the UnivariateStatistic instance to use
490 * for computing the kurtosis
491 * @since 1.2
492 */
493 public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
494 this.kurtosisImpl = kurtosisImpl;
495 }
496
497 /**
498 * Returns the currently configured maximum implementation.
499 *
500 * @return the UnivariateStatistic implementing the maximum
501 * @since 1.2
502 */
503 public synchronized UnivariateStatistic getMaxImpl() {
504 return maxImpl;
505 }
506
507 /**
508 * <p>Sets the implementation for the maximum.</p>
509 *
510 * @param maxImpl the UnivariateStatistic instance to use
511 * for computing the maximum
512 * @since 1.2
513 */
514 public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
515 this.maxImpl = maxImpl;
516 }
517
518 /**
519 * Returns the currently configured minimum implementation.
520 *
521 * @return the UnivariateStatistic implementing the minimum
522 * @since 1.2
523 */
524 public synchronized UnivariateStatistic getMinImpl() {
525 return minImpl;
526 }
527
528 /**
529 * <p>Sets the implementation for the minimum.</p>
530 *
531 * @param minImpl the UnivariateStatistic instance to use
532 * for computing the minimum
533 * @since 1.2
534 */
535 public synchronized void setMinImpl(UnivariateStatistic minImpl) {
536 this.minImpl = minImpl;
537 }
538
539 /**
540 * Returns the currently configured percentile implementation.
541 *
542 * @return the UnivariateStatistic implementing the percentile
543 * @since 1.2
544 */
545 public synchronized UnivariateStatistic getPercentileImpl() {
546 return percentileImpl;
547 }
548
549 /**
550 * Sets the implementation to be used by {@link #getPercentile(double)}.
551 * The supplied <code>UnivariateStatistic</code> must provide a
552 * <code>setQuantile(double)</code> method; otherwise
553 * <code>IllegalArgumentException</code> is thrown.
554 *
555 * @param percentileImpl the percentileImpl to set
556 * @throws IllegalArgumentException if the supplied implementation does not
557 * provide a <code>setQuantile</code> method
558 * @since 1.2
559 */
560 public synchronized void setPercentileImpl(
561 UnivariateStatistic percentileImpl) {
562 try {
563 percentileImpl.getClass().getMethod("setQuantile",
564 new Class[] {Double.TYPE}).invoke(percentileImpl,
565 new Object[] {Double.valueOf(50.0d)});
566 } catch (NoSuchMethodException e1) {
567 throw MathRuntimeException.createIllegalArgumentException(
568 "percentile implementation {0} does not support setQuantile",
569 percentileImpl.getClass().getName());
570 } catch (IllegalAccessException e2) {
571 throw MathRuntimeException.createIllegalArgumentException(
572 "cannot access setQuantile method in percentile implementation {0}",
573 percentileImpl.getClass().getName());
574 } catch (InvocationTargetException e3) {
575 throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
576 }
577 this.percentileImpl = percentileImpl;
578 }
579
580 /**
581 * Returns the currently configured skewness implementation.
582 *
583 * @return the UnivariateStatistic implementing the skewness
584 * @since 1.2
585 */
586 public synchronized UnivariateStatistic getSkewnessImpl() {
587 return skewnessImpl;
588 }
589
590 /**
591 * <p>Sets the implementation for the skewness.</p>
592 *
593 * @param skewnessImpl the UnivariateStatistic instance to use
594 * for computing the skewness
595 * @since 1.2
596 */
597 public synchronized void setSkewnessImpl(
598 UnivariateStatistic skewnessImpl) {
599 this.skewnessImpl = skewnessImpl;
600 }
601
602 /**
603 * Returns the currently configured variance implementation.
604 *
605 * @return the UnivariateStatistic implementing the variance
606 * @since 1.2
607 */
608 public synchronized UnivariateStatistic getVarianceImpl() {
609 return varianceImpl;
610 }
611
612 /**
613 * <p>Sets the implementation for the variance.</p>
614 *
615 * @param varianceImpl the UnivariateStatistic instance to use
616 * for computing the variance
617 * @since 1.2
618 */
619 public synchronized void setVarianceImpl(
620 UnivariateStatistic varianceImpl) {
621 this.varianceImpl = varianceImpl;
622 }
623
624 /**
625 * Returns the currently configured sum of squares implementation.
626 *
627 * @return the UnivariateStatistic implementing the sum of squares
628 * @since 1.2
629 */
630 public synchronized UnivariateStatistic getSumsqImpl() {
631 return sumsqImpl;
632 }
633
634 /**
635 * <p>Sets the implementation for the sum of squares.</p>
636 *
637 * @param sumsqImpl the UnivariateStatistic instance to use
638 * for computing the sum of squares
639 * @since 1.2
640 */
641 public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
642 this.sumsqImpl = sumsqImpl;
643 }
644
645 /**
646 * Returns the currently configured sum implementation.
647 *
648 * @return the UnivariateStatistic implementing the sum
649 * @since 1.2
650 */
651 public synchronized UnivariateStatistic getSumImpl() {
652 return sumImpl;
653 }
654
655 /**
656 * <p>Sets the implementation for the sum.</p>
657 *
658 * @param sumImpl the UnivariateStatistic instance to use
659 * for computing the sum
660 * @since 1.2
661 */
662 public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
663 this.sumImpl = sumImpl;
664 }
665
666 /**
667 * Returns a copy of this DescriptiveStatistics instance with the same internal state.
668 *
669 * @return a copy of this
670 */
671 public DescriptiveStatistics copy() {
672 DescriptiveStatistics result = new DescriptiveStatistics();
673 copy(this, result);
674 return result;
675 }
676
677 /**
678 * Copies source to dest.
679 * <p>Neither source nor dest can be null.</p>
680 *
681 * @param source DescriptiveStatistics to copy
682 * @param dest DescriptiveStatistics to copy to
683 * @throws NullPointerException if either source or dest is null
684 */
685 public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
686 // Copy data and window size
687 dest.eDA = source.eDA.copy();
688 dest.windowSize = source.windowSize;
689
690 // Copy implementations
691 dest.maxImpl = source.maxImpl.copy();
692 dest.meanImpl = source.meanImpl.copy();
693 dest.minImpl = source.minImpl.copy();
694 dest.sumImpl = source.sumImpl.copy();
695 dest.varianceImpl = source.varianceImpl.copy();
696 dest.sumsqImpl = source.sumsqImpl.copy();
697 dest.geometricMeanImpl = source.geometricMeanImpl.copy();
698 dest.kurtosisImpl = source.kurtosisImpl;
699 dest.skewnessImpl = source.skewnessImpl;
700 dest.percentileImpl = source.percentileImpl;
701 }
702 }