001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive;
018    
019    import java.io.Serializable;
020    import java.lang.reflect.InvocationTargetException;
021    import java.util.Arrays;
022    
023    import org.apache.commons.math.MathRuntimeException;
024    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
025    import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
026    import org.apache.commons.math.stat.descriptive.moment.Mean;
027    import org.apache.commons.math.stat.descriptive.moment.Skewness;
028    import org.apache.commons.math.stat.descriptive.moment.Variance;
029    import org.apache.commons.math.stat.descriptive.rank.Max;
030    import org.apache.commons.math.stat.descriptive.rank.Min;
031    import org.apache.commons.math.stat.descriptive.rank.Percentile;
032    import org.apache.commons.math.stat.descriptive.summary.Sum;
033    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
034    import org.apache.commons.math.util.ResizableDoubleArray;
035    
036    
037    /**
038     * Maintains a dataset of values of a single variable and computes descriptive
039     * statistics based on stored data. The {@link #getWindowSize() windowSize}
040     * property sets a limit on the number of values that can be stored in the
041     * dataset.  The default value, INFINITE_WINDOW, puts no limit on the size of
042     * the dataset.  This value should be used with caution, as the backing store
043     * will grow without bound in this case.  For very large datasets,
044     * {@link SummaryStatistics}, which does not store the dataset, should be used
045     * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
046     * more values are added than can be stored in the dataset, new values are
047     * added in a "rolling" manner, with new values replacing the "oldest" values
048     * in the dataset.
049     *
050     * <p>Note: this class is not threadsafe.  Use
051     * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
052     * threads is required.</p>
053     *
054     * @version $Revision: 885278 $ $Date: 2009-11-29 16:47:51 -0500 (Sun, 29 Nov 2009) $
055     */
056    public class DescriptiveStatistics implements StatisticalSummary, Serializable {
057    
058        /**
059         * Represents an infinite window size.  When the {@link #getWindowSize()}
060         * returns this value, there is no limit to the number of data values
061         * that can be stored in the dataset.
062         */
063        public static final int INFINITE_WINDOW = -1;
064    
065        /** Serialization UID */
066        private static final long serialVersionUID = 4133067267405273064L;
067    
068        /** Name of the setQuantile method. */
069        private static final String SET_QUANTILE_METHOD_NAME = "setQuantile";
070    
071        /** Message for unsupported setQuantile. */
072        private static final String UNSUPPORTED_METHOD_MESSAGE =
073            "percentile implementation {0} does not support {1}";
074    
075        /** Message for illegal accesson setquantile. */
076        private static final String ILLEGAL_ACCESS_MESSAGE =
077            "cannot access {0} method in percentile implementation {1}";
078    
079        /** hold the window size **/
080        protected int windowSize = INFINITE_WINDOW;
081    
082        /**
083         *  Stored data values
084         */
085        protected ResizableDoubleArray eDA = new ResizableDoubleArray();
086    
087        /** Mean statistic implementation - can be reset by setter. */
088        private UnivariateStatistic meanImpl = new Mean();
089    
090        /** Geometric mean statistic implementation - can be reset by setter. */
091        private UnivariateStatistic geometricMeanImpl = new GeometricMean();
092    
093        /** Kurtosis statistic implementation - can be reset by setter. */
094        private UnivariateStatistic kurtosisImpl = new Kurtosis();
095    
096        /** Maximum statistic implementation - can be reset by setter. */
097        private UnivariateStatistic maxImpl = new Max();
098    
099        /** Minimum statistic implementation - can be reset by setter. */
100        private UnivariateStatistic minImpl = new Min();
101    
102        /** Percentile statistic implementation - can be reset by setter. */
103        private UnivariateStatistic percentileImpl = new Percentile();
104    
105        /** Skewness statistic implementation - can be reset by setter. */
106        private UnivariateStatistic skewnessImpl = new Skewness();
107    
108        /** Variance statistic implementation - can be reset by setter. */
109        private UnivariateStatistic varianceImpl = new Variance();
110    
111        /** Sum of squares statistic implementation - can be reset by setter. */
112        private UnivariateStatistic sumsqImpl = new SumOfSquares();
113    
114        /** Sum statistic implementation - can be reset by setter. */
115        private UnivariateStatistic sumImpl = new Sum();
116    
117        /**
118         * Construct a DescriptiveStatistics instance with an infinite window
119         */
120        public DescriptiveStatistics() {
121        }
122    
123        /**
124         * Construct a DescriptiveStatistics instance with the specified window
125         *
126         * @param window the window size.
127         */
128        public DescriptiveStatistics(int window) {
129            setWindowSize(window);
130        }
131    
132        /**
133         * Copy constructor.  Construct a new DescriptiveStatistics instance that
134         * is a copy of original.
135         *
136         * @param original DescriptiveStatistics instance to copy
137         */
138        public DescriptiveStatistics(DescriptiveStatistics original) {
139            copy(original, this);
140        }
141    
142        /**
143         * Adds the value to the dataset. If the dataset is at the maximum size
144         * (i.e., the number of stored elements equals the currently configured
145         * windowSize), the first (oldest) element in the dataset is discarded
146         * to make room for the new value.
147         *
148         * @param v the value to be added
149         */
150        public void addValue(double v) {
151            if (windowSize != INFINITE_WINDOW) {
152                if (getN() == windowSize) {
153                    eDA.addElementRolling(v);
154                } else if (getN() < windowSize) {
155                    eDA.addElement(v);
156                }
157            } else {
158                eDA.addElement(v);
159            }
160        }
161    
162        /**
163         * Removes the most recent value from the dataset.
164         */
165        public void removeMostRecentValue() {
166            eDA.discardMostRecentElements(1);
167        }
168    
169        /**
170         * Replaces the most recently stored value with the given value.
171         * There must be at least one element stored to call this method.
172         *
173         * @param v the value to replace the most recent stored value
174         * @return replaced value
175         */
176        public double replaceMostRecentValue(double v) {
177            return eDA.substituteMostRecentElement(v);
178        }
179    
180        /**
181         * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
182         * arithmetic mean </a> of the available values
183         * @return The mean or Double.NaN if no values have been added.
184         */
185        public double getMean() {
186            return apply(meanImpl);
187        }
188    
189        /**
190         * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
191         * geometric mean </a> of the available values
192         * @return The geometricMean, Double.NaN if no values have been added,
193         * or if the product of the available values is less than or equal to 0.
194         */
195        public double getGeometricMean() {
196            return apply(geometricMeanImpl);
197        }
198    
199        /**
200         * Returns the variance of the available values.
201         * @return The variance, Double.NaN if no values have been added
202         * or 0.0 for a single value set.
203         */
204        public double getVariance() {
205            return apply(varianceImpl);
206        }
207    
208        /**
209         * Returns the standard deviation of the available values.
210         * @return The standard deviation, Double.NaN if no values have been added
211         * or 0.0 for a single value set.
212         */
213        public double getStandardDeviation() {
214            double stdDev = Double.NaN;
215            if (getN() > 0) {
216                if (getN() > 1) {
217                    stdDev = Math.sqrt(getVariance());
218                } else {
219                    stdDev = 0.0;
220                }
221            }
222            return stdDev;
223        }
224    
225        /**
226         * Returns the skewness of the available values. Skewness is a
227         * measure of the asymmetry of a given distribution.
228         * @return The skewness, Double.NaN if no values have been added
229         * or 0.0 for a value set &lt;=2.
230         */
231        public double getSkewness() {
232            return apply(skewnessImpl);
233        }
234    
235        /**
236         * Returns the Kurtosis of the available values. Kurtosis is a
237         * measure of the "peakedness" of a distribution
238         * @return The kurtosis, Double.NaN if no values have been added, or 0.0
239         * for a value set &lt;=3.
240         */
241        public double getKurtosis() {
242            return apply(kurtosisImpl);
243        }
244    
245        /**
246         * Returns the maximum of the available values
247         * @return The max or Double.NaN if no values have been added.
248         */
249        public double getMax() {
250            return apply(maxImpl);
251        }
252    
253        /**
254        * Returns the minimum of the available values
255        * @return The min or Double.NaN if no values have been added.
256        */
257        public double getMin() {
258            return apply(minImpl);
259        }
260    
261        /**
262         * Returns the number of available values
263         * @return The number of available values
264         */
265        public long getN() {
266            return eDA.getNumElements();
267        }
268    
269        /**
270         * Returns the sum of the values that have been added to Univariate.
271         * @return The sum or Double.NaN if no values have been added
272         */
273        public double getSum() {
274            return apply(sumImpl);
275        }
276    
277        /**
278         * Returns the sum of the squares of the available values.
279         * @return The sum of the squares or Double.NaN if no
280         * values have been added.
281         */
282        public double getSumsq() {
283            return apply(sumsqImpl);
284        }
285    
286        /**
287         * Resets all statistics and storage
288         */
289        public void clear() {
290            eDA.clear();
291        }
292    
293    
294        /**
295         * Returns the maximum number of values that can be stored in the
296         * dataset, or INFINITE_WINDOW (-1) if there is no limit.
297         *
298         * @return The current window size or -1 if its Infinite.
299         */
300        public int getWindowSize() {
301            return windowSize;
302        }
303    
304        /**
305         * WindowSize controls the number of values which contribute
306         * to the reported statistics.  For example, if
307         * windowSize is set to 3 and the values {1,2,3,4,5}
308         * have been added <strong> in that order</strong>
309         * then the <i>available values</i> are {3,4,5} and all
310         * reported statistics will be based on these values
311         * @param windowSize sets the size of the window.
312         */
313        public void setWindowSize(int windowSize) {
314            if (windowSize < 1) {
315                if (windowSize != INFINITE_WINDOW) {
316                    throw MathRuntimeException.createIllegalArgumentException(
317                          "window size must be positive ({0})", windowSize);
318                }
319            }
320    
321            this.windowSize = windowSize;
322    
323            // We need to check to see if we need to discard elements
324            // from the front of the array.  If the windowSize is less than
325            // the current number of elements.
326            if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
327                eDA.discardFrontElements(eDA.getNumElements() - windowSize);
328            }
329        }
330    
331        /**
332         * Returns the current set of values in an array of double primitives.
333         * The order of addition is preserved.  The returned array is a fresh
334         * copy of the underlying data -- i.e., it is not a reference to the
335         * stored data.
336         *
337         * @return returns the current set of numbers in the order in which they
338         *         were added to this set
339         */
340        public double[] getValues() {
341            return eDA.getElements();
342        }
343    
344        /**
345         * Returns the current set of values in an array of double primitives,
346         * sorted in ascending order.  The returned array is a fresh
347         * copy of the underlying data -- i.e., it is not a reference to the
348         * stored data.
349         * @return returns the current set of
350         * numbers sorted in ascending order
351         */
352        public double[] getSortedValues() {
353            double[] sort = getValues();
354            Arrays.sort(sort);
355            return sort;
356        }
357    
358        /**
359         * Returns the element at the specified index
360         * @param index The Index of the element
361         * @return return the element at the specified index
362         */
363        public double getElement(int index) {
364            return eDA.getElement(index);
365        }
366    
367        /**
368         * Returns an estimate for the pth percentile of the stored values.
369         * <p>
370         * The implementation provided here follows the first estimation procedure presented
371         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
372         * </p><p>
373         * <strong>Preconditions</strong>:<ul>
374         * <li><code>0 &lt; p &le; 100</code> (otherwise an
375         * <code>IllegalArgumentException</code> is thrown)</li>
376         * <li>at least one value must be stored (returns <code>Double.NaN
377         *     </code> otherwise)</li>
378         * </ul></p>
379         *
380         * @param p the requested percentile (scaled from 0 - 100)
381         * @return An estimate for the pth percentile of the stored data
382         * @throws IllegalStateException if percentile implementation has been
383         *  overridden and the supplied implementation does not support setQuantile
384         * values
385         */
386        public double getPercentile(double p) {
387            if (percentileImpl instanceof Percentile) {
388                ((Percentile) percentileImpl).setQuantile(p);
389            } else {
390                try {
391                    percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME,
392                            new Class[] {Double.TYPE}).invoke(percentileImpl,
393                                    new Object[] {Double.valueOf(p)});
394                } catch (NoSuchMethodException e1) { // Setter guard should prevent
395                    throw MathRuntimeException.createIllegalArgumentException(
396                          UNSUPPORTED_METHOD_MESSAGE,
397                          percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
398                } catch (IllegalAccessException e2) {
399                    throw MathRuntimeException.createIllegalArgumentException(
400                          ILLEGAL_ACCESS_MESSAGE,
401                          SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
402                } catch (InvocationTargetException e3) {
403                    throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
404                }
405            }
406            return apply(percentileImpl);
407        }
408    
409        /**
410         * Generates a text report displaying univariate statistics from values
411         * that have been added.  Each statistic is displayed on a separate
412         * line.
413         *
414         * @return String with line feeds displaying statistics
415         */
416        @Override
417        public String toString() {
418            StringBuffer outBuffer = new StringBuffer();
419            String endl = "\n";
420            outBuffer.append("DescriptiveStatistics:").append(endl);
421            outBuffer.append("n: ").append(getN()).append(endl);
422            outBuffer.append("min: ").append(getMin()).append(endl);
423            outBuffer.append("max: ").append(getMax()).append(endl);
424            outBuffer.append("mean: ").append(getMean()).append(endl);
425            outBuffer.append("std dev: ").append(getStandardDeviation())
426                .append(endl);
427            outBuffer.append("median: ").append(getPercentile(50)).append(endl);
428            outBuffer.append("skewness: ").append(getSkewness()).append(endl);
429            outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl);
430            return outBuffer.toString();
431        }
432    
433        /**
434         * Apply the given statistic to the data associated with this set of statistics.
435         * @param stat the statistic to apply
436         * @return the computed value of the statistic.
437         */
438        public double apply(UnivariateStatistic stat) {
439            return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements());
440        }
441    
442        // Implementation getters and setter
443    
444        /**
445         * Returns the currently configured mean implementation.
446         *
447         * @return the UnivariateStatistic implementing the mean
448         * @since 1.2
449         */
450        public synchronized UnivariateStatistic getMeanImpl() {
451            return meanImpl;
452        }
453    
454        /**
455         * <p>Sets the implementation for the mean.</p>
456         *
457         * @param meanImpl the UnivariateStatistic instance to use
458         * for computing the mean
459         * @since 1.2
460         */
461        public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
462            this.meanImpl = meanImpl;
463        }
464    
465        /**
466         * Returns the currently configured geometric mean implementation.
467         *
468         * @return the UnivariateStatistic implementing the geometric mean
469         * @since 1.2
470         */
471        public synchronized UnivariateStatistic getGeometricMeanImpl() {
472            return geometricMeanImpl;
473        }
474    
475        /**
476         * <p>Sets the implementation for the gemoetric mean.</p>
477         *
478         * @param geometricMeanImpl the UnivariateStatistic instance to use
479         * for computing the geometric mean
480         * @since 1.2
481         */
482        public synchronized void setGeometricMeanImpl(
483                UnivariateStatistic geometricMeanImpl) {
484            this.geometricMeanImpl = geometricMeanImpl;
485        }
486    
487        /**
488         * Returns the currently configured kurtosis implementation.
489         *
490         * @return the UnivariateStatistic implementing the kurtosis
491         * @since 1.2
492         */
493        public synchronized UnivariateStatistic getKurtosisImpl() {
494            return kurtosisImpl;
495        }
496    
497        /**
498         * <p>Sets the implementation for the kurtosis.</p>
499         *
500         * @param kurtosisImpl the UnivariateStatistic instance to use
501         * for computing the kurtosis
502         * @since 1.2
503         */
504        public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
505            this.kurtosisImpl = kurtosisImpl;
506        }
507    
508        /**
509         * Returns the currently configured maximum implementation.
510         *
511         * @return the UnivariateStatistic implementing the maximum
512         * @since 1.2
513         */
514        public synchronized UnivariateStatistic getMaxImpl() {
515            return maxImpl;
516        }
517    
518        /**
519         * <p>Sets the implementation for the maximum.</p>
520         *
521         * @param maxImpl the UnivariateStatistic instance to use
522         * for computing the maximum
523         * @since 1.2
524         */
525        public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
526            this.maxImpl = maxImpl;
527        }
528    
529        /**
530         * Returns the currently configured minimum implementation.
531         *
532         * @return the UnivariateStatistic implementing the minimum
533         * @since 1.2
534         */
535        public synchronized UnivariateStatistic getMinImpl() {
536            return minImpl;
537        }
538    
539        /**
540         * <p>Sets the implementation for the minimum.</p>
541         *
542         * @param minImpl the UnivariateStatistic instance to use
543         * for computing the minimum
544         * @since 1.2
545         */
546        public synchronized void setMinImpl(UnivariateStatistic minImpl) {
547            this.minImpl = minImpl;
548        }
549    
550        /**
551         * Returns the currently configured percentile implementation.
552         *
553         * @return the UnivariateStatistic implementing the percentile
554         * @since 1.2
555         */
556        public synchronized UnivariateStatistic getPercentileImpl() {
557            return percentileImpl;
558        }
559    
560        /**
561         * Sets the implementation to be used by {@link #getPercentile(double)}.
562         * The supplied <code>UnivariateStatistic</code> must provide a
563         * <code>setQuantile(double)</code> method; otherwise
564         * <code>IllegalArgumentException</code> is thrown.
565         *
566         * @param percentileImpl the percentileImpl to set
567         * @throws IllegalArgumentException if the supplied implementation does not
568         *  provide a <code>setQuantile</code> method
569         * @since 1.2
570         */
571        public synchronized void setPercentileImpl(
572                UnivariateStatistic percentileImpl) {
573            try {
574                percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME,
575                        new Class[] {Double.TYPE}).invoke(percentileImpl,
576                                new Object[] {Double.valueOf(50.0d)});
577            } catch (NoSuchMethodException e1) {
578                throw MathRuntimeException.createIllegalArgumentException(
579                      "percentile implementation {0} does not support setQuantile",
580                      percentileImpl.getClass().getName());
581            } catch (IllegalAccessException e2) {
582                throw MathRuntimeException.createIllegalArgumentException(
583                      ILLEGAL_ACCESS_MESSAGE,
584                      SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
585            } catch (InvocationTargetException e3) {
586                throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
587            }
588            this.percentileImpl = percentileImpl;
589        }
590    
591        /**
592         * Returns the currently configured skewness implementation.
593         *
594         * @return the UnivariateStatistic implementing the skewness
595         * @since 1.2
596         */
597        public synchronized UnivariateStatistic getSkewnessImpl() {
598            return skewnessImpl;
599        }
600    
601        /**
602         * <p>Sets the implementation for the skewness.</p>
603         *
604         * @param skewnessImpl the UnivariateStatistic instance to use
605         * for computing the skewness
606         * @since 1.2
607         */
608        public synchronized void setSkewnessImpl(
609                UnivariateStatistic skewnessImpl) {
610            this.skewnessImpl = skewnessImpl;
611        }
612    
613        /**
614         * Returns the currently configured variance implementation.
615         *
616         * @return the UnivariateStatistic implementing the variance
617         * @since 1.2
618         */
619        public synchronized UnivariateStatistic getVarianceImpl() {
620            return varianceImpl;
621        }
622    
623        /**
624         * <p>Sets the implementation for the variance.</p>
625         *
626         * @param varianceImpl the UnivariateStatistic instance to use
627         * for computing the variance
628         * @since 1.2
629         */
630        public synchronized void setVarianceImpl(
631                UnivariateStatistic varianceImpl) {
632            this.varianceImpl = varianceImpl;
633        }
634    
635        /**
636         * Returns the currently configured sum of squares implementation.
637         *
638         * @return the UnivariateStatistic implementing the sum of squares
639         * @since 1.2
640         */
641        public synchronized UnivariateStatistic getSumsqImpl() {
642            return sumsqImpl;
643        }
644    
645        /**
646         * <p>Sets the implementation for the sum of squares.</p>
647         *
648         * @param sumsqImpl the UnivariateStatistic instance to use
649         * for computing the sum of squares
650         * @since 1.2
651         */
652        public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
653            this.sumsqImpl = sumsqImpl;
654        }
655    
656        /**
657         * Returns the currently configured sum implementation.
658         *
659         * @return the UnivariateStatistic implementing the sum
660         * @since 1.2
661         */
662        public synchronized UnivariateStatistic getSumImpl() {
663            return sumImpl;
664        }
665    
666        /**
667         * <p>Sets the implementation for the sum.</p>
668         *
669         * @param sumImpl the UnivariateStatistic instance to use
670         * for computing the sum
671         * @since 1.2
672         */
673        public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
674            this.sumImpl = sumImpl;
675        }
676    
677        /**
678         * Returns a copy of this DescriptiveStatistics instance with the same internal state.
679         *
680         * @return a copy of this
681         */
682        public DescriptiveStatistics copy() {
683            DescriptiveStatistics result = new DescriptiveStatistics();
684            copy(this, result);
685            return result;
686        }
687    
688        /**
689         * Copies source to dest.
690         * <p>Neither source nor dest can be null.</p>
691         *
692         * @param source DescriptiveStatistics to copy
693         * @param dest DescriptiveStatistics to copy to
694         * @throws NullPointerException if either source or dest is null
695         */
696        public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
697            // Copy data and window size
698            dest.eDA = source.eDA.copy();
699            dest.windowSize = source.windowSize;
700    
701            // Copy implementations
702            dest.maxImpl = source.maxImpl.copy();
703            dest.meanImpl = source.meanImpl.copy();
704            dest.minImpl = source.minImpl.copy();
705            dest.sumImpl = source.sumImpl.copy();
706            dest.varianceImpl = source.varianceImpl.copy();
707            dest.sumsqImpl = source.sumsqImpl.copy();
708            dest.geometricMeanImpl = source.geometricMeanImpl.copy();
709            dest.kurtosisImpl = source.kurtosisImpl;
710            dest.skewnessImpl = source.skewnessImpl;
711            dest.percentileImpl = source.percentileImpl;
712        }
713    }