001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020
021 import org.apache.commons.math.MathRuntimeException;
022 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
023 import org.apache.commons.math.stat.descriptive.moment.Mean;
024 import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
025 import org.apache.commons.math.stat.descriptive.moment.Variance;
026 import org.apache.commons.math.stat.descriptive.rank.Max;
027 import org.apache.commons.math.stat.descriptive.rank.Min;
028 import org.apache.commons.math.stat.descriptive.summary.Sum;
029 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
030 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
031 import org.apache.commons.math.util.MathUtils;
032
033 /**
034 * <p>
035 * Computes summary statistics for a stream of data values added using the
036 * {@link #addValue(double) addValue} method. The data values are not stored in
037 * memory, so this class can be used to compute statistics for very large data
038 * streams.
039 * </p>
040 * <p>
041 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
042 * state and compute statistics are configurable via setters. For example, the
043 * default implementation for the variance can be overridden by calling
044 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
045 * these methods must implement the {@link StorelessUnivariateStatistic}
046 * interface and configuration must be completed before <code>addValue</code>
047 * is called. No configuration is necessary to use the default, commons-math
048 * provided implementations.
049 * </p>
050 * <p>
051 * Note: This class is not thread-safe. Use
052 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
053 * threads is required.
054 * </p>
055 * @version $Revision: 791728 $ $Date: 2009-07-07 03:17:50 -0400 (Tue, 07 Jul 2009) $
056 */
057 public class SummaryStatistics implements StatisticalSummary, Serializable {
058
059 /** Serialization UID */
060 private static final long serialVersionUID = -2021321786743555871L;
061
062 /**
063 * Construct a SummaryStatistics instance
064 */
065 public SummaryStatistics() {
066 }
067
068 /**
069 * A copy constructor. Creates a deep-copy of the {@code original}.
070 *
071 * @param original the {@code SummaryStatistics} instance to copy
072 */
073 public SummaryStatistics(SummaryStatistics original) {
074 copy(original, this);
075 }
076
077 /** count of values that have been added */
078 protected long n = 0;
079
080 /** SecondMoment is used to compute the mean and variance */
081 protected SecondMoment secondMoment = new SecondMoment();
082
083 /** sum of values that have been added */
084 protected Sum sum = new Sum();
085
086 /** sum of the square of each value that has been added */
087 protected SumOfSquares sumsq = new SumOfSquares();
088
089 /** min of values that have been added */
090 protected Min min = new Min();
091
092 /** max of values that have been added */
093 protected Max max = new Max();
094
095 /** sumLog of values that have been added */
096 protected SumOfLogs sumLog = new SumOfLogs();
097
098 /** geoMean of values that have been added */
099 protected GeometricMean geoMean = new GeometricMean(sumLog);
100
101 /** mean of values that have been added */
102 protected Mean mean = new Mean();
103
104 /** variance of values that have been added */
105 protected Variance variance = new Variance();
106
107 /** Sum statistic implementation - can be reset by setter. */
108 private StorelessUnivariateStatistic sumImpl = sum;
109
110 /** Sum of squares statistic implementation - can be reset by setter. */
111 private StorelessUnivariateStatistic sumsqImpl = sumsq;
112
113 /** Minimum statistic implementation - can be reset by setter. */
114 private StorelessUnivariateStatistic minImpl = min;
115
116 /** Maximum statistic implementation - can be reset by setter. */
117 private StorelessUnivariateStatistic maxImpl = max;
118
119 /** Sum of log statistic implementation - can be reset by setter. */
120 private StorelessUnivariateStatistic sumLogImpl = sumLog;
121
122 /** Geometric mean statistic implementation - can be reset by setter. */
123 private StorelessUnivariateStatistic geoMeanImpl = geoMean;
124
125 /** Mean statistic implementation - can be reset by setter. */
126 private StorelessUnivariateStatistic meanImpl = mean;
127
128 /** Variance statistic implementation - can be reset by setter. */
129 private StorelessUnivariateStatistic varianceImpl = variance;
130
131 /**
132 * Return a {@link StatisticalSummaryValues} instance reporting current
133 * statistics.
134 * @return Current values of statistics
135 */
136 public StatisticalSummary getSummary() {
137 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
138 getMax(), getMin(), getSum());
139 }
140
141 /**
142 * Add a value to the data
143 * @param value the value to add
144 */
145 public void addValue(double value) {
146 sumImpl.increment(value);
147 sumsqImpl.increment(value);
148 minImpl.increment(value);
149 maxImpl.increment(value);
150 sumLogImpl.increment(value);
151 secondMoment.increment(value);
152 // If mean, variance or geomean have been overridden,
153 // need to increment these
154 if (!(meanImpl instanceof Mean)) {
155 meanImpl.increment(value);
156 }
157 if (!(varianceImpl instanceof Variance)) {
158 varianceImpl.increment(value);
159 }
160 if (!(geoMeanImpl instanceof GeometricMean)) {
161 geoMeanImpl.increment(value);
162 }
163 n++;
164 }
165
166 /**
167 * Returns the number of available values
168 * @return The number of available values
169 */
170 public long getN() {
171 return n;
172 }
173
174 /**
175 * Returns the sum of the values that have been added
176 * @return The sum or <code>Double.NaN</code> if no values have been added
177 */
178 public double getSum() {
179 return sumImpl.getResult();
180 }
181
182 /**
183 * Returns the sum of the squares of the values that have been added.
184 * <p>
185 * Double.NaN is returned if no values have been added.
186 * </p>
187 * @return The sum of squares
188 */
189 public double getSumsq() {
190 return sumsqImpl.getResult();
191 }
192
193 /**
194 * Returns the mean of the values that have been added.
195 * <p>
196 * Double.NaN is returned if no values have been added.
197 * </p>
198 * @return the mean
199 */
200 public double getMean() {
201 if (mean == meanImpl) {
202 return new Mean(secondMoment).getResult();
203 } else {
204 return meanImpl.getResult();
205 }
206 }
207
208 /**
209 * Returns the standard deviation of the values that have been added.
210 * <p>
211 * Double.NaN is returned if no values have been added.
212 * </p>
213 * @return the standard deviation
214 */
215 public double getStandardDeviation() {
216 double stdDev = Double.NaN;
217 if (getN() > 0) {
218 if (getN() > 1) {
219 stdDev = Math.sqrt(getVariance());
220 } else {
221 stdDev = 0.0;
222 }
223 }
224 return (stdDev);
225 }
226
227 /**
228 * Returns the variance of the values that have been added.
229 * <p>
230 * Double.NaN is returned if no values have been added.
231 * </p>
232 * @return the variance
233 */
234 public double getVariance() {
235 if (varianceImpl == variance) {
236 return new Variance(secondMoment).getResult();
237 } else {
238 return varianceImpl.getResult();
239 }
240 }
241
242 /**
243 * Returns the maximum of the values that have been added.
244 * <p>
245 * Double.NaN is returned if no values have been added.
246 * </p>
247 * @return the maximum
248 */
249 public double getMax() {
250 return maxImpl.getResult();
251 }
252
253 /**
254 * Returns the minimum of the values that have been added.
255 * <p>
256 * Double.NaN is returned if no values have been added.
257 * </p>
258 * @return the minimum
259 */
260 public double getMin() {
261 return minImpl.getResult();
262 }
263
264 /**
265 * Returns the geometric mean of the values that have been added.
266 * <p>
267 * Double.NaN is returned if no values have been added.
268 * </p>
269 * @return the geometric mean
270 */
271 public double getGeometricMean() {
272 return geoMeanImpl.getResult();
273 }
274
275 /**
276 * Returns the sum of the logs of the values that have been added.
277 * <p>
278 * Double.NaN is returned if no values have been added.
279 * </p>
280 * @return the sum of logs
281 * @since 1.2
282 */
283 public double getSumOfLogs() {
284 return sumLogImpl.getResult();
285 }
286
287 /**
288 * Returns a statistic related to the Second Central Moment. Specifically,
289 * what is returned is the sum of squared deviations from the sample mean
290 * among the values that have been added.
291 * <p>
292 * Returns <code>Double.NaN</code> if no data values have been added and
293 * returns <code>0</code> if there is just one value in the data set.</p>
294 * <p>
295 * @return second central moment statistic
296 * @since 2.0
297 */
298 public double getSecondMoment() {
299 return secondMoment.getResult();
300 }
301
302 /**
303 * Generates a text report displaying summary statistics from values that
304 * have been added.
305 * @return String with line feeds displaying statistics
306 * @since 1.2
307 */
308 @Override
309 public String toString() {
310 StringBuffer outBuffer = new StringBuffer();
311 String endl = "\n";
312 outBuffer.append("SummaryStatistics:").append(endl);
313 outBuffer.append("n: ").append(getN()).append(endl);
314 outBuffer.append("min: ").append(getMin()).append(endl);
315 outBuffer.append("max: ").append(getMax()).append(endl);
316 outBuffer.append("mean: ").append(getMean()).append(endl);
317 outBuffer.append("geometric mean: ").append(getGeometricMean())
318 .append(endl);
319 outBuffer.append("variance: ").append(getVariance()).append(endl);
320 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
321 outBuffer.append("standard deviation: ").append(getStandardDeviation())
322 .append(endl);
323 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
324 return outBuffer.toString();
325 }
326
327 /**
328 * Resets all statistics and storage
329 */
330 public void clear() {
331 this.n = 0;
332 minImpl.clear();
333 maxImpl.clear();
334 sumImpl.clear();
335 sumLogImpl.clear();
336 sumsqImpl.clear();
337 geoMeanImpl.clear();
338 secondMoment.clear();
339 if (meanImpl != mean) {
340 meanImpl.clear();
341 }
342 if (varianceImpl != variance) {
343 varianceImpl.clear();
344 }
345 }
346
347 /**
348 * Returns true iff <code>object</code> is a
349 * <code>SummaryStatistics</code> instance and all statistics have the
350 * same values as this.
351 * @param object the object to test equality against.
352 * @return true if object equals this
353 */
354 @Override
355 public boolean equals(Object object) {
356 if (object == this) {
357 return true;
358 }
359 if (object instanceof SummaryStatistics == false) {
360 return false;
361 }
362 SummaryStatistics stat = (SummaryStatistics)object;
363 return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) &&
364 MathUtils.equals(stat.getMax(), this.getMax()) &&
365 MathUtils.equals(stat.getMean(), this.getMean()) &&
366 MathUtils.equals(stat.getMin(), this.getMin()) &&
367 MathUtils.equals(stat.getN(), this.getN()) &&
368 MathUtils.equals(stat.getSum(), this.getSum()) &&
369 MathUtils.equals(stat.getSumsq(), this.getSumsq()) &&
370 MathUtils.equals(stat.getVariance(),
371 this.getVariance()));
372 }
373
374 /**
375 * Returns hash code based on values of statistics
376 * @return hash code
377 */
378 @Override
379 public int hashCode() {
380 int result = 31 + MathUtils.hash(getGeometricMean());
381 result = result * 31 + MathUtils.hash(getGeometricMean());
382 result = result * 31 + MathUtils.hash(getMax());
383 result = result * 31 + MathUtils.hash(getMean());
384 result = result * 31 + MathUtils.hash(getMin());
385 result = result * 31 + MathUtils.hash(getN());
386 result = result * 31 + MathUtils.hash(getSum());
387 result = result * 31 + MathUtils.hash(getSumsq());
388 result = result * 31 + MathUtils.hash(getVariance());
389 return result;
390 }
391
392 // Getters and setters for statistics implementations
393 /**
394 * Returns the currently configured Sum implementation
395 * @return the StorelessUnivariateStatistic implementing the sum
396 * @since 1.2
397 */
398 public StorelessUnivariateStatistic getSumImpl() {
399 return sumImpl;
400 }
401
402 /**
403 * <p>
404 * Sets the implementation for the Sum.
405 * </p>
406 * <p>
407 * This method must be activated before any data has been added - i.e.,
408 * before {@link #addValue(double) addValue} has been used to add data;
409 * otherwise an IllegalStateException will be thrown.
410 * </p>
411 * @param sumImpl the StorelessUnivariateStatistic instance to use for
412 * computing the Sum
413 * @throws IllegalStateException if data has already been added (i.e if n >
414 * 0)
415 * @since 1.2
416 */
417 public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
418 checkEmpty();
419 this.sumImpl = sumImpl;
420 }
421
422 /**
423 * Returns the currently configured sum of squares implementation
424 * @return the StorelessUnivariateStatistic implementing the sum of squares
425 * @since 1.2
426 */
427 public StorelessUnivariateStatistic getSumsqImpl() {
428 return sumsqImpl;
429 }
430
431 /**
432 * <p>
433 * Sets the implementation for the sum of squares.
434 * </p>
435 * <p>
436 * This method must be activated before any data has been added - i.e.,
437 * before {@link #addValue(double) addValue} has been used to add data;
438 * otherwise an IllegalStateException will be thrown.
439 * </p>
440 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
441 * computing the sum of squares
442 * @throws IllegalStateException if data has already been added (i.e if n >
443 * 0)
444 * @since 1.2
445 */
446 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
447 checkEmpty();
448 this.sumsqImpl = sumsqImpl;
449 }
450
451 /**
452 * Returns the currently configured minimum implementation
453 * @return the StorelessUnivariateStatistic implementing the minimum
454 * @since 1.2
455 */
456 public StorelessUnivariateStatistic getMinImpl() {
457 return minImpl;
458 }
459
460 /**
461 * <p>
462 * Sets the implementation for the minimum.
463 * </p>
464 * <p>
465 * This method must be activated before any data has been added - i.e.,
466 * before {@link #addValue(double) addValue} has been used to add data;
467 * otherwise an IllegalStateException will be thrown.
468 * </p>
469 * @param minImpl the StorelessUnivariateStatistic instance to use for
470 * computing the minimum
471 * @throws IllegalStateException if data has already been added (i.e if n >
472 * 0)
473 * @since 1.2
474 */
475 public void setMinImpl(StorelessUnivariateStatistic minImpl) {
476 checkEmpty();
477 this.minImpl = minImpl;
478 }
479
480 /**
481 * Returns the currently configured maximum implementation
482 * @return the StorelessUnivariateStatistic implementing the maximum
483 * @since 1.2
484 */
485 public StorelessUnivariateStatistic getMaxImpl() {
486 return maxImpl;
487 }
488
489 /**
490 * <p>
491 * Sets the implementation for the maximum.
492 * </p>
493 * <p>
494 * This method must be activated before any data has been added - i.e.,
495 * before {@link #addValue(double) addValue} has been used to add data;
496 * otherwise an IllegalStateException will be thrown.
497 * </p>
498 * @param maxImpl the StorelessUnivariateStatistic instance to use for
499 * computing the maximum
500 * @throws IllegalStateException if data has already been added (i.e if n >
501 * 0)
502 * @since 1.2
503 */
504 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
505 checkEmpty();
506 this.maxImpl = maxImpl;
507 }
508
509 /**
510 * Returns the currently configured sum of logs implementation
511 * @return the StorelessUnivariateStatistic implementing the log sum
512 * @since 1.2
513 */
514 public StorelessUnivariateStatistic getSumLogImpl() {
515 return sumLogImpl;
516 }
517
518 /**
519 * <p>
520 * Sets the implementation for the sum of logs.
521 * </p>
522 * <p>
523 * This method must be activated before any data has been added - i.e.,
524 * before {@link #addValue(double) addValue} has been used to add data;
525 * otherwise an IllegalStateException will be thrown.
526 * </p>
527 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
528 * computing the log sum
529 * @throws IllegalStateException if data has already been added (i.e if n >
530 * 0)
531 * @since 1.2
532 */
533 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
534 checkEmpty();
535 this.sumLogImpl = sumLogImpl;
536 geoMean.setSumLogImpl(sumLogImpl);
537 }
538
539 /**
540 * Returns the currently configured geometric mean implementation
541 * @return the StorelessUnivariateStatistic implementing the geometric mean
542 * @since 1.2
543 */
544 public StorelessUnivariateStatistic getGeoMeanImpl() {
545 return geoMeanImpl;
546 }
547
548 /**
549 * <p>
550 * Sets the implementation for the geometric mean.
551 * </p>
552 * <p>
553 * This method must be activated before any data has been added - i.e.,
554 * before {@link #addValue(double) addValue} has been used to add data;
555 * otherwise an IllegalStateException will be thrown.
556 * </p>
557 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
558 * computing the geometric mean
559 * @throws IllegalStateException if data has already been added (i.e if n >
560 * 0)
561 * @since 1.2
562 */
563 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
564 checkEmpty();
565 this.geoMeanImpl = geoMeanImpl;
566 }
567
568 /**
569 * Returns the currently configured mean implementation
570 * @return the StorelessUnivariateStatistic implementing the mean
571 * @since 1.2
572 */
573 public StorelessUnivariateStatistic getMeanImpl() {
574 return meanImpl;
575 }
576
577 /**
578 * <p>
579 * Sets the implementation for the mean.
580 * </p>
581 * <p>
582 * This method must be activated before any data has been added - i.e.,
583 * before {@link #addValue(double) addValue} has been used to add data;
584 * otherwise an IllegalStateException will be thrown.
585 * </p>
586 * @param meanImpl the StorelessUnivariateStatistic instance to use for
587 * computing the mean
588 * @throws IllegalStateException if data has already been added (i.e if n >
589 * 0)
590 * @since 1.2
591 */
592 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
593 checkEmpty();
594 this.meanImpl = meanImpl;
595 }
596
597 /**
598 * Returns the currently configured variance implementation
599 * @return the StorelessUnivariateStatistic implementing the variance
600 * @since 1.2
601 */
602 public StorelessUnivariateStatistic getVarianceImpl() {
603 return varianceImpl;
604 }
605
606 /**
607 * <p>
608 * Sets the implementation for the variance.
609 * </p>
610 * <p>
611 * This method must be activated before any data has been added - i.e.,
612 * before {@link #addValue(double) addValue} has been used to add data;
613 * otherwise an IllegalStateException will be thrown.
614 * </p>
615 * @param varianceImpl the StorelessUnivariateStatistic instance to use for
616 * computing the variance
617 * @throws IllegalStateException if data has already been added (i.e if n >
618 * 0)
619 * @since 1.2
620 */
621 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
622 checkEmpty();
623 this.varianceImpl = varianceImpl;
624 }
625
626 /**
627 * Throws IllegalStateException if n > 0.
628 */
629 private void checkEmpty() {
630 if (n > 0) {
631 throw MathRuntimeException.createIllegalStateException(
632 "{0} values have been added before statistic is configured",
633 n);
634 }
635 }
636
637 /**
638 * Returns a copy of this SummaryStatistics instance with the same internal state.
639 *
640 * @return a copy of this
641 */
642 public SummaryStatistics copy() {
643 SummaryStatistics result = new SummaryStatistics();
644 copy(this, result);
645 return result;
646 }
647
648 /**
649 * Copies source to dest.
650 * <p>Neither source nor dest can be null.</p>
651 *
652 * @param source SummaryStatistics to copy
653 * @param dest SummaryStatistics to copy to
654 * @throws NullPointerException if either source or dest is null
655 */
656 public static void copy(SummaryStatistics source, SummaryStatistics dest) {
657 dest.maxImpl = source.maxImpl.copy();
658 dest.meanImpl = source.meanImpl.copy();
659 dest.minImpl = source.minImpl.copy();
660 dest.sumImpl = source.sumImpl.copy();
661 dest.varianceImpl = source.varianceImpl.copy();
662 dest.sumLogImpl = source.sumLogImpl.copy();
663 dest.sumsqImpl = source.sumsqImpl.copy();
664 if (source.getGeoMeanImpl() instanceof GeometricMean) {
665 // Keep geoMeanImpl, sumLogImpl in synch
666 dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
667 } else {
668 dest.geoMeanImpl = source.geoMeanImpl.copy();
669 }
670 SecondMoment.copy(source.secondMoment, dest.secondMoment);
671 dest.n = source.n;
672
673 // Make sure that if stat == statImpl in source, same
674 // holds in dest; otherwise copy stat
675 if (source.geoMean == source.geoMeanImpl) {
676 dest.geoMean = (GeometricMean) dest.geoMeanImpl;
677 } else {
678 GeometricMean.copy(source.geoMean, dest.geoMean);
679 }
680 if (source.max == source.maxImpl) {
681 dest.max = (Max) dest.maxImpl;
682 } else {
683 Max.copy(source.max, dest.max);
684 }
685 if (source.mean == source.meanImpl) {
686 dest.mean = (Mean) dest.meanImpl;
687 } else {
688 Mean.copy(source.mean, dest.mean);
689 }
690 if (source.min == source.minImpl) {
691 dest.min = (Min) dest.minImpl;
692 } else {
693 Min.copy(source.min, dest.min);
694 }
695 if (source.sum == source.sumImpl) {
696 dest.sum = (Sum) dest.sumImpl;
697 } else {
698 Sum.copy(source.sum, dest.sum);
699 }
700 if (source.variance == source.varianceImpl) {
701 dest.variance = (Variance) dest.varianceImpl;
702 } else {
703 Variance.copy(source.variance, dest.variance);
704 }
705 if (source.sumLog == source.sumLogImpl) {
706 dest.sumLog = (SumOfLogs) dest.sumLogImpl;
707 } else {
708 SumOfLogs.copy(source.sumLog, dest.sumLog);
709 }
710 if (source.sumsq == source.sumsqImpl) {
711 dest.sumsq = (SumOfSquares) dest.sumsqImpl;
712 } else {
713 SumOfSquares.copy(source.sumsq, dest.sumsq);
714 }
715 }
716 }