001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020 import java.util.Arrays;
021
022 import org.apache.commons.math.DimensionMismatchException;
023 import org.apache.commons.math.MathRuntimeException;
024 import org.apache.commons.math.linear.RealMatrix;
025 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
026 import org.apache.commons.math.stat.descriptive.moment.Mean;
027 import org.apache.commons.math.stat.descriptive.moment.VectorialCovariance;
028 import org.apache.commons.math.stat.descriptive.rank.Max;
029 import org.apache.commons.math.stat.descriptive.rank.Min;
030 import org.apache.commons.math.stat.descriptive.summary.Sum;
031 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
032 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
033 import org.apache.commons.math.util.MathUtils;
034
035 /**
036 * <p>Computes summary statistics for a stream of n-tuples added using the
037 * {@link #addValue(double[]) addValue} method. The data values are not stored
038 * in memory, so this class can be used to compute statistics for very large
039 * n-tuple streams.</p>
040 *
041 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
042 * summary state and compute statistics are configurable via setters.
043 * For example, the default implementation for the mean can be overridden by
044 * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
045 * parameters to these methods must implement the
046 * {@link StorelessUnivariateStatistic} interface and configuration must be
047 * completed before <code>addValue</code> is called. No configuration is
048 * necessary to use the default, commons-math provided implementations.</p>
049 *
050 * <p>To compute statistics for a stream of n-tuples, construct a
051 * MultivariateStatistics instance with dimension n and then use
052 * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
053 * methods where Xxx is a statistic return an array of <code>double</code>
054 * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
055 * value of the given statistic for data range consisting of the i<sup>th</sup> element of
056 * each of the input n-tuples. For example, if <code>addValue</code> is called
057 * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
058 * <code>getSum</code> will return a three-element array with values
059 * {0+3+6, 1+4+7, 2+5+8}</p>
060 *
061 * <p>Note: This class is not thread-safe. Use
062 * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
063 * threads is required.</p>
064 *
065 * @since 1.2
066 * @version $Revision: 762116 $ $Date: 2009-04-05 12:48:53 -0400 (Sun, 05 Apr 2009) $
067 */
068 public class MultivariateSummaryStatistics
069 implements StatisticalMultivariateSummary, Serializable {
070
071 /** Serialization UID */
072 private static final long serialVersionUID = 2271900808994826718L;
073
074 /**
075 * Construct a MultivariateSummaryStatistics instance
076 * @param k dimension of the data
077 * @param isCovarianceBiasCorrected if true, the unbiased sample
078 * covariance is computed, otherwise the biased population covariance
079 * is computed
080 */
081 public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
082 this.k = k;
083
084 sumImpl = new StorelessUnivariateStatistic[k];
085 sumSqImpl = new StorelessUnivariateStatistic[k];
086 minImpl = new StorelessUnivariateStatistic[k];
087 maxImpl = new StorelessUnivariateStatistic[k];
088 sumLogImpl = new StorelessUnivariateStatistic[k];
089 geoMeanImpl = new StorelessUnivariateStatistic[k];
090 meanImpl = new StorelessUnivariateStatistic[k];
091
092 for (int i = 0; i < k; ++i) {
093 sumImpl[i] = new Sum();
094 sumSqImpl[i] = new SumOfSquares();
095 minImpl[i] = new Min();
096 maxImpl[i] = new Max();
097 sumLogImpl[i] = new SumOfLogs();
098 geoMeanImpl[i] = new GeometricMean();
099 meanImpl[i] = new Mean();
100 }
101
102 covarianceImpl =
103 new VectorialCovariance(k, isCovarianceBiasCorrected);
104
105 }
106
107 /** Dimension of the data. */
108 private int k;
109
110 /** Count of values that have been added */
111 private long n = 0;
112
113 /** Sum statistic implementation - can be reset by setter. */
114 private StorelessUnivariateStatistic[] sumImpl;
115
116 /** Sum of squares statistic implementation - can be reset by setter. */
117 private StorelessUnivariateStatistic[] sumSqImpl;
118
119 /** Minimum statistic implementation - can be reset by setter. */
120 private StorelessUnivariateStatistic[] minImpl;
121
122 /** Maximum statistic implementation - can be reset by setter. */
123 private StorelessUnivariateStatistic[] maxImpl;
124
125 /** Sum of log statistic implementation - can be reset by setter. */
126 private StorelessUnivariateStatistic[] sumLogImpl;
127
128 /** Geometric mean statistic implementation - can be reset by setter. */
129 private StorelessUnivariateStatistic[] geoMeanImpl;
130
131 /** Mean statistic implementation - can be reset by setter. */
132 private StorelessUnivariateStatistic[] meanImpl;
133
134 /** Covariance statistic implementation - cannot be reset. */
135 private VectorialCovariance covarianceImpl;
136
137 /**
138 * Add an n-tuple to the data
139 *
140 * @param value the n-tuple to add
141 * @throws DimensionMismatchException if the length of the array
142 * does not match the one used at construction
143 */
144 public void addValue(double[] value)
145 throws DimensionMismatchException {
146 checkDimension(value.length);
147 for (int i = 0; i < k; ++i) {
148 double v = value[i];
149 sumImpl[i].increment(v);
150 sumSqImpl[i].increment(v);
151 minImpl[i].increment(v);
152 maxImpl[i].increment(v);
153 sumLogImpl[i].increment(v);
154 geoMeanImpl[i].increment(v);
155 meanImpl[i].increment(v);
156 }
157 covarianceImpl.increment(value);
158 n++;
159 }
160
161 /**
162 * Returns the dimension of the data
163 * @return The dimension of the data
164 */
165 public int getDimension() {
166 return k;
167 }
168
169 /**
170 * Returns the number of available values
171 * @return The number of available values
172 */
173 public long getN() {
174 return n;
175 }
176
177 /**
178 * Returns an array of the results of a statistic.
179 * @param stats univariate statistic array
180 * @return results array
181 */
182 private double[] getResults(StorelessUnivariateStatistic[] stats) {
183 double[] results = new double[stats.length];
184 for (int i = 0; i < results.length; ++i) {
185 results[i] = stats[i].getResult();
186 }
187 return results;
188 }
189
190 /**
191 * Returns an array whose i<sup>th</sup> entry is the sum of the
192 * i<sup>th</sup> entries of the arrays that have been added using
193 * {@link #addValue(double[])}
194 *
195 * @return the array of component sums
196 */
197 public double[] getSum() {
198 return getResults(sumImpl);
199 }
200
201 /**
202 * Returns an array whose i<sup>th</sup> entry is the sum of squares of the
203 * i<sup>th</sup> entries of the arrays that have been added using
204 * {@link #addValue(double[])}
205 *
206 * @return the array of component sums of squares
207 */
208 public double[] getSumSq() {
209 return getResults(sumSqImpl);
210 }
211
212 /**
213 * Returns an array whose i<sup>th</sup> entry is the sum of logs of the
214 * i<sup>th</sup> entries of the arrays that have been added using
215 * {@link #addValue(double[])}
216 *
217 * @return the array of component log sums
218 */
219 public double[] getSumLog() {
220 return getResults(sumLogImpl);
221 }
222
223 /**
224 * Returns an array whose i<sup>th</sup> entry is the mean of the
225 * i<sup>th</sup> entries of the arrays that have been added using
226 * {@link #addValue(double[])}
227 *
228 * @return the array of component means
229 */
230 public double[] getMean() {
231 return getResults(meanImpl);
232 }
233
234 /**
235 * Returns an array whose i<sup>th</sup> entry is the standard deviation of the
236 * i<sup>th</sup> entries of the arrays that have been added using
237 * {@link #addValue(double[])}
238 *
239 * @return the array of component standard deviations
240 */
241 public double[] getStandardDeviation() {
242 double[] stdDev = new double[k];
243 if (getN() < 1) {
244 Arrays.fill(stdDev, Double.NaN);
245 } else if (getN() < 2) {
246 Arrays.fill(stdDev, 0.0);
247 } else {
248 RealMatrix matrix = covarianceImpl.getResult();
249 for (int i = 0; i < k; ++i) {
250 stdDev[i] = Math.sqrt(matrix.getEntry(i, i));
251 }
252 }
253 return stdDev;
254 }
255
256 /**
257 * Returns the covariance matrix of the values that have been added.
258 *
259 * @return the covariance matrix
260 */
261 public RealMatrix getCovariance() {
262 return covarianceImpl.getResult();
263 }
264
265 /**
266 * Returns an array whose i<sup>th</sup> entry is the maximum of the
267 * i<sup>th</sup> entries of the arrays that have been added using
268 * {@link #addValue(double[])}
269 *
270 * @return the array of component maxima
271 */
272 public double[] getMax() {
273 return getResults(maxImpl);
274 }
275
276 /**
277 * Returns an array whose i<sup>th</sup> entry is the minimum of the
278 * i<sup>th</sup> entries of the arrays that have been added using
279 * {@link #addValue(double[])}
280 *
281 * @return the array of component minima
282 */
283 public double[] getMin() {
284 return getResults(minImpl);
285 }
286
287 /**
288 * Returns an array whose i<sup>th</sup> entry is the geometric mean of the
289 * i<sup>th</sup> entries of the arrays that have been added using
290 * {@link #addValue(double[])}
291 *
292 * @return the array of component geometric means
293 */
294 public double[] getGeometricMean() {
295 return getResults(geoMeanImpl);
296 }
297
298 /**
299 * Generates a text report displaying
300 * summary statistics from values that
301 * have been added.
302 * @return String with line feeds displaying statistics
303 */
304 @Override
305 public String toString() {
306 StringBuffer outBuffer = new StringBuffer();
307 outBuffer.append("MultivariateSummaryStatistics:\n");
308 outBuffer.append("n: " + getN() + "\n");
309 append(outBuffer, getMin(), "min: ", ", ", "\n");
310 append(outBuffer, getMax(), "max: ", ", ", "\n");
311 append(outBuffer, getMean(), "mean: ", ", ", "\n");
312 append(outBuffer, getGeometricMean(), "geometric mean: ", ", ", "\n");
313 append(outBuffer, getSumSq(), "sum of squares: ", ", ", "\n");
314 append(outBuffer, getSumLog(), "sum of logarithms: ", ", ", "\n");
315 append(outBuffer, getStandardDeviation(), "standard deviation: ", ", ", "\n");
316 outBuffer.append("covariance: " + getCovariance().toString() + "\n");
317 return outBuffer.toString();
318 }
319
320 /**
321 * Append a text representation of an array to a buffer.
322 * @param buffer buffer to fill
323 * @param data data array
324 * @param prefix text prefix
325 * @param separator elements separator
326 * @param suffix text suffix
327 */
328 private void append(StringBuffer buffer, double[] data,
329 String prefix, String separator, String suffix) {
330 buffer.append(prefix);
331 for (int i = 0; i < data.length; ++i) {
332 if (i > 0) {
333 buffer.append(separator);
334 }
335 buffer.append(data[i]);
336 }
337 buffer.append(suffix);
338 }
339
340 /**
341 * Resets all statistics and storage
342 */
343 public void clear() {
344 this.n = 0;
345 for (int i = 0; i < k; ++i) {
346 minImpl[i].clear();
347 maxImpl[i].clear();
348 sumImpl[i].clear();
349 sumLogImpl[i].clear();
350 sumSqImpl[i].clear();
351 geoMeanImpl[i].clear();
352 meanImpl[i].clear();
353 }
354 covarianceImpl.clear();
355 }
356
357 /**
358 * Returns true iff <code>object</code> is a <code>SummaryStatistics</code>
359 * instance and all statistics have the same values as this.
360 * @param object the object to test equality against.
361 * @return true if object equals this
362 */
363 @Override
364 public boolean equals(Object object) {
365 if (object == this ) {
366 return true;
367 }
368 if (object instanceof MultivariateSummaryStatistics == false) {
369 return false;
370 }
371 MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
372 return (MathUtils.equals(stat.getGeometricMean(),
373 this.getGeometricMean()) &&
374 MathUtils.equals(stat.getMax(), this.getMax()) &&
375 MathUtils.equals(stat.getMean(),this.getMean()) &&
376 MathUtils.equals(stat.getMin(),this.getMin()) &&
377 MathUtils.equals(stat.getN(), this.getN()) &&
378 MathUtils.equals(stat.getSum(), this.getSum()) &&
379 MathUtils.equals(stat.getSumSq(),this.getSumSq()) &&
380 MathUtils.equals(stat.getSumLog(),this.getSumLog()) &&
381 stat.getCovariance().equals(this.getCovariance()));
382 }
383
384 /**
385 * Returns hash code based on values of statistics
386 *
387 * @return hash code
388 */
389 @Override
390 public int hashCode() {
391 int result = 31 + MathUtils.hash(getGeometricMean());
392 result = result * 31 + MathUtils.hash(getGeometricMean());
393 result = result * 31 + MathUtils.hash(getMax());
394 result = result * 31 + MathUtils.hash(getMean());
395 result = result * 31 + MathUtils.hash(getMin());
396 result = result * 31 + MathUtils.hash(getN());
397 result = result * 31 + MathUtils.hash(getSum());
398 result = result * 31 + MathUtils.hash(getSumSq());
399 result = result * 31 + MathUtils.hash(getSumLog());
400 result = result * 31 + getCovariance().hashCode();
401 return result;
402 }
403
404 // Getters and setters for statistics implementations
405 /**
406 * Sets statistics implementations.
407 * @param newImpl new implementations for statistics
408 * @param oldImpl old implementations for statistics
409 * @throws DimensionMismatchException if the array dimension
410 * does not match the one used at construction
411 * @throws IllegalStateException if data has already been added
412 * (i.e if n > 0)
413 */
414 private void setImpl(StorelessUnivariateStatistic[] newImpl,
415 StorelessUnivariateStatistic[] oldImpl)
416 throws DimensionMismatchException, IllegalStateException {
417 checkEmpty();
418 checkDimension(newImpl.length);
419 System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
420 }
421
422 /**
423 * Returns the currently configured Sum implementation
424 *
425 * @return the StorelessUnivariateStatistic implementing the sum
426 */
427 public StorelessUnivariateStatistic[] getSumImpl() {
428 return sumImpl.clone();
429 }
430
431 /**
432 * <p>Sets the implementation for the Sum.</p>
433 * <p>This method must be activated before any data has been added - i.e.,
434 * before {@link #addValue(double[]) addValue} has been used to add data;
435 * otherwise an IllegalStateException will be thrown.</p>
436 *
437 * @param sumImpl the StorelessUnivariateStatistic instance to use
438 * for computing the Sum
439 * @throws DimensionMismatchException if the array dimension
440 * does not match the one used at construction
441 * @throws IllegalStateException if data has already been added
442 * (i.e if n > 0)
443 */
444 public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
445 throws DimensionMismatchException {
446 setImpl(sumImpl, this.sumImpl);
447 }
448
449 /**
450 * Returns the currently configured sum of squares implementation
451 *
452 * @return the StorelessUnivariateStatistic implementing the sum of squares
453 */
454 public StorelessUnivariateStatistic[] getSumsqImpl() {
455 return sumSqImpl.clone();
456 }
457
458 /**
459 * <p>Sets the implementation for the sum of squares.</p>
460 * <p>This method must be activated before any data has been added - i.e.,
461 * before {@link #addValue(double[]) addValue} has been used to add data;
462 * otherwise an IllegalStateException will be thrown.</p>
463 *
464 * @param sumsqImpl the StorelessUnivariateStatistic instance to use
465 * for computing the sum of squares
466 * @throws DimensionMismatchException if the array dimension
467 * does not match the one used at construction
468 * @throws IllegalStateException if data has already been added
469 * (i.e if n > 0)
470 */
471 public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
472 throws DimensionMismatchException {
473 setImpl(sumsqImpl, this.sumSqImpl);
474 }
475
476 /**
477 * Returns the currently configured minimum implementation
478 *
479 * @return the StorelessUnivariateStatistic implementing the minimum
480 */
481 public StorelessUnivariateStatistic[] getMinImpl() {
482 return minImpl.clone();
483 }
484
485 /**
486 * <p>Sets the implementation for the minimum.</p>
487 * <p>This method must be activated before any data has been added - i.e.,
488 * before {@link #addValue(double[]) addValue} has been used to add data;
489 * otherwise an IllegalStateException will be thrown.</p>
490 *
491 * @param minImpl the StorelessUnivariateStatistic instance to use
492 * for computing the minimum
493 * @throws DimensionMismatchException if the array dimension
494 * does not match the one used at construction
495 * @throws IllegalStateException if data has already been added
496 * (i.e if n > 0)
497 */
498 public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
499 throws DimensionMismatchException {
500 setImpl(minImpl, this.minImpl);
501 }
502
503 /**
504 * Returns the currently configured maximum implementation
505 *
506 * @return the StorelessUnivariateStatistic implementing the maximum
507 */
508 public StorelessUnivariateStatistic[] getMaxImpl() {
509 return maxImpl.clone();
510 }
511
512 /**
513 * <p>Sets the implementation for the maximum.</p>
514 * <p>This method must be activated before any data has been added - i.e.,
515 * before {@link #addValue(double[]) addValue} has been used to add data;
516 * otherwise an IllegalStateException will be thrown.</p>
517 *
518 * @param maxImpl the StorelessUnivariateStatistic instance to use
519 * for computing the maximum
520 * @throws DimensionMismatchException if the array dimension
521 * does not match the one used at construction
522 * @throws IllegalStateException if data has already been added
523 * (i.e if n > 0)
524 */
525 public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
526 throws DimensionMismatchException {
527 setImpl(maxImpl, this.maxImpl);
528 }
529
530 /**
531 * Returns the currently configured sum of logs implementation
532 *
533 * @return the StorelessUnivariateStatistic implementing the log sum
534 */
535 public StorelessUnivariateStatistic[] getSumLogImpl() {
536 return sumLogImpl.clone();
537 }
538
539 /**
540 * <p>Sets the implementation for the sum of logs.</p>
541 * <p>This method must be activated before any data has been added - i.e.,
542 * before {@link #addValue(double[]) addValue} has been used to add data;
543 * otherwise an IllegalStateException will be thrown.</p>
544 *
545 * @param sumLogImpl the StorelessUnivariateStatistic instance to use
546 * for computing the log sum
547 * @throws DimensionMismatchException if the array dimension
548 * does not match the one used at construction
549 * @throws IllegalStateException if data has already been added
550 * (i.e if n > 0)
551 */
552 public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
553 throws DimensionMismatchException {
554 setImpl(sumLogImpl, this.sumLogImpl);
555 }
556
557 /**
558 * Returns the currently configured geometric mean implementation
559 *
560 * @return the StorelessUnivariateStatistic implementing the geometric mean
561 */
562 public StorelessUnivariateStatistic[] getGeoMeanImpl() {
563 return geoMeanImpl.clone();
564 }
565
566 /**
567 * <p>Sets the implementation for the geometric mean.</p>
568 * <p>This method must be activated before any data has been added - i.e.,
569 * before {@link #addValue(double[]) addValue} has been used to add data;
570 * otherwise an IllegalStateException will be thrown.</p>
571 *
572 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
573 * for computing the geometric mean
574 * @throws DimensionMismatchException if the array dimension
575 * does not match the one used at construction
576 * @throws IllegalStateException if data has already been added
577 * (i.e if n > 0)
578 */
579 public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
580 throws DimensionMismatchException {
581 setImpl(geoMeanImpl, this.geoMeanImpl);
582 }
583
584 /**
585 * Returns the currently configured mean implementation
586 *
587 * @return the StorelessUnivariateStatistic implementing the mean
588 */
589 public StorelessUnivariateStatistic[] getMeanImpl() {
590 return meanImpl.clone();
591 }
592
593 /**
594 * <p>Sets the implementation for the mean.</p>
595 * <p>This method must be activated before any data has been added - i.e.,
596 * before {@link #addValue(double[]) addValue} has been used to add data;
597 * otherwise an IllegalStateException will be thrown.</p>
598 *
599 * @param meanImpl the StorelessUnivariateStatistic instance to use
600 * for computing the mean
601 * @throws DimensionMismatchException if the array dimension
602 * does not match the one used at construction
603 * @throws IllegalStateException if data has already been added
604 * (i.e if n > 0)
605 */
606 public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
607 throws DimensionMismatchException {
608 setImpl(meanImpl, this.meanImpl);
609 }
610
611 /**
612 * Throws IllegalStateException if n > 0.
613 */
614 private void checkEmpty() {
615 if (n > 0) {
616 throw MathRuntimeException.createIllegalStateException(
617 "{0} values have been added before statistic is configured",
618 n);
619 }
620 }
621
622 /**
623 * Throws DimensionMismatchException if dimension != k.
624 * @param dimension dimension to check
625 * @throws DimensionMismatchException if dimension != k
626 */
627 private void checkDimension(int dimension)
628 throws DimensionMismatchException {
629 if (dimension != k) {
630 throw new DimensionMismatchException(dimension, k);
631 }
632 }
633
634 }