001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive.moment; 018 019 import java.io.Serializable; 020 021 import org.apache.commons.math.MathRuntimeException; 022 import org.apache.commons.math.stat.descriptive.WeightedEvaluation; 023 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; 024 025 /** 026 * Computes the variance of the available values. By default, the unbiased 027 * "sample variance" definitional formula is used: 028 * <p> 029 * variance = sum((x_i - mean)^2) / (n - 1) </p> 030 * <p> 031 * where mean is the {@link Mean} and <code>n</code> is the number 032 * of sample observations.</p> 033 * <p> 034 * The definitional formula does not have good numerical properties, so 035 * this implementation does not compute the statistic using the definitional 036 * formula. <ul> 037 * <li> The <code>getResult</code> method computes the variance using 038 * updating formulas based on West's algorithm, as described in 039 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and 040 * J. G. Lewis 1979, <i>Communications of the ACM</i>, 041 * vol. 22 no. 9, pp. 526-531.</a></li> 042 * <li> The <code>evaluate</code> methods leverage the fact that they have the 043 * full array of values in memory to execute a two-pass algorithm. 044 * Specifically, these methods use the "corrected two-pass algorithm" from 045 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, 046 * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul> 047 * Note that adding values using <code>increment</code> or 048 * <code>incrementAll</code> and then executing <code>getResult</code> will 049 * sometimes give a different, less accurate, result than executing 050 * <code>evaluate</code> with the full array of values. The former approach 051 * should only be used when the full array of values is not available.</p> 052 * <p> 053 * The "population variance" ( sum((x_i - mean)^2) / n ) can also 054 * be computed using this statistic. The <code>isBiasCorrected</code> 055 * property determines whether the "population" or "sample" value is 056 * returned by the <code>evaluate</code> and <code>getResult</code> methods. 057 * To compute population variances, set this property to <code>false.</code> 058 * </p> 059 * <p> 060 * <strong>Note that this implementation is not synchronized.</strong> If 061 * multiple threads access an instance of this class concurrently, and at least 062 * one of the threads invokes the <code>increment()</code> or 063 * <code>clear()</code> method, it must be synchronized externally.</p> 064 * 065 * @version $Revision: 908626 $ $Date: 2010-02-10 13:44:42 -0500 (Wed, 10 Feb 2010) $ 066 */ 067 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation { 068 069 /** Serializable version identifier */ 070 private static final long serialVersionUID = -9111962718267217978L; 071 072 /** SecondMoment is used in incremental calculation of Variance*/ 073 protected SecondMoment moment = null; 074 075 /** 076 * Boolean test to determine if this Variance should also increment 077 * the second moment, this evaluates to false when this Variance is 078 * constructed with an external SecondMoment as a parameter. 079 */ 080 protected boolean incMoment = true; 081 082 /** 083 * Determines whether or not bias correction is applied when computing the 084 * value of the statisic. True means that bias is corrected. See 085 * {@link Variance} for details on the formula. 086 */ 087 private boolean isBiasCorrected = true; 088 089 /** 090 * Constructs a Variance with default (true) <code>isBiasCorrected</code> 091 * property. 092 */ 093 public Variance() { 094 moment = new SecondMoment(); 095 } 096 097 /** 098 * Constructs a Variance based on an external second moment. 099 * 100 * @param m2 the SecondMoment (Third or Fourth moments work 101 * here as well.) 102 */ 103 public Variance(final SecondMoment m2) { 104 incMoment = false; 105 this.moment = m2; 106 } 107 108 /** 109 * Constructs a Variance with the specified <code>isBiasCorrected</code> 110 * property 111 * 112 * @param isBiasCorrected setting for bias correction - true means 113 * bias will be corrected and is equivalent to using the argumentless 114 * constructor 115 */ 116 public Variance(boolean isBiasCorrected) { 117 moment = new SecondMoment(); 118 this.isBiasCorrected = isBiasCorrected; 119 } 120 121 /** 122 * Constructs a Variance with the specified <code>isBiasCorrected</code> 123 * property and the supplied external second moment. 124 * 125 * @param isBiasCorrected setting for bias correction - true means 126 * bias will be corrected 127 * @param m2 the SecondMoment (Third or Fourth moments work 128 * here as well.) 129 */ 130 public Variance(boolean isBiasCorrected, SecondMoment m2) { 131 incMoment = false; 132 this.moment = m2; 133 this.isBiasCorrected = isBiasCorrected; 134 } 135 136 /** 137 * Copy constructor, creates a new {@code Variance} identical 138 * to the {@code original} 139 * 140 * @param original the {@code Variance} instance to copy 141 */ 142 public Variance(Variance original) { 143 copy(original, this); 144 } 145 146 /** 147 * {@inheritDoc} 148 * <p>If all values are available, it is more accurate to use 149 * {@link #evaluate(double[])} rather than adding values one at a time 150 * using this method and then executing {@link #getResult}, since 151 * <code>evaluate</code> leverages the fact that is has the full 152 * list of values together to execute a two-pass algorithm. 153 * See {@link Variance}.</p> 154 */ 155 @Override 156 public void increment(final double d) { 157 if (incMoment) { 158 moment.increment(d); 159 } 160 } 161 162 /** 163 * {@inheritDoc} 164 */ 165 @Override 166 public double getResult() { 167 if (moment.n == 0) { 168 return Double.NaN; 169 } else if (moment.n == 1) { 170 return 0d; 171 } else { 172 if (isBiasCorrected) { 173 return moment.m2 / (moment.n - 1d); 174 } else { 175 return moment.m2 / (moment.n); 176 } 177 } 178 } 179 180 /** 181 * {@inheritDoc} 182 */ 183 public long getN() { 184 return moment.getN(); 185 } 186 187 /** 188 * {@inheritDoc} 189 */ 190 @Override 191 public void clear() { 192 if (incMoment) { 193 moment.clear(); 194 } 195 } 196 197 /** 198 * Returns the variance of the entries in the input array, or 199 * <code>Double.NaN</code> if the array is empty. 200 * <p> 201 * See {@link Variance} for details on the computing algorithm.</p> 202 * <p> 203 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 204 * <p> 205 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 206 * <p> 207 * Does not change the internal state of the statistic.</p> 208 * 209 * @param values the input array 210 * @return the variance of the values or Double.NaN if length = 0 211 * @throws IllegalArgumentException if the array is null 212 */ 213 @Override 214 public double evaluate(final double[] values) { 215 if (values == null) { 216 throw MathRuntimeException.createIllegalArgumentException("input values array is null"); 217 } 218 return evaluate(values, 0, values.length); 219 } 220 221 /** 222 * Returns the variance of the entries in the specified portion of 223 * the input array, or <code>Double.NaN</code> if the designated subarray 224 * is empty. 225 * <p> 226 * See {@link Variance} for details on the computing algorithm.</p> 227 * <p> 228 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 229 * <p> 230 * Does not change the internal state of the statistic.</p> 231 * <p> 232 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 233 * 234 * @param values the input array 235 * @param begin index of the first array element to include 236 * @param length the number of elements to include 237 * @return the variance of the values or Double.NaN if length = 0 238 * @throws IllegalArgumentException if the array is null or the array index 239 * parameters are not valid 240 */ 241 @Override 242 public double evaluate(final double[] values, final int begin, final int length) { 243 244 double var = Double.NaN; 245 246 if (test(values, begin, length)) { 247 clear(); 248 if (length == 1) { 249 var = 0.0; 250 } else if (length > 1) { 251 Mean mean = new Mean(); 252 double m = mean.evaluate(values, begin, length); 253 var = evaluate(values, m, begin, length); 254 } 255 } 256 return var; 257 } 258 259 /** 260 * <p>Returns the weighted variance of the entries in the specified portion of 261 * the input array, or <code>Double.NaN</code> if the designated subarray 262 * is empty.</p> 263 * <p> 264 * Uses the formula <pre> 265 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) 266 * </pre> 267 * where weightedMean is the weighted mean</p> 268 * <p> 269 * This formula will not return the same result as the unweighted variance when all 270 * weights are equal, unless all weights are equal to 1. The formula assumes that 271 * weights are to be treated as "expansion values," as will be the case if for example 272 * the weights represent frequency counts. To normalize weights so that the denominator 273 * in the variance computation equals the length of the input vector minus one, use <pre> 274 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code> 275 * </pre> 276 * <p> 277 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 278 * <p> 279 * Throws <code>IllegalArgumentException</code> if any of the following are true: 280 * <ul><li>the values array is null</li> 281 * <li>the weights array is null</li> 282 * <li>the weights array does not have the same length as the values array</li> 283 * <li>the weights array contains one or more infinite values</li> 284 * <li>the weights array contains one or more NaN values</li> 285 * <li>the weights array contains negative values</li> 286 * <li>the start and length arguments do not determine a valid array</li> 287 * </ul></p> 288 * <p> 289 * Does not change the internal state of the statistic.</p> 290 * <p> 291 * Throws <code>IllegalArgumentException</code> if either array is null.</p> 292 * 293 * @param values the input array 294 * @param weights the weights array 295 * @param begin index of the first array element to include 296 * @param length the number of elements to include 297 * @return the weighted variance of the values or Double.NaN if length = 0 298 * @throws IllegalArgumentException if the parameters are not valid 299 * @since 2.1 300 */ 301 public double evaluate(final double[] values, final double[] weights, 302 final int begin, final int length) { 303 304 double var = Double.NaN; 305 306 if (test(values, weights,begin, length)) { 307 clear(); 308 if (length == 1) { 309 var = 0.0; 310 } else if (length > 1) { 311 Mean mean = new Mean(); 312 double m = mean.evaluate(values, weights, begin, length); 313 var = evaluate(values, weights, m, begin, length); 314 } 315 } 316 return var; 317 } 318 319 /** 320 * <p> 321 * Returns the weighted variance of the entries in the the input array.</p> 322 * <p> 323 * Uses the formula <pre> 324 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) 325 * </pre> 326 * where weightedMean is the weighted mean</p> 327 * <p> 328 * This formula will not return the same result as the unweighted variance when all 329 * weights are equal, unless all weights are equal to 1. The formula assumes that 330 * weights are to be treated as "expansion values," as will be the case if for example 331 * the weights represent frequency counts. To normalize weights so that the denominator 332 * in the variance computation equals the length of the input vector minus one, use <pre> 333 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code> 334 * </pre> 335 * <p> 336 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 337 * <p> 338 * Throws <code>IllegalArgumentException</code> if any of the following are true: 339 * <ul><li>the values array is null</li> 340 * <li>the weights array is null</li> 341 * <li>the weights array does not have the same length as the values array</li> 342 * <li>the weights array contains one or more infinite values</li> 343 * <li>the weights array contains one or more NaN values</li> 344 * <li>the weights array contains negative values</li> 345 * </ul></p> 346 * <p> 347 * Does not change the internal state of the statistic.</p> 348 * <p> 349 * Throws <code>IllegalArgumentException</code> if either array is null.</p> 350 * 351 * @param values the input array 352 * @param weights the weights array 353 * @return the weighted variance of the values 354 * @throws IllegalArgumentException if the parameters are not valid 355 * @since 2.1 356 */ 357 public double evaluate(final double[] values, final double[] weights) { 358 return evaluate(values, weights, 0, values.length); 359 } 360 361 /** 362 * Returns the variance of the entries in the specified portion of 363 * the input array, using the precomputed mean value. Returns 364 * <code>Double.NaN</code> if the designated subarray is empty. 365 * <p> 366 * See {@link Variance} for details on the computing algorithm.</p> 367 * <p> 368 * The formula used assumes that the supplied mean value is the arithmetic 369 * mean of the sample data, not a known population parameter. This method 370 * is supplied only to save computation when the mean has already been 371 * computed.</p> 372 * <p> 373 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 374 * <p> 375 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 376 * <p> 377 * Does not change the internal state of the statistic.</p> 378 * 379 * @param values the input array 380 * @param mean the precomputed mean value 381 * @param begin index of the first array element to include 382 * @param length the number of elements to include 383 * @return the variance of the values or Double.NaN if length = 0 384 * @throws IllegalArgumentException if the array is null or the array index 385 * parameters are not valid 386 */ 387 public double evaluate(final double[] values, final double mean, 388 final int begin, final int length) { 389 390 double var = Double.NaN; 391 392 if (test(values, begin, length)) { 393 if (length == 1) { 394 var = 0.0; 395 } else if (length > 1) { 396 double accum = 0.0; 397 double dev = 0.0; 398 double accum2 = 0.0; 399 for (int i = begin; i < begin + length; i++) { 400 dev = values[i] - mean; 401 accum += dev * dev; 402 accum2 += dev; 403 } 404 double len = length; 405 if (isBiasCorrected) { 406 var = (accum - (accum2 * accum2 / len)) / (len - 1.0); 407 } else { 408 var = (accum - (accum2 * accum2 / len)) / len; 409 } 410 } 411 } 412 return var; 413 } 414 415 /** 416 * Returns the variance of the entries in the input array, using the 417 * precomputed mean value. Returns <code>Double.NaN</code> if the array 418 * is empty. 419 * <p> 420 * See {@link Variance} for details on the computing algorithm.</p> 421 * <p> 422 * If <code>isBiasCorrected</code> is <code>true</code> the formula used 423 * assumes that the supplied mean value is the arithmetic mean of the 424 * sample data, not a known population parameter. If the mean is a known 425 * population parameter, or if the "population" version of the variance is 426 * desired, set <code>isBiasCorrected</code> to <code>false</code> before 427 * invoking this method.</p> 428 * <p> 429 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 430 * <p> 431 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 432 * <p> 433 * Does not change the internal state of the statistic.</p> 434 * 435 * @param values the input array 436 * @param mean the precomputed mean value 437 * @return the variance of the values or Double.NaN if the array is empty 438 * @throws IllegalArgumentException if the array is null 439 */ 440 public double evaluate(final double[] values, final double mean) { 441 return evaluate(values, mean, 0, values.length); 442 } 443 444 /** 445 * Returns the weighted variance of the entries in the specified portion of 446 * the input array, using the precomputed weighted mean value. Returns 447 * <code>Double.NaN</code> if the designated subarray is empty. 448 * <p> 449 * Uses the formula <pre> 450 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) 451 * </pre></p> 452 * <p> 453 * The formula used assumes that the supplied mean value is the weighted arithmetic 454 * mean of the sample data, not a known population parameter. This method 455 * is supplied only to save computation when the mean has already been 456 * computed.</p> 457 * <p> 458 * This formula will not return the same result as the unweighted variance when all 459 * weights are equal, unless all weights are equal to 1. The formula assumes that 460 * weights are to be treated as "expansion values," as will be the case if for example 461 * the weights represent frequency counts. To normalize weights so that the denominator 462 * in the variance computation equals the length of the input vector minus one, use <pre> 463 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code> 464 * </pre> 465 * <p> 466 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 467 * <p> 468 * Throws <code>IllegalArgumentException</code> if any of the following are true: 469 * <ul><li>the values array is null</li> 470 * <li>the weights array is null</li> 471 * <li>the weights array does not have the same length as the values array</li> 472 * <li>the weights array contains one or more infinite values</li> 473 * <li>the weights array contains one or more NaN values</li> 474 * <li>the weights array contains negative values</li> 475 * <li>the start and length arguments do not determine a valid array</li> 476 * </ul></p> 477 * <p> 478 * Does not change the internal state of the statistic.</p> 479 * 480 * @param values the input array 481 * @param weights the weights array 482 * @param mean the precomputed weighted mean value 483 * @param begin index of the first array element to include 484 * @param length the number of elements to include 485 * @return the variance of the values or Double.NaN if length = 0 486 * @throws IllegalArgumentException if the parameters are not valid 487 * @since 2.1 488 */ 489 public double evaluate(final double[] values, final double[] weights, 490 final double mean, final int begin, final int length) { 491 492 double var = Double.NaN; 493 494 if (test(values, weights, begin, length)) { 495 if (length == 1) { 496 var = 0.0; 497 } else if (length > 1) { 498 double accum = 0.0; 499 double dev = 0.0; 500 double accum2 = 0.0; 501 for (int i = begin; i < begin + length; i++) { 502 dev = values[i] - mean; 503 accum += weights[i] * (dev * dev); 504 accum2 += weights[i] * dev; 505 } 506 507 double sumWts = 0; 508 for (int i = 0; i < weights.length; i++) { 509 sumWts += weights[i]; 510 } 511 512 if (isBiasCorrected) { 513 var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0); 514 } else { 515 var = (accum - (accum2 * accum2 / sumWts)) / sumWts; 516 } 517 } 518 } 519 return var; 520 } 521 522 /** 523 * <p>Returns the weighted variance of the values in the input array, using 524 * the precomputed weighted mean value.</p> 525 * <p> 526 * Uses the formula <pre> 527 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) 528 * </pre></p> 529 * <p> 530 * The formula used assumes that the supplied mean value is the weighted arithmetic 531 * mean of the sample data, not a known population parameter. This method 532 * is supplied only to save computation when the mean has already been 533 * computed.</p> 534 * <p> 535 * This formula will not return the same result as the unweighted variance when all 536 * weights are equal, unless all weights are equal to 1. The formula assumes that 537 * weights are to be treated as "expansion values," as will be the case if for example 538 * the weights represent frequency counts. To normalize weights so that the denominator 539 * in the variance computation equals the length of the input vector minus one, use <pre> 540 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code> 541 * </pre> 542 * <p> 543 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 544 * <p> 545 * Throws <code>IllegalArgumentException</code> if any of the following are true: 546 * <ul><li>the values array is null</li> 547 * <li>the weights array is null</li> 548 * <li>the weights array does not have the same length as the values array</li> 549 * <li>the weights array contains one or more infinite values</li> 550 * <li>the weights array contains one or more NaN values</li> 551 * <li>the weights array contains negative values</li> 552 * </ul></p> 553 * <p> 554 * Does not change the internal state of the statistic.</p> 555 * 556 * @param values the input array 557 * @param weights the weights array 558 * @param mean the precomputed weighted mean value 559 * @return the variance of the values or Double.NaN if length = 0 560 * @throws IllegalArgumentException if the parameters are not valid 561 * @since 2.1 562 */ 563 public double evaluate(final double[] values, final double[] weights, final double mean) { 564 return evaluate(values, weights, mean, 0, values.length); 565 } 566 567 /** 568 * @return Returns the isBiasCorrected. 569 */ 570 public boolean isBiasCorrected() { 571 return isBiasCorrected; 572 } 573 574 /** 575 * @param biasCorrected The isBiasCorrected to set. 576 */ 577 public void setBiasCorrected(boolean biasCorrected) { 578 this.isBiasCorrected = biasCorrected; 579 } 580 581 /** 582 * {@inheritDoc} 583 */ 584 @Override 585 public Variance copy() { 586 Variance result = new Variance(); 587 copy(this, result); 588 return result; 589 } 590 591 592 /** 593 * Copies source to dest. 594 * <p>Neither source nor dest can be null.</p> 595 * 596 * @param source Variance to copy 597 * @param dest Variance to copy to 598 * @throws NullPointerException if either source or dest is null 599 */ 600 public static void copy(Variance source, Variance dest) { 601 dest.moment = source.moment.copy(); 602 dest.isBiasCorrected = source.isBiasCorrected; 603 dest.incMoment = source.incMoment; 604 } 605 606 }