001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.inference; 018 019 import java.util.Collection; 020 021 import org.apache.commons.math.MathException; 022 import org.apache.commons.math.MathRuntimeException; 023 import org.apache.commons.math.distribution.FDistribution; 024 import org.apache.commons.math.distribution.FDistributionImpl; 025 import org.apache.commons.math.stat.descriptive.summary.Sum; 026 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 027 028 029 /** 030 * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl} 031 * interface. 032 * 033 * <p>Uses the 034 * {@link org.apache.commons.math.distribution.FDistribution 035 * commons-math F Distribution implementation} to estimate exact p-values.</p> 036 * 037 * <p>This implementation is based on a description at 038 * http://faculty.vassar.edu/lowry/ch13pt1.html</p> 039 * <pre> 040 * Abbreviations: bg = between groups, 041 * wg = within groups, 042 * ss = sum squared deviations 043 * </pre> 044 * 045 * @since 1.2 046 * @version $Revision: 825917 $ $Date: 2009-10-16 10:47:27 -0400 (Fri, 16 Oct 2009) $ 047 */ 048 public class OneWayAnovaImpl implements OneWayAnova { 049 050 /** 051 * Default constructor. 052 */ 053 public OneWayAnovaImpl() { 054 } 055 056 /** 057 * {@inheritDoc}<p> 058 * This implementation computes the F statistic using the definitional 059 * formula<pre> 060 * F = msbg/mswg</pre> 061 * where<pre> 062 * msbg = between group mean square 063 * mswg = within group mean square</pre> 064 * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html"> 065 * here</a></p> 066 */ 067 public double anovaFValue(Collection<double[]> categoryData) 068 throws IllegalArgumentException, MathException { 069 AnovaStats a = anovaStats(categoryData); 070 return a.F; 071 } 072 073 /** 074 * {@inheritDoc}<p> 075 * This implementation uses the 076 * {@link org.apache.commons.math.distribution.FDistribution 077 * commons-math F Distribution implementation} to estimate the exact 078 * p-value, using the formula<pre> 079 * p = 1 - cumulativeProbability(F)</pre> 080 * where <code>F</code> is the F value and <code>cumulativeProbability</code> 081 * is the commons-math implementation of the F distribution.</p> 082 */ 083 public double anovaPValue(Collection<double[]> categoryData) 084 throws IllegalArgumentException, MathException { 085 AnovaStats a = anovaStats(categoryData); 086 FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg); 087 return 1.0 - fdist.cumulativeProbability(a.F); 088 } 089 090 /** 091 * {@inheritDoc}<p> 092 * This implementation uses the 093 * {@link org.apache.commons.math.distribution.FDistribution 094 * commons-math F Distribution implementation} to estimate the exact 095 * p-value, using the formula<pre> 096 * p = 1 - cumulativeProbability(F)</pre> 097 * where <code>F</code> is the F value and <code>cumulativeProbability</code> 098 * is the commons-math implementation of the F distribution.</p> 099 * <p>True is returned iff the estimated p-value is less than alpha.</p> 100 */ 101 public boolean anovaTest(Collection<double[]> categoryData, double alpha) 102 throws IllegalArgumentException, MathException { 103 if ((alpha <= 0) || (alpha > 0.5)) { 104 throw MathRuntimeException.createIllegalArgumentException( 105 "out of bounds significance level {0}, must be between {1} and {2}", 106 alpha, 0, 0.5); 107 } 108 return anovaPValue(categoryData) < alpha; 109 } 110 111 112 /** 113 * This method actually does the calculations (except P-value). 114 * 115 * @param categoryData <code>Collection</code> of <code>double[]</code> 116 * arrays each containing data for one category 117 * @return computed AnovaStats 118 * @throws IllegalArgumentException if categoryData does not meet 119 * preconditions specified in the interface definition 120 * @throws MathException if an error occurs computing the Anova stats 121 */ 122 private AnovaStats anovaStats(Collection<double[]> categoryData) 123 throws IllegalArgumentException, MathException { 124 125 // check if we have enough categories 126 if (categoryData.size() < 2) { 127 throw MathRuntimeException.createIllegalArgumentException( 128 "two or more categories required, got {0}", 129 categoryData.size()); 130 } 131 132 // check if each category has enough data and all is double[] 133 for (double[] array : categoryData) { 134 if (array.length <= 1) { 135 throw MathRuntimeException.createIllegalArgumentException( 136 "two or more values required in each category, one has {0}", 137 array.length); 138 } 139 } 140 141 int dfwg = 0; 142 double sswg = 0; 143 Sum totsum = new Sum(); 144 SumOfSquares totsumsq = new SumOfSquares(); 145 int totnum = 0; 146 147 for (double[] data : categoryData) { 148 149 Sum sum = new Sum(); 150 SumOfSquares sumsq = new SumOfSquares(); 151 int num = 0; 152 153 for (int i = 0; i < data.length; i++) { 154 double val = data[i]; 155 156 // within category 157 num++; 158 sum.increment(val); 159 sumsq.increment(val); 160 161 // for all categories 162 totnum++; 163 totsum.increment(val); 164 totsumsq.increment(val); 165 } 166 dfwg += num - 1; 167 double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num; 168 sswg += ss; 169 } 170 double sst = totsumsq.getResult() - totsum.getResult() * 171 totsum.getResult()/totnum; 172 double ssbg = sst - sswg; 173 int dfbg = categoryData.size() - 1; 174 double msbg = ssbg/dfbg; 175 double mswg = sswg/dfwg; 176 double F = msbg/mswg; 177 178 return new AnovaStats(dfbg, dfwg, F); 179 } 180 181 /** 182 Convenience class to pass dfbg,dfwg,F values around within AnovaImpl. 183 No get/set methods provided. 184 */ 185 private static class AnovaStats { 186 187 /** Degrees of freedom in numerator (between groups). */ 188 private int dfbg; 189 190 /** Degrees of freedom in denominator (within groups). */ 191 private int dfwg; 192 193 /** Statistic. */ 194 private double F; 195 196 /** 197 * Constructor 198 * @param dfbg degrees of freedom in numerator (between groups) 199 * @param dfwg degrees of freedom in denominator (within groups) 200 * @param F statistic 201 */ 202 private AnovaStats(int dfbg, int dfwg, double F) { 203 this.dfbg = dfbg; 204 this.dfwg = dfwg; 205 this.F = F; 206 } 207 } 208 209 }