001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import org.apache.commons.math.MathException;
020 import org.apache.commons.math.MathRuntimeException;
021 import org.apache.commons.math.distribution.ChiSquaredDistribution;
022 import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
023
024 /**
025 * Implements Chi-Square test statistics defined in the
026 * {@link UnknownDistributionChiSquareTest} interface.
027 *
028 * @version $Revision: 775470 $ $Date: 2009-05-16 10:29:07 -0400 (Sat, 16 May 2009) $
029 */
030 public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
031
032 /** Distribution used to compute inference statistics. */
033 private ChiSquaredDistribution distribution;
034
035 /**
036 * Construct a ChiSquareTestImpl
037 */
038 public ChiSquareTestImpl() {
039 this(new ChiSquaredDistributionImpl(1.0));
040 }
041
042 /**
043 * Create a test instance using the given distribution for computing
044 * inference statistics.
045 * @param x distribution used to compute inference statistics.
046 * @since 1.2
047 */
048 public ChiSquareTestImpl(ChiSquaredDistribution x) {
049 super();
050 setDistribution(x);
051 }
052 /**
053 * {@inheritDoc}
054 * <p><strong>Note: </strong>This implementation rescales the
055 * <code>expected</code> array if necessary to ensure that the sum of the
056 * expected and observed counts are equal.</p>
057 *
058 * @param observed array of observed frequency counts
059 * @param expected array of expected frequency counts
060 * @return chi-square test statistic
061 * @throws IllegalArgumentException if preconditions are not met
062 * or length is less than 2
063 */
064 public double chiSquare(double[] expected, long[] observed)
065 throws IllegalArgumentException {
066 if (expected.length < 2) {
067 throw MathRuntimeException.createIllegalArgumentException(
068 "expected array length = {0}, must be at least 2",
069 expected.length);
070 }
071 if (expected.length != observed.length) {
072 throw MathRuntimeException.createIllegalArgumentException(
073 "dimension mismatch {0} != {1}", expected.length, observed.length);
074 }
075 checkPositive(expected);
076 checkNonNegative(observed);
077 double sumExpected = 0d;
078 double sumObserved = 0d;
079 for (int i = 0; i < observed.length; i++) {
080 sumExpected += expected[i];
081 sumObserved += observed[i];
082 }
083 double ratio = 1.0d;
084 boolean rescale = false;
085 if (Math.abs(sumExpected - sumObserved) > 10E-6) {
086 ratio = sumObserved / sumExpected;
087 rescale = true;
088 }
089 double sumSq = 0.0d;
090 double dev = 0.0d;
091 for (int i = 0; i < observed.length; i++) {
092 if (rescale) {
093 dev = (observed[i] - ratio * expected[i]);
094 sumSq += dev * dev / (ratio * expected[i]);
095 } else {
096 dev = (observed[i] - expected[i]);
097 sumSq += dev * dev / expected[i];
098 }
099 }
100 return sumSq;
101 }
102
103 /**
104 * {@inheritDoc}
105 * <p><strong>Note: </strong>This implementation rescales the
106 * <code>expected</code> array if necessary to ensure that the sum of the
107 * expected and observed counts are equal.</p>
108 *
109 * @param observed array of observed frequency counts
110 * @param expected array of expected frequency counts
111 * @return p-value
112 * @throws IllegalArgumentException if preconditions are not met
113 * @throws MathException if an error occurs computing the p-value
114 */
115 public double chiSquareTest(double[] expected, long[] observed)
116 throws IllegalArgumentException, MathException {
117 distribution.setDegreesOfFreedom(expected.length - 1.0);
118 return 1.0 - distribution.cumulativeProbability(
119 chiSquare(expected, observed));
120 }
121
122 /**
123 * {@inheritDoc}
124 * <p><strong>Note: </strong>This implementation rescales the
125 * <code>expected</code> array if necessary to ensure that the sum of the
126 * expected and observed counts are equal.</p>
127 *
128 * @param observed array of observed frequency counts
129 * @param expected array of expected frequency counts
130 * @param alpha significance level of the test
131 * @return true iff null hypothesis can be rejected with confidence
132 * 1 - alpha
133 * @throws IllegalArgumentException if preconditions are not met
134 * @throws MathException if an error occurs performing the test
135 */
136 public boolean chiSquareTest(double[] expected, long[] observed,
137 double alpha) throws IllegalArgumentException, MathException {
138 if ((alpha <= 0) || (alpha > 0.5)) {
139 throw MathRuntimeException.createIllegalArgumentException(
140 "out of bounds significance level {0}, must be between {1} and {2}",
141 alpha, 0, 0.5);
142 }
143 return (chiSquareTest(expected, observed) < alpha);
144 }
145
146 /**
147 * @param counts array representation of 2-way table
148 * @return chi-square test statistic
149 * @throws IllegalArgumentException if preconditions are not met
150 */
151 public double chiSquare(long[][] counts) throws IllegalArgumentException {
152
153 checkArray(counts);
154 int nRows = counts.length;
155 int nCols = counts[0].length;
156
157 // compute row, column and total sums
158 double[] rowSum = new double[nRows];
159 double[] colSum = new double[nCols];
160 double total = 0.0d;
161 for (int row = 0; row < nRows; row++) {
162 for (int col = 0; col < nCols; col++) {
163 rowSum[row] += counts[row][col];
164 colSum[col] += counts[row][col];
165 total += counts[row][col];
166 }
167 }
168
169 // compute expected counts and chi-square
170 double sumSq = 0.0d;
171 double expected = 0.0d;
172 for (int row = 0; row < nRows; row++) {
173 for (int col = 0; col < nCols; col++) {
174 expected = (rowSum[row] * colSum[col]) / total;
175 sumSq += ((counts[row][col] - expected) *
176 (counts[row][col] - expected)) / expected;
177 }
178 }
179 return sumSq;
180 }
181
182 /**
183 * @param counts array representation of 2-way table
184 * @return p-value
185 * @throws IllegalArgumentException if preconditions are not met
186 * @throws MathException if an error occurs computing the p-value
187 */
188 public double chiSquareTest(long[][] counts)
189 throws IllegalArgumentException, MathException {
190 checkArray(counts);
191 double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192 distribution.setDegreesOfFreedom(df);
193 return 1 - distribution.cumulativeProbability(chiSquare(counts));
194 }
195
196 /**
197 * @param counts array representation of 2-way table
198 * @param alpha significance level of the test
199 * @return true iff null hypothesis can be rejected with confidence
200 * 1 - alpha
201 * @throws IllegalArgumentException if preconditions are not met
202 * @throws MathException if an error occurs performing the test
203 */
204 public boolean chiSquareTest(long[][] counts, double alpha)
205 throws IllegalArgumentException, MathException {
206 if ((alpha <= 0) || (alpha > 0.5)) {
207 throw MathRuntimeException.createIllegalArgumentException(
208 "out of bounds significance level {0}, must be between {1} and {2}",
209 alpha, 0.0, 0.5);
210 }
211 return (chiSquareTest(counts) < alpha);
212 }
213
214 /**
215 * @param observed1 array of observed frequency counts of the first data set
216 * @param observed2 array of observed frequency counts of the second data set
217 * @return chi-square test statistic
218 * @throws IllegalArgumentException if preconditions are not met
219 * @since 1.2
220 */
221 public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222 throws IllegalArgumentException {
223
224 // Make sure lengths are same
225 if (observed1.length < 2) {
226 throw MathRuntimeException.createIllegalArgumentException(
227 "observed array length = {0}, must be at least 2",
228 observed1.length);
229 }
230 if (observed1.length != observed2.length) {
231 throw MathRuntimeException.createIllegalArgumentException(
232 "dimension mismatch {0} != {1}",
233 observed1.length, observed2.length);
234 }
235
236 // Ensure non-negative counts
237 checkNonNegative(observed1);
238 checkNonNegative(observed2);
239
240 // Compute and compare count sums
241 long countSum1 = 0;
242 long countSum2 = 0;
243 boolean unequalCounts = false;
244 double weight = 0.0;
245 for (int i = 0; i < observed1.length; i++) {
246 countSum1 += observed1[i];
247 countSum2 += observed2[i];
248 }
249 // Ensure neither sample is uniformly 0
250 if (countSum1 == 0) {
251 throw MathRuntimeException.createIllegalArgumentException(
252 "observed counts are all 0 in first observed array");
253 }
254 if (countSum2 == 0) {
255 throw MathRuntimeException.createIllegalArgumentException(
256 "observed counts are all 0 in second observed array");
257 }
258 // Compare and compute weight only if different
259 unequalCounts = (countSum1 != countSum2);
260 if (unequalCounts) {
261 weight = Math.sqrt((double) countSum1 / (double) countSum2);
262 }
263 // Compute ChiSquare statistic
264 double sumSq = 0.0d;
265 double dev = 0.0d;
266 double obs1 = 0.0d;
267 double obs2 = 0.0d;
268 for (int i = 0; i < observed1.length; i++) {
269 if (observed1[i] == 0 && observed2[i] == 0) {
270 throw MathRuntimeException.createIllegalArgumentException(
271 "observed counts are both zero for entry {0}", i);
272 } else {
273 obs1 = observed1[i];
274 obs2 = observed2[i];
275 if (unequalCounts) { // apply weights
276 dev = obs1/weight - obs2 * weight;
277 } else {
278 dev = obs1 - obs2;
279 }
280 sumSq += (dev * dev) / (obs1 + obs2);
281 }
282 }
283 return sumSq;
284 }
285
286 /**
287 * @param observed1 array of observed frequency counts of the first data set
288 * @param observed2 array of observed frequency counts of the second data set
289 * @return p-value
290 * @throws IllegalArgumentException if preconditions are not met
291 * @throws MathException if an error occurs computing the p-value
292 * @since 1.2
293 */
294 public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
295 throws IllegalArgumentException, MathException {
296 distribution.setDegreesOfFreedom((double) observed1.length - 1);
297 return 1 - distribution.cumulativeProbability(
298 chiSquareDataSetsComparison(observed1, observed2));
299 }
300
301 /**
302 * @param observed1 array of observed frequency counts of the first data set
303 * @param observed2 array of observed frequency counts of the second data set
304 * @param alpha significance level of the test
305 * @return true iff null hypothesis can be rejected with confidence
306 * 1 - alpha
307 * @throws IllegalArgumentException if preconditions are not met
308 * @throws MathException if an error occurs performing the test
309 * @since 1.2
310 */
311 public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
312 double alpha) throws IllegalArgumentException, MathException {
313 if ((alpha <= 0) || (alpha > 0.5)) {
314 throw MathRuntimeException.createIllegalArgumentException(
315 "out of bounds significance level {0}, must be between {1} and {2}",
316 alpha, 0.0, 0.5);
317 }
318 return (chiSquareTestDataSetsComparison(observed1, observed2) < alpha);
319 }
320
321 /**
322 * Checks to make sure that the input long[][] array is rectangular,
323 * has at least 2 rows and 2 columns, and has all non-negative entries,
324 * throwing IllegalArgumentException if any of these checks fail.
325 *
326 * @param in input 2-way table to check
327 * @throws IllegalArgumentException if the array is not valid
328 */
329 private void checkArray(long[][] in) throws IllegalArgumentException {
330
331 if (in.length < 2) {
332 throw MathRuntimeException.createIllegalArgumentException(
333 "invalid row dimension: {0} (must be at least 2)",
334 in.length);
335 }
336
337 if (in[0].length < 2) {
338 throw MathRuntimeException.createIllegalArgumentException(
339 "invalid column dimension: {0} (must be at least 2)",
340 in[0].length);
341 }
342
343 checkRectangular(in);
344 checkNonNegative(in);
345
346 }
347
348 //--------------------- Private array methods -- should find a utility home for these
349
350 /**
351 * Throws IllegalArgumentException if the input array is not rectangular.
352 *
353 * @param in array to be tested
354 * @throws NullPointerException if input array is null
355 * @throws IllegalArgumentException if input array is not rectangular
356 */
357 private void checkRectangular(long[][] in) {
358 for (int i = 1; i < in.length; i++) {
359 if (in[i].length != in[0].length) {
360 throw MathRuntimeException.createIllegalArgumentException(
361 "some rows have length {0} while others have length {1}",
362 in[i].length, in[0].length);
363 }
364 }
365 }
366
367 /**
368 * Check all entries of the input array are > 0.
369 *
370 * @param in array to be tested
371 * @exception IllegalArgumentException if one entry is not positive
372 */
373 private void checkPositive(double[] in) throws IllegalArgumentException {
374 for (int i = 0; i < in.length; i++) {
375 if (in[i] <= 0) {
376 throw MathRuntimeException.createIllegalArgumentException(
377 "element {0} is not positive: {1}",
378 i, in[i]);
379 }
380 }
381 }
382
383 /**
384 * Check all entries of the input array are >= 0.
385 *
386 * @param in array to be tested
387 * @exception IllegalArgumentException if one entry is negative
388 */
389 private void checkNonNegative(long[] in) throws IllegalArgumentException {
390 for (int i = 0; i < in.length; i++) {
391 if (in[i] < 0) {
392 throw MathRuntimeException.createIllegalArgumentException(
393 "element {0} is negative: {1}",
394 i, in[i]);
395 }
396 }
397 }
398
399 /**
400 * Check all entries of the input array are >= 0.
401 *
402 * @param in array to be tested
403 * @exception IllegalArgumentException if one entry is negative
404 */
405 private void checkNonNegative(long[][] in) throws IllegalArgumentException {
406 for (int i = 0; i < in.length; i ++) {
407 for (int j = 0; j < in[i].length; j++) {
408 if (in[i][j] < 0) {
409 throw MathRuntimeException.createIllegalArgumentException(
410 "element ({0}, {1}) is negative: {2}",
411 i, j, in[i][j]);
412 }
413 }
414 }
415 }
416
417 /**
418 * Modify the distribution used to compute inference statistics.
419 *
420 * @param value
421 * the new distribution
422 * @since 1.2
423 */
424 public void setDistribution(ChiSquaredDistribution value) {
425 distribution = value;
426 }
427 }