001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.distribution;
019
020 import java.io.Serializable;
021
022 import org.apache.commons.math.MathRuntimeException;
023 import org.apache.commons.math.util.MathUtils;
024
025 /**
026 * The default implementation of {@link HypergeometricDistribution}.
027 *
028 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
029 */
030 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
031 implements HypergeometricDistribution, Serializable
032 {
033
034 /** Serializable version identifier */
035 private static final long serialVersionUID = -436928820673516179L;
036
037 /** The number of successes in the population. */
038 private int numberOfSuccesses;
039
040 /** The population size. */
041 private int populationSize;
042
043 /** The sample size. */
044 private int sampleSize;
045
046 /**
047 * Construct a new hypergeometric distribution with the given the population
048 * size, the number of successes in the population, and the sample size.
049 * @param populationSize the population size.
050 * @param numberOfSuccesses number of successes in the population.
051 * @param sampleSize the sample size.
052 */
053 public HypergeometricDistributionImpl(int populationSize,
054 int numberOfSuccesses, int sampleSize) {
055 super();
056 if (numberOfSuccesses > populationSize) {
057 throw MathRuntimeException.createIllegalArgumentException(
058 "number of successes ({0}) must be less than or equal to population size ({1})",
059 numberOfSuccesses, populationSize);
060 }
061 if (sampleSize > populationSize) {
062 throw MathRuntimeException.createIllegalArgumentException(
063 "sample size ({0}) must be less than or equal to population size ({1})",
064 sampleSize, populationSize);
065 }
066 setPopulationSize(populationSize);
067 setSampleSize(sampleSize);
068 setNumberOfSuccesses(numberOfSuccesses);
069 }
070
071 /**
072 * For this distribution, X, this method returns P(X ≤ x).
073 * @param x the value at which the PDF is evaluated.
074 * @return PDF for this distribution.
075 */
076 @Override
077 public double cumulativeProbability(int x) {
078 double ret;
079
080 int n = getPopulationSize();
081 int m = getNumberOfSuccesses();
082 int k = getSampleSize();
083
084 int[] domain = getDomain(n, m, k);
085 if (x < domain[0]) {
086 ret = 0.0;
087 } else if(x >= domain[1]) {
088 ret = 1.0;
089 } else {
090 ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
091 }
092
093 return ret;
094 }
095
096 /**
097 * Return the domain for the given hypergeometric distribution parameters.
098 * @param n the population size.
099 * @param m number of successes in the population.
100 * @param k the sample size.
101 * @return a two element array containing the lower and upper bounds of the
102 * hypergeometric distribution.
103 */
104 private int[] getDomain(int n, int m, int k){
105 return new int[]{
106 getLowerDomain(n, m, k),
107 getUpperDomain(m, k)
108 };
109 }
110
111 /**
112 * Access the domain value lower bound, based on <code>p</code>, used to
113 * bracket a PDF root.
114 *
115 * @param p the desired probability for the critical value
116 * @return domain value lower bound, i.e.
117 * P(X < <i>lower bound</i>) < <code>p</code>
118 */
119 @Override
120 protected int getDomainLowerBound(double p) {
121 return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
122 getSampleSize());
123 }
124
125 /**
126 * Access the domain value upper bound, based on <code>p</code>, used to
127 * bracket a PDF root.
128 *
129 * @param p the desired probability for the critical value
130 * @return domain value upper bound, i.e.
131 * P(X < <i>upper bound</i>) > <code>p</code>
132 */
133 @Override
134 protected int getDomainUpperBound(double p) {
135 return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
136 }
137
138 /**
139 * Return the lowest domain value for the given hypergeometric distribution
140 * parameters.
141 * @param n the population size.
142 * @param m number of successes in the population.
143 * @param k the sample size.
144 * @return the lowest domain value of the hypergeometric distribution.
145 */
146 private int getLowerDomain(int n, int m, int k) {
147 return Math.max(0, m - (n - k));
148 }
149
150 /**
151 * Access the number of successes.
152 * @return the number of successes.
153 */
154 public int getNumberOfSuccesses() {
155 return numberOfSuccesses;
156 }
157
158 /**
159 * Access the population size.
160 * @return the population size.
161 */
162 public int getPopulationSize() {
163 return populationSize;
164 }
165
166 /**
167 * Access the sample size.
168 * @return the sample size.
169 */
170 public int getSampleSize() {
171 return sampleSize;
172 }
173
174 /**
175 * Return the highest domain value for the given hypergeometric distribution
176 * parameters.
177 * @param m number of successes in the population.
178 * @param k the sample size.
179 * @return the highest domain value of the hypergeometric distribution.
180 */
181 private int getUpperDomain(int m, int k){
182 return Math.min(k, m);
183 }
184
185 /**
186 * For this distribution, X, this method returns P(X = x).
187 *
188 * @param x the value at which the PMF is evaluated.
189 * @return PMF for this distribution.
190 */
191 public double probability(int x) {
192 double ret;
193
194 int n = getPopulationSize();
195 int m = getNumberOfSuccesses();
196 int k = getSampleSize();
197
198 int[] domain = getDomain(n, m, k);
199 if(x < domain[0] || x > domain[1]){
200 ret = 0.0;
201 } else {
202 ret = probability(n, m, k, x);
203 }
204
205 return ret;
206 }
207
208 /**
209 * For the distribution, X, defined by the given hypergeometric distribution
210 * parameters, this method returns P(X = x).
211 *
212 * @param n the population size.
213 * @param m number of successes in the population.
214 * @param k the sample size.
215 * @param x the value at which the PMF is evaluated.
216 * @return PMF for the distribution.
217 */
218 private double probability(int n, int m, int k, int x) {
219 return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
220 MathUtils.binomialCoefficientLog(n - m, k - x) -
221 MathUtils.binomialCoefficientLog(n, k));
222 }
223
224 /**
225 * Modify the number of successes.
226 * @param num the new number of successes.
227 * @throws IllegalArgumentException if <code>num</code> is negative.
228 */
229 public void setNumberOfSuccesses(int num) {
230 if(num < 0){
231 throw MathRuntimeException.createIllegalArgumentException(
232 "number of successes must be non-negative ({0})",
233 num);
234 }
235 numberOfSuccesses = num;
236 }
237
238 /**
239 * Modify the population size.
240 * @param size the new population size.
241 * @throws IllegalArgumentException if <code>size</code> is not positive.
242 */
243 public void setPopulationSize(int size) {
244 if(size <= 0){
245 throw MathRuntimeException.createIllegalArgumentException(
246 "population size must be positive ({0})",
247 size);
248 }
249 populationSize = size;
250 }
251
252 /**
253 * Modify the sample size.
254 * @param size the new sample size.
255 * @throws IllegalArgumentException if <code>size</code> is negative.
256 */
257 public void setSampleSize(int size) {
258 if (size < 0) {
259 throw MathRuntimeException.createIllegalArgumentException(
260 "sample size must be positive ({0})",
261 size);
262 }
263 sampleSize = size;
264 }
265
266 /**
267 * For this distribution, X, this method returns P(X ≥ x).
268 * @param x the value at which the CDF is evaluated.
269 * @return upper tail CDF for this distribution.
270 * @since 1.1
271 */
272 public double upperCumulativeProbability(int x) {
273 double ret;
274
275 int n = getPopulationSize();
276 int m = getNumberOfSuccesses();
277 int k = getSampleSize();
278
279 int[] domain = getDomain(n, m, k);
280 if (x < domain[0]) {
281 ret = 1.0;
282 } else if(x > domain[1]) {
283 ret = 0.0;
284 } else {
285 ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
286 }
287
288 return ret;
289 }
290
291 /**
292 * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This
293 * probability is computed by summing the point probabilities for the values
294 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
295 * @param x0 the inclusive, lower bound
296 * @param x1 the inclusive, upper bound
297 * @param dx the direction of summation. 1 indicates summing from x0 to x1.
298 * 0 indicates summing from x1 to x0.
299 * @param n the population size.
300 * @param m number of successes in the population.
301 * @param k the sample size.
302 * @return P(x0 ≤ X ≤ x1).
303 */
304 private double innerCumulativeProbability(
305 int x0, int x1, int dx, int n, int m, int k)
306 {
307 double ret = probability(n, m, k, x0);
308 while (x0 != x1) {
309 x0 += dx;
310 ret += probability(n, m, k, x0);
311 }
312 return ret;
313 }
314 }