1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.math.distribution;
19
20 import java.io.Serializable;
21
22 import org.apache.commons.math.util.MathUtils;
23
24 /**
25 * The default implementation of {@link HypergeometricDistribution}.
26 *
27 * @version $Revision: 480440 $ $Date: 2006-11-29 00:14:12 -0700 (Wed, 29 Nov 2006) $
28 */
29 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
30 implements HypergeometricDistribution, Serializable
31 {
32
33 /** Serializable version identifier */
34 private static final long serialVersionUID = -436928820673516179L;
35
36 /** The number of successes in the population. */
37 private int numberOfSuccesses;
38
39 /** The population size. */
40 private int populationSize;
41
42 /** The sample size. */
43 private int sampleSize;
44
45 /**
46 * Construct a new hypergeometric distribution with the given the population
47 * size, the number of successes in the population, and the sample size.
48 * @param populationSize the population size.
49 * @param numberOfSuccesses number of successes in the population.
50 * @param sampleSize the sample size.
51 */
52 public HypergeometricDistributionImpl(int populationSize,
53 int numberOfSuccesses, int sampleSize) {
54 super();
55 if (numberOfSuccesses > populationSize) {
56 throw new IllegalArgumentException(
57 "number of successes must be less than or equal to " +
58 "population size");
59 }
60 if (sampleSize > populationSize) {
61 throw new IllegalArgumentException(
62 "sample size must be less than or equal to population size");
63 }
64 setPopulationSize(populationSize);
65 setSampleSize(sampleSize);
66 setNumberOfSuccesses(numberOfSuccesses);
67 }
68
69 /**
70 * For this disbution, X, this method returns P(X ≤ x).
71 * @param x the value at which the PDF is evaluated.
72 * @return PDF for this distribution.
73 */
74 public double cumulativeProbability(int x) {
75 double ret;
76
77 int n = getPopulationSize();
78 int m = getNumberOfSuccesses();
79 int k = getSampleSize();
80
81 int[] domain = getDomain(n, m, k);
82 if (x < domain[0]) {
83 ret = 0.0;
84 } else if(x >= domain[1]) {
85 ret = 1.0;
86 } else {
87 ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
88 }
89
90 return ret;
91 }
92
93 /**
94 * Return the domain for the given hypergeometric distribution parameters.
95 * @param n the population size.
96 * @param m number of successes in the population.
97 * @param k the sample size.
98 * @return a two element array containing the lower and upper bounds of the
99 * hypergeometric distribution.
100 */
101 private int[] getDomain(int n, int m, int k){
102 return new int[]{
103 getLowerDomain(n, m, k),
104 getUpperDomain(m, k)
105 };
106 }
107
108 /**
109 * Access the domain value lower bound, based on <code>p</code>, used to
110 * bracket a PDF root.
111 *
112 * @param p the desired probability for the critical value
113 * @return domain value lower bound, i.e.
114 * P(X < <i>lower bound</i>) < <code>p</code>
115 */
116 protected int getDomainLowerBound(double p) {
117 return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
118 getSampleSize());
119 }
120
121 /**
122 * Access the domain value upper bound, based on <code>p</code>, used to
123 * bracket a PDF root.
124 *
125 * @param p the desired probability for the critical value
126 * @return domain value upper bound, i.e.
127 * P(X < <i>upper bound</i>) > <code>p</code>
128 */
129 protected int getDomainUpperBound(double p) {
130 return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
131 }
132
133 /**
134 * Return the lowest domain value for the given hypergeometric distribution
135 * parameters.
136 * @param n the population size.
137 * @param m number of successes in the population.
138 * @param k the sample size.
139 * @return the lowest domain value of the hypergeometric distribution.
140 */
141 private int getLowerDomain(int n, int m, int k) {
142 return Math.max(0, m - (n - k));
143 }
144
145 /**
146 * Access the number of successes.
147 * @return the number of successes.
148 */
149 public int getNumberOfSuccesses() {
150 return numberOfSuccesses;
151 }
152
153 /**
154 * Access the population size.
155 * @return the population size.
156 */
157 public int getPopulationSize() {
158 return populationSize;
159 }
160
161 /**
162 * Access the sample size.
163 * @return the sample size.
164 */
165 public int getSampleSize() {
166 return sampleSize;
167 }
168
169 /**
170 * Return the highest domain value for the given hypergeometric distribution
171 * parameters.
172 * @param m number of successes in the population.
173 * @param k the sample size.
174 * @return the highest domain value of the hypergeometric distribution.
175 */
176 private int getUpperDomain(int m, int k){
177 return Math.min(k, m);
178 }
179
180 /**
181 * For this disbution, X, this method returns P(X = x).
182 *
183 * @param x the value at which the PMF is evaluated.
184 * @return PMF for this distribution.
185 */
186 public double probability(int x) {
187 double ret;
188
189 int n = getPopulationSize();
190 int m = getNumberOfSuccesses();
191 int k = getSampleSize();
192
193 int[] domain = getDomain(n, m, k);
194 if(x < domain[0] || x > domain[1]){
195 ret = 0.0;
196 } else {
197 ret = probability(n, m, k, x);
198 }
199
200 return ret;
201 }
202
203 /**
204 * For the disbution, X, defined by the given hypergeometric distribution
205 * parameters, this method returns P(X = x).
206 *
207 * @param n the population size.
208 * @param m number of successes in the population.
209 * @param k the sample size.
210 * @param x the value at which the PMF is evaluated.
211 * @return PMF for the distribution.
212 */
213 private double probability(int n, int m, int k, int x) {
214 return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
215 MathUtils.binomialCoefficientLog(n - m, k - x) -
216 MathUtils.binomialCoefficientLog(n, k));
217 }
218
219 /**
220 * Modify the number of successes.
221 * @param num the new number of successes.
222 * @throws IllegalArgumentException if <code>num</code> is negative.
223 */
224 public void setNumberOfSuccesses(int num) {
225 if(num < 0){
226 throw new IllegalArgumentException(
227 "number of successes must be non-negative.");
228 }
229 numberOfSuccesses = num;
230 }
231
232 /**
233 * Modify the population size.
234 * @param size the new population size.
235 * @throws IllegalArgumentException if <code>size</code> is not positive.
236 */
237 public void setPopulationSize(int size) {
238 if(size <= 0){
239 throw new IllegalArgumentException(
240 "population size must be positive.");
241 }
242 populationSize = size;
243 }
244
245 /**
246 * Modify the sample size.
247 * @param size the new sample size.
248 * @throws IllegalArgumentException if <code>size</code> is negative.
249 */
250 public void setSampleSize(int size) {
251 if (size < 0) {
252 throw new IllegalArgumentException(
253 "sample size must be non-negative.");
254 }
255 sampleSize = size;
256 }
257
258 /**
259 * For this disbution, X, this method returns P(X ≥ x).
260 * @param x the value at which the CDF is evaluated.
261 * @return upper tail CDF for this distribution.
262 * @since 1.1
263 */
264 public double upperCumulativeProbability(int x) {
265 double ret;
266
267 int n = getPopulationSize();
268 int m = getNumberOfSuccesses();
269 int k = getSampleSize();
270
271 int[] domain = getDomain(n, m, k);
272 if (x < domain[0]) {
273 ret = 1.0;
274 } else if(x > domain[1]) {
275 ret = 0.0;
276 } else {
277 ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
278 }
279
280 return ret;
281 }
282
283 /**
284 * For this disbution, X, this method returns P(x0 ≤ X ≤ x1). This
285 * probability is computed by summing the point probabilities for the values
286 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
287 * @param x0 the inclusive, lower bound
288 * @param x1 the inclusive, upper bound
289 * @param dx the direction of summation. 1 indicates summing from x0 to x1.
290 * 0 indicates summing from x1 to x0.
291 * @param n the population size.
292 * @param m number of successes in the population.
293 * @param k the sample size.
294 * @return P(x0 ≤ X ≤ x1).
295 */
296 private double innerCumulativeProbability(
297 int x0, int x1, int dx, int n, int m, int k)
298 {
299 double ret = probability(n, m, k, x0);
300 while (x0 != x1) {
301 x0 += dx;
302 ret += probability(n, m, k, x0);
303 }
304 return ret;
305 }
306 }