1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.inference;
18
19 import org.apache.commons.math.MathException;
20
21 /**
22 * An interface for Chi-Square tests.
23 * <p>This interface handles only known distributions. If the distribution is
24 * unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
25 * UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
26 * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
27 */
28 public interface ChiSquareTest {
29
30 /**
31 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
32 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
33 * frequency counts.
34 * <p>
35 * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
36 * the observed counts follow the expected distribution.</p>
37 * <p>
38 * <strong>Preconditions</strong>: <ul>
39 * <li>Expected counts must all be positive.
40 * </li>
41 * <li>Observed counts must all be >= 0.
42 * </li>
43 * <li>The observed and expected arrays must have the same length and
44 * their common length must be at least 2.
45 * </li></ul></p><p>
46 * If any of the preconditions are not met, an
47 * <code>IllegalArgumentException</code> is thrown.</p>
48 *
49 * @param observed array of observed frequency counts
50 * @param expected array of expected frequency counts
51 * @return chiSquare statistic
52 * @throws IllegalArgumentException if preconditions are not met
53 */
54 double chiSquare(double[] expected, long[] observed)
55 throws IllegalArgumentException;
56
57 /**
58 * Returns the <i>observed significance level</i>, or <a href=
59 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
60 * p-value</a>, associated with a
61 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
62 * Chi-square goodness of fit test</a> comparing the <code>observed</code>
63 * frequency counts to those in the <code>expected</code> array.
64 * <p>
65 * The number returned is the smallest significance level at which one can reject
66 * the null hypothesis that the observed counts conform to the frequency distribution
67 * described by the expected counts.</p>
68 * <p>
69 * <strong>Preconditions</strong>: <ul>
70 * <li>Expected counts must all be positive.
71 * </li>
72 * <li>Observed counts must all be >= 0.
73 * </li>
74 * <li>The observed and expected arrays must have the same length and
75 * their common length must be at least 2.
76 * </li></ul></p><p>
77 * If any of the preconditions are not met, an
78 * <code>IllegalArgumentException</code> is thrown.</p>
79 *
80 * @param observed array of observed frequency counts
81 * @param expected array of expected frequency counts
82 * @return p-value
83 * @throws IllegalArgumentException if preconditions are not met
84 * @throws MathException if an error occurs computing the p-value
85 */
86 double chiSquareTest(double[] expected, long[] observed)
87 throws IllegalArgumentException, MathException;
88
89 /**
90 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
91 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts
92 * conform to the frequency distribution described by the expected counts, with
93 * significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
94 * with 100 * (1 - alpha) percent confidence.
95 * <p>
96 * <strong>Example:</strong><br>
97 * To test the hypothesis that <code>observed</code> follows
98 * <code>expected</code> at the 99% level, use </p><p>
99 * <code>chiSquareTest(expected, observed, 0.01) </code></p>
100 * <p>
101 * <strong>Preconditions</strong>: <ul>
102 * <li>Expected counts must all be positive.
103 * </li>
104 * <li>Observed counts must all be >= 0.
105 * </li>
106 * <li>The observed and expected arrays must have the same length and
107 * their common length must be at least 2.
108 * <li> <code> 0 < alpha < 0.5 </code>
109 * </li></ul></p><p>
110 * If any of the preconditions are not met, an
111 * <code>IllegalArgumentException</code> is thrown.</p>
112 *
113 * @param observed array of observed frequency counts
114 * @param expected array of expected frequency counts
115 * @param alpha significance level of the test
116 * @return true iff null hypothesis can be rejected with confidence
117 * 1 - alpha
118 * @throws IllegalArgumentException if preconditions are not met
119 * @throws MathException if an error occurs performing the test
120 */
121 boolean chiSquareTest(double[] expected, long[] observed, double alpha)
122 throws IllegalArgumentException, MathException;
123
124 /**
125 * Computes the Chi-Square statistic associated with a
126 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
127 * chi-square test of independence</a> based on the input <code>counts</code>
128 * array, viewed as a two-way table.
129 * <p>
130 * The rows of the 2-way table are
131 * <code>count[0], ... , count[count.length - 1] </code></p>
132 * <p>
133 * <strong>Preconditions</strong>: <ul>
134 * <li>All counts must be >= 0.
135 * </li>
136 * <li>The count array must be rectangular (i.e. all count[i] subarrays
137 * must have the same length).
138 * </li>
139 * <li>The 2-way table represented by <code>counts</code> must have at
140 * least 2 columns and at least 2 rows.
141 * </li>
142 * </li></ul></p><p>
143 * If any of the preconditions are not met, an
144 * <code>IllegalArgumentException</code> is thrown.</p>
145 *
146 * @param counts array representation of 2-way table
147 * @return chiSquare statistic
148 * @throws IllegalArgumentException if preconditions are not met
149 */
150 double chiSquare(long[][] counts)
151 throws IllegalArgumentException;
152
153 /**
154 * Returns the <i>observed significance level</i>, or <a href=
155 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
156 * p-value</a>, associated with a
157 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
158 * chi-square test of independence</a> based on the input <code>counts</code>
159 * array, viewed as a two-way table.
160 * <p>
161 * The rows of the 2-way table are
162 * <code>count[0], ... , count[count.length - 1] </code></p>
163 * <p>
164 * <strong>Preconditions</strong>: <ul>
165 * <li>All counts must be >= 0.
166 * </li>
167 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
168 * </li>
169 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
170 * at least 2 rows.
171 * </li>
172 * </li></ul></p><p>
173 * If any of the preconditions are not met, an
174 * <code>IllegalArgumentException</code> is thrown.</p>
175 *
176 * @param counts array representation of 2-way table
177 * @return p-value
178 * @throws IllegalArgumentException if preconditions are not met
179 * @throws MathException if an error occurs computing the p-value
180 */
181 double chiSquareTest(long[][] counts)
182 throws IllegalArgumentException, MathException;
183
184 /**
185 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
186 * chi-square test of independence</a> evaluating the null hypothesis that the classifications
187 * represented by the counts in the columns of the input 2-way table are independent of the rows,
188 * with significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
189 * with 100 * (1 - alpha) percent confidence.
190 * <p>
191 * The rows of the 2-way table are
192 * <code>count[0], ... , count[count.length - 1] </code></p>
193 * <p>
194 * <strong>Example:</strong><br>
195 * To test the null hypothesis that the counts in
196 * <code>count[0], ... , count[count.length - 1] </code>
197 * all correspond to the same underlying probability distribution at the 99% level, use </p><p>
198 * <code>chiSquareTest(counts, 0.01) </code></p>
199 * <p>
200 * <strong>Preconditions</strong>: <ul>
201 * <li>All counts must be >= 0.
202 * </li>
203 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
204 * </li>
205 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
206 * at least 2 rows.
207 * </li>
208 * </li></ul></p><p>
209 * If any of the preconditions are not met, an
210 * <code>IllegalArgumentException</code> is thrown.</p>
211 *
212 * @param counts array representation of 2-way table
213 * @param alpha significance level of the test
214 * @return true iff null hypothesis can be rejected with confidence
215 * 1 - alpha
216 * @throws IllegalArgumentException if preconditions are not met
217 * @throws MathException if an error occurs performing the test
218 */
219 boolean chiSquareTest(long[][] counts, double alpha)
220 throws IllegalArgumentException, MathException;
221
222 }