1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.inference;
18
19 import org.apache.commons.math.MathException;
20 import org.apache.commons.math.stat.descriptive.summary.Sum;
21 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
22
23 import org.apache.commons.math.distribution.FDistribution;
24 import org.apache.commons.math.distribution.FDistributionImpl;
25
26 import java.util.Collection;
27 import java.util.Iterator;
28
29
30 /**
31 * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl}
32 * interface.
33 *
34 * <p>Uses the
35 * {@link org.apache.commons.math.distribution.FDistribution
36 * commons-math F Distribution implementation} to estimate exact p-values.</p>
37 *
38 * <p>This implementation is based on a description at
39 * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
40 * <pre>
41 * Abbreviations: bg = between groups,
42 * wg = within groups,
43 * ss = sum squared deviations
44 * </pre>
45 *
46 * @since 1.2
47 * @version $Revision$ $Date$
48 */
49 public class OneWayAnovaImpl implements OneWayAnova {
50
51 /**
52 * Default constructor.
53 */
54 public OneWayAnovaImpl() {
55 }
56
57 /**
58 * {@inheritDoc}<p>
59 * This implementation computes the F statistic using the definitional
60 * formula<pre>
61 * F = msbg/mswg</pre>
62 * where<pre>
63 * msbg = between group mean square
64 * mswg = within group mean square</pre>
65 * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
66 * here</a></p>
67 */
68 public double anovaFValue(Collection categoryData)
69 throws IllegalArgumentException, MathException {
70 AnovaStats a = anovaStats(categoryData);
71 return a.F;
72 }
73
74 /**
75 * {@inheritDoc}<p>
76 * This implementation uses the
77 * {@link org.apache.commons.math.distribution.FDistribution
78 * commons-math F Distribution implementation} to estimate the exact
79 * p-value, using the formula<pre>
80 * p = 1 - cumulativeProbability(F)</pre>
81 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
82 * is the commons-math implementation of the F distribution.</p>
83 */
84 public double anovaPValue(Collection categoryData)
85 throws IllegalArgumentException, MathException {
86 AnovaStats a = anovaStats(categoryData);
87 FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
88 return 1.0 - fdist.cumulativeProbability(a.F);
89 }
90
91 /**
92 * {@inheritDoc}<p>
93 * This implementation uses the
94 * {@link org.apache.commons.math.distribution.FDistribution
95 * commons-math F Distribution implementation} to estimate the exact
96 * p-value, using the formula<pre>
97 * p = 1 - cumulativeProbability(F)</pre>
98 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
99 * is the commons-math implementation of the F distribution.</p>
100 * <p>True is returned iff the estimated p-value is less than alpha.</p>
101 */
102 public boolean anovaTest(Collection categoryData, double alpha)
103 throws IllegalArgumentException, MathException {
104 if ((alpha <= 0) || (alpha > 0.5)) {
105 throw new IllegalArgumentException("bad significance level: " + alpha);
106 }
107 return (anovaPValue(categoryData) < alpha);
108 }
109
110
111 /**
112 * This method actually does the calculations (except P-value).
113 *
114 * @param categoryData <code>Collection</code> of <code>double[]</code>
115 * arrays each containing data for one category
116 * @return computed AnovaStats
117 * @throws IllegalArgumentException if categoryData does not meet
118 * preconditions specified in the interface definition
119 * @throws MathException if an error occurs computing the Anova stats
120 */
121 private AnovaStats anovaStats(Collection categoryData)
122 throws IllegalArgumentException, MathException {
123
124 // check if we have enough categories
125 if (categoryData.size() < 2) {
126 throw new IllegalArgumentException(
127 "ANOVA: two or more categories required");
128 }
129
130 // check if each category has enough data and all is double[]
131 for (Iterator iterator = categoryData.iterator(); iterator.hasNext();) {
132 double[] array;
133 try {
134 array = (double[])iterator.next();
135 } catch (ClassCastException ex) {
136 throw new IllegalArgumentException(
137 "ANOVA: categoryData contains non-double[] elements.");
138 }
139 if (array.length <= 1) {
140 throw new IllegalArgumentException(
141 "ANOVA: one element of categoryData has fewer than 2 values.");
142 }
143 }
144
145 int dfwg = 0;
146 double sswg = 0;
147 Sum totsum = new Sum();
148 SumOfSquares totsumsq = new SumOfSquares();
149 int totnum = 0;
150
151 for (Iterator iterator = categoryData.iterator(); iterator.hasNext();) {
152 double[] data = (double[])iterator.next();
153
154 Sum sum = new Sum();
155 SumOfSquares sumsq = new SumOfSquares();
156 int num = 0;
157
158 for (int i = 0; i < data.length; i++) {
159 double val = data[i];
160
161 // within category
162 num++;
163 sum.increment(val);
164 sumsq.increment(val);
165
166 // for all categories
167 totnum++;
168 totsum.increment(val);
169 totsumsq.increment(val);
170 }
171 dfwg += num - 1;
172 double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
173 sswg += ss;
174 }
175 double sst = totsumsq.getResult() - totsum.getResult() *
176 totsum.getResult()/totnum;
177 double ssbg = sst - sswg;
178 int dfbg = categoryData.size() - 1;
179 double msbg = ssbg/dfbg;
180 double mswg = sswg/dfwg;
181 double F = msbg/mswg;
182
183 return new AnovaStats(dfbg, dfwg, F);
184 }
185
186 /**
187 Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
188 No get/set methods provided.
189 */
190 private static class AnovaStats {
191 private int dfbg;
192 private int dfwg;
193 private double F;
194
195 /**
196 * Constructor
197 * @param dfbg degrees of freedom in numerator (between groups)
198 * @param dfwg degrees of freedom in denominator (within groups)
199 * @param F statistic
200 */
201 AnovaStats(int dfbg, int dfwg, double F) {
202 this.dfbg = dfbg;
203 this.dfwg = dfwg;
204 this.F = F;
205 }
206 }
207
208 }