1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.descriptive;
18
19 import java.io.Serializable;
20
21 import org.apache.commons.discovery.tools.DiscoverClass;
22 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
23 import org.apache.commons.math.stat.descriptive.moment.Mean;
24 import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
25 import org.apache.commons.math.stat.descriptive.moment.Variance;
26 import org.apache.commons.math.stat.descriptive.rank.Max;
27 import org.apache.commons.math.stat.descriptive.rank.Min;
28 import org.apache.commons.math.stat.descriptive.summary.Sum;
29 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
30 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
31 import org.apache.commons.math.util.MathUtils;
32
33 /**
34 * <p>Computes summary statistics for a stream of data values added using the
35 * {@link #addValue(double) addValue} method. The data values are not stored in
36 * memory, so this class can be used to compute statistics for very large
37 * data streams.</p>
38 *
39 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
40 * summary state and compute statistics are configurable via setters.
41 * For example, the default implementation for the variance can be overridden by
42 * calling {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual
43 * parameters to these methods must implement the
44 * {@link StorelessUnivariateStatistic} interface and configuration must be
45 * completed before <code>addValue</code> is called. No configuration is
46 * necessary to use the default, commons-math provided implementations.</p>
47 *
48 * <p>Note: This class is not thread-safe. Use
49 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
50 * threads is required.</p>
51 *
52 * @version $Revision: 620312 $ $Date: 2008-02-10 12:28:59 -0700 (Sun, 10 Feb 2008) $
53 */
54 public class SummaryStatistics implements StatisticalSummary, Serializable {
55
56 /** Serialization version UID declared for this <code>Serializable</code> class. */
57 private static final long serialVersionUID = -3346512372447011854L;
58
59 /**
60 * Create an instance of a <code>SummaryStatistics</code>
61 *
62 * @param cls the type of <code>SummaryStatistics</code> object to
63 * create.
64 * @return a new instance.
65 * @deprecated to be removed in commons-math 2.0
66 * @throws InstantiationException is thrown if the object can not be
67 * created.
68 * @throws IllegalAccessException is thrown if the type's default
69 * constructor is not accessible.
70 */
71 public static SummaryStatistics newInstance(Class cls) throws
72 InstantiationException, IllegalAccessException {
73 return (SummaryStatistics)cls.newInstance();
74 }
75
76 /**
77 * Create an instance of a <code>SummaryStatistics</code>
78 *
79 * @return a new SummaryStatistics instance.
80 * @deprecated to be removed in commons-math 2.0
81 */
82 public static SummaryStatistics newInstance() {
83 SummaryStatistics instance = null;
84 try {
85 DiscoverClass dc = new DiscoverClass();
86 instance = (SummaryStatistics) dc.newInstance(
87 SummaryStatistics.class,
88 "org.apache.commons.math.stat.descriptive.SummaryStatisticsImpl");
89 } catch(Throwable t) {
90 return new SummaryStatisticsImpl();
91 }
92 return instance;
93 }
94
/**
 * Constructs an empty <code>SummaryStatistics</code> instance. All state is
 * set up by the field initializers; the constructor itself does nothing else.
 */
public SummaryStatistics() {
    super();
}
100
101 /** count of values that have been added */
102 protected long n = 0;
103
104 /** SecondMoment is used to compute the mean and variance */
105 protected SecondMoment secondMoment = new SecondMoment();
106
107 /** sum of values that have been added */
108 protected Sum sum = new Sum();
109
110 /** sum of the square of each value that has been added */
111 protected SumOfSquares sumsq = new SumOfSquares();
112
113 /** min of values that have been added */
114 protected Min min = new Min();
115
116 /** max of values that have been added */
117 protected Max max = new Max();
118
119 /** sumLog of values that have been added */
120 protected SumOfLogs sumLog = new SumOfLogs();
121
122 /** geoMean of values that have been added */
123 protected GeometricMean geoMean = new GeometricMean(sumLog);
124
125 /** mean of values that have been added */
126 protected Mean mean = new Mean();
127
128 /** variance of values that have been added */
129 protected Variance variance = new Variance();
130
131 /** Sum statistic implementation - can be reset by setter. */
132 private StorelessUnivariateStatistic sumImpl = sum;
133
134 /** Sum of squares statistic implementation - can be reset by setter. */
135 private StorelessUnivariateStatistic sumsqImpl = sumsq;
136
137 /** Minimum statistic implementation - can be reset by setter. */
138 private StorelessUnivariateStatistic minImpl = min;
139
140 /** Maximum statistic implementation - can be reset by setter. */
141 private StorelessUnivariateStatistic maxImpl = max;
142
143 /** Sum of log statistic implementation - can be reset by setter. */
144 private StorelessUnivariateStatistic sumLogImpl = sumLog;
145
146 /** Geometric mean statistic implementation - can be reset by setter. */
147 private StorelessUnivariateStatistic geoMeanImpl = geoMean;
148
149 /** Mean statistic implementation - can be reset by setter. */
150 private StorelessUnivariateStatistic meanImpl = mean;
151
152 /** Variance statistic implementation - can be reset by setter. */
153 private StorelessUnivariateStatistic varianceImpl = variance;
154
155 /**
156 * Return a {@link StatisticalSummaryValues} instance reporting current
157 * statistics.
158 *
159 * @return Current values of statistics
160 */
161 public StatisticalSummary getSummary() {
162 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
163 getMax(), getMin(), getSum());
164 }
165
166 /**
167 * Add a value to the data
168 *
169 * @param value the value to add
170 */
171 public void addValue(double value) {
172 sumImpl.increment(value);
173 sumsqImpl.increment(value);
174 minImpl.increment(value);
175 maxImpl.increment(value);
176 sumLogImpl.increment(value);
177 secondMoment.increment(value);
178 // If mean, variance or geomean have been overridden,
179 // need to increment these
180 if (!(meanImpl instanceof Mean)) {
181 meanImpl.increment(value);
182 }
183 if (!(varianceImpl instanceof Variance)) {
184 varianceImpl.increment(value);
185 }
186 if (!(geoMeanImpl instanceof GeometricMean)) {
187 geoMeanImpl.increment(value);
188 }
189 n++;
190 }
191
192 /**
193 * Returns the number of available values
194 * @return The number of available values
195 */
196 public long getN() {
197 return n;
198 }
199
200 /**
201 * Returns the sum of the values that have been added
202 * @return The sum or <code>Double.NaN</code> if no values have been added
203 */
204 public double getSum() {
205 return sumImpl.getResult();
206 }
207
208 /**
209 * Returns the sum of the squares of the values that have been added.
210 * <p>
211 * Double.NaN is returned if no values have been added.</p>
212 *
213 * @return The sum of squares
214 */
215 public double getSumsq() {
216 return sumsqImpl.getResult();
217 }
218
219 /**
220 * Returns the mean of the values that have been added.
221 * <p>
222 * Double.NaN is returned if no values have been added.</p>
223 *
224 * @return the mean
225 */
226 public double getMean() {
227 if (mean == meanImpl) {
228 return new Mean(secondMoment).getResult();
229 } else {
230 return meanImpl.getResult();
231 }
232 }
233
234 /**
235 * Returns the standard deviation of the values that have been added.
236 * <p>
237 * Double.NaN is returned if no values have been added.</p>
238 *
239 * @return the standard deviation
240 */
241 public double getStandardDeviation() {
242 double stdDev = Double.NaN;
243 if (getN() > 0) {
244 if (getN() > 1) {
245 stdDev = Math.sqrt(getVariance());
246 } else {
247 stdDev = 0.0;
248 }
249 }
250 return (stdDev);
251 }
252
253 /**
254 * Returns the variance of the values that have been added.
255 * <p>
256 * Double.NaN is returned if no values have been added.</p>
257 *
258 * @return the variance
259 */
260 public double getVariance() {
261 if (varianceImpl == variance) {
262 return new Variance(secondMoment).getResult();
263 } else {
264 return varianceImpl.getResult();
265 }
266 }
267
268 /**
269 * Returns the maximum of the values that have been added.
270 * <p>
271 * Double.NaN is returned if no values have been added.</p>
272 *
273 * @return the maximum
274 */
275 public double getMax() {
276 return maxImpl.getResult();
277 }
278
279 /**
280 * Returns the minimum of the values that have been added.
281 * <p>
282 * Double.NaN is returned if no values have been added.</p>
283 *
284 * @return the minimum
285 */
286 public double getMin() {
287 return minImpl.getResult();
288 }
289
290 /**
291 * Returns the geometric mean of the values that have been added.
292 * <p>
293 * Double.NaN is returned if no values have been added.</p>
294 *
295 * @return the geometric mean
296 */
297 public double getGeometricMean() {
298 return geoMeanImpl.getResult();
299 }
300
301 /**
302 * Returns the sum of the logs of the values that have been added.
303 * <p>
304 * Double.NaN is returned if no values have been added.</p>
305 *
306 * @return the sum of logs
307 * @since 1.2
308 */
309 public double getSumOfLogs() {
310 return sumLogImpl.getResult();
311 }
312
313 /**
314 * Generates a text report displaying
315 * summary statistics from values that
316 * have been added.
317 * @return String with line feeds displaying statistics
318 * @since 1.2
319 */
320 public String toString() {
321 StringBuffer outBuffer = new StringBuffer();
322 outBuffer.append("SummaryStatistics:\n");
323 outBuffer.append("n: " + getN() + "\n");
324 outBuffer.append("min: " + getMin() + "\n");
325 outBuffer.append("max: " + getMax() + "\n");
326 outBuffer.append("mean: " + getMean() + "\n");
327 outBuffer.append("geometric mean: " + getGeometricMean() + "\n");
328 outBuffer.append("variance: " + getVariance() + "\n");
329 outBuffer.append("sum of squares: " + getSumsq() + "\n");
330 outBuffer.append("standard deviation: " + getStandardDeviation() + "\n");
331 outBuffer.append("sum of logs: " + getSumOfLogs() + "\n");
332 return outBuffer.toString();
333 }
334
335 /**
336 * Resets all statistics and storage
337 */
338 public void clear() {
339 this.n = 0;
340 minImpl.clear();
341 maxImpl.clear();
342 sumImpl.clear();
343 sumLogImpl.clear();
344 sumsqImpl.clear();
345 geoMeanImpl.clear();
346 secondMoment.clear();
347 if (meanImpl != mean) {
348 meanImpl.clear();
349 }
350 if (varianceImpl != variance) {
351 varianceImpl.clear();
352 }
353 }
354
355 /**
356 * Returns true iff <code>object</code> is a <code>SummaryStatistics</code>
357 * instance and all statistics have the same values as this.
358 * @param object the object to test equality against.
359 * @return true if object equals this
360 */
361 public boolean equals(Object object) {
362 if (object == this ) {
363 return true;
364 }
365 if (object instanceof SummaryStatistics == false) {
366 return false;
367 }
368 SummaryStatistics stat = (SummaryStatistics) object;
369 return (MathUtils.equals(stat.getGeometricMean(),
370 this.getGeometricMean()) &&
371 MathUtils.equals(stat.getMax(), this.getMax()) &&
372 MathUtils.equals(stat.getMean(),this.getMean()) &&
373 MathUtils.equals(stat.getMin(),this.getMin()) &&
374 MathUtils.equals(stat.getN(), this.getN()) &&
375 MathUtils.equals(stat.getSum(), this.getSum()) &&
376 MathUtils.equals(stat.getSumsq(),this.getSumsq()) &&
377 MathUtils.equals(stat.getVariance(),this.getVariance()));
378 }
379
380 /**
381 * Returns hash code based on values of statistics
382 *
383 * @return hash code
384 */
385 public int hashCode() {
386 int result = 31 + MathUtils.hash(getGeometricMean());
387 result = result * 31 + MathUtils.hash(getGeometricMean());
388 result = result * 31 + MathUtils.hash(getMax());
389 result = result * 31 + MathUtils.hash(getMean());
390 result = result * 31 + MathUtils.hash(getMin());
391 result = result * 31 + MathUtils.hash(getN());
392 result = result * 31 + MathUtils.hash(getSum());
393 result = result * 31 + MathUtils.hash(getSumsq());
394 result = result * 31 + MathUtils.hash(getVariance());
395 return result;
396 }
397
398 // Getters and setters for statistics implementations
399 /**
400 * Returns the currently configured Sum implementation
401 *
402 * @return the StorelessUnivariateStatistic implementing the sum
403 * @since 1.2
404 */
405 public StorelessUnivariateStatistic getSumImpl() {
406 return sumImpl;
407 }
408
409 /**
410 * <p>Sets the implementation for the Sum.</p>
411 * <p>This method must be activated before any data has been added - i.e.,
412 * before {@link #addValue(double) addValue} has been used to add data;
413 * otherwise an IllegalStateException will be thrown.</p>
414 *
415 * @param sumImpl the StorelessUnivariateStatistic instance to use
416 * for computing the Sum
417 * @throws IllegalStateException if data has already been added
418 * (i.e if n > 0)
419 * @since 1.2
420 */
421 public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
422 checkEmpty();
423 this.sumImpl = sumImpl;
424 }
425
426 /**
427 * Returns the currently configured sum of squares implementation
428 *
429 * @return the StorelessUnivariateStatistic implementing the sum of squares
430 * @since 1.2
431 */
432 public StorelessUnivariateStatistic getSumsqImpl() {
433 return sumsqImpl;
434 }
435
436 /**
437 * <p>Sets the implementation for the sum of squares.</p>
438 * <p>This method must be activated before any data has been added - i.e.,
439 * before {@link #addValue(double) addValue} has been used to add data;
440 * otherwise an IllegalStateException will be thrown.</p>
441 *
442 * @param sumsqImpl the StorelessUnivariateStatistic instance to use
443 * for computing the sum of squares
444 * @throws IllegalStateException if data has already been added
445 * (i.e if n > 0)
446 * @since 1.2
447 */
448 public void setSumsqImpl(
449 StorelessUnivariateStatistic sumsqImpl) {
450 checkEmpty();
451 this.sumsqImpl = sumsqImpl;
452 }
453
454 /**
455 * Returns the currently configured minimum implementation
456 *
457 * @return the StorelessUnivariateStatistic implementing the minimum
458 * @since 1.2
459 */
460 public StorelessUnivariateStatistic getMinImpl() {
461 return minImpl;
462 }
463
464 /**
465 * <p>Sets the implementation for the minimum.</p>
466 * <p>This method must be activated before any data has been added - i.e.,
467 * before {@link #addValue(double) addValue} has been used to add data;
468 * otherwise an IllegalStateException will be thrown.</p>
469 *
470 * @param minImpl the StorelessUnivariateStatistic instance to use
471 * for computing the minimum
472 * @throws IllegalStateException if data has already been added
473 * (i.e if n > 0)
474 * @since 1.2
475 */
476 public void setMinImpl(StorelessUnivariateStatistic minImpl) {
477 checkEmpty();
478 this.minImpl = minImpl;
479 }
480
481 /**
482 * Returns the currently configured maximum implementation
483 *
484 * @return the StorelessUnivariateStatistic implementing the maximum
485 * @since 1.2
486 */
487 public StorelessUnivariateStatistic getMaxImpl() {
488 return maxImpl;
489 }
490
491 /**
492 * <p>Sets the implementation for the maximum.</p>
493 * <p>This method must be activated before any data has been added - i.e.,
494 * before {@link #addValue(double) addValue} has been used to add data;
495 * otherwise an IllegalStateException will be thrown.</p>
496 *
497 * @param maxImpl the StorelessUnivariateStatistic instance to use
498 * for computing the maximum
499 * @throws IllegalStateException if data has already been added
500 * (i.e if n > 0)
501 * @since 1.2
502 */
503 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
504 checkEmpty();
505 this.maxImpl = maxImpl;
506 }
507
508 /**
509 * Returns the currently configured sum of logs implementation
510 *
511 * @return the StorelessUnivariateStatistic implementing the log sum
512 * @since 1.2
513 */
514 public StorelessUnivariateStatistic getSumLogImpl() {
515 return sumLogImpl;
516 }
517
518 /**
519 * <p>Sets the implementation for the sum of logs.</p>
520 * <p>This method must be activated before any data has been added - i.e.,
521 * before {@link #addValue(double) addValue} has been used to add data;
522 * otherwise an IllegalStateException will be thrown.</p>
523 *
524 * @param sumLogImpl the StorelessUnivariateStatistic instance to use
525 * for computing the log sum
526 * @throws IllegalStateException if data has already been added
527 * (i.e if n > 0)
528 * @since 1.2
529 */
530 public void setSumLogImpl(
531 StorelessUnivariateStatistic sumLogImpl) {
532 checkEmpty();
533 this.sumLogImpl = sumLogImpl;
534 geoMean.setSumLogImpl(sumLogImpl);
535 }
536
537 /**
538 * Returns the currently configured geometric mean implementation
539 *
540 * @return the StorelessUnivariateStatistic implementing the geometric mean
541 * @since 1.2
542 */
543 public StorelessUnivariateStatistic getGeoMeanImpl() {
544 return geoMeanImpl;
545 }
546
547 /**
548 * <p>Sets the implementation for the geometric mean.</p>
549 * <p>This method must be activated before any data has been added - i.e.,
550 * before {@link #addValue(double) addValue} has been used to add data;
551 * otherwise an IllegalStateException will be thrown.</p>
552 *
553 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
554 * for computing the geometric mean
555 * @throws IllegalStateException if data has already been added
556 * (i.e if n > 0)
557 * @since 1.2
558 */
559 public void setGeoMeanImpl(
560 StorelessUnivariateStatistic geoMeanImpl) {
561 checkEmpty();
562 this.geoMeanImpl = geoMeanImpl;
563 }
564
565 /**
566 * Returns the currently configured mean implementation
567 *
568 * @return the StorelessUnivariateStatistic implementing the mean
569 * @since 1.2
570 */
571 public StorelessUnivariateStatistic getMeanImpl() {
572 return meanImpl;
573 }
574
575 /**
576 * <p>Sets the implementation for the mean.</p>
577 * <p>This method must be activated before any data has been added - i.e.,
578 * before {@link #addValue(double) addValue} has been used to add data;
579 * otherwise an IllegalStateException will be thrown.</p>
580 *
581 * @param meanImpl the StorelessUnivariateStatistic instance to use
582 * for computing the mean
583 * @throws IllegalStateException if data has already been added
584 * (i.e if n > 0)
585 * @since 1.2
586 */
587 public void setMeanImpl(
588 StorelessUnivariateStatistic meanImpl) {
589 checkEmpty();
590 this.meanImpl = meanImpl;
591 }
592
593 /**
594 * Returns the currently configured variance implementation
595 *
596 * @return the StorelessUnivariateStatistic implementing the variance
597 * @since 1.2
598 */
599 public StorelessUnivariateStatistic getVarianceImpl() {
600 return varianceImpl;
601 }
602
603 /**
604 * <p>Sets the implementation for the variance.</p>
605 * <p>This method must be activated before any data has been added - i.e.,
606 * before {@link #addValue(double) addValue} has been used to add data;
607 * otherwise an IllegalStateException will be thrown.</p>
608 *
609 * @param varianceImpl the StorelessUnivariateStatistic instance to use
610 * for computing the variance
611 * @throws IllegalStateException if data has already been added
612 * (i.e if n > 0)
613 * @since 1.2
614 */
615 public void setVarianceImpl(
616 StorelessUnivariateStatistic varianceImpl) {
617 checkEmpty();
618 this.varianceImpl = varianceImpl;
619 }
620
621 /**
622 * Throws IllegalStateException if n > 0.
623 */
624 private void checkEmpty() {
625 if (n > 0) {
626 throw new IllegalStateException(
627 "Implementations must be configured before values are added.");
628 }
629 }
630
631 }