1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.descriptive;
18
19 import java.io.Serializable;
20 import java.lang.reflect.InvocationTargetException;
21 import java.util.Arrays;
22
23 import org.apache.commons.discovery.tools.DiscoverClass;
24 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
25 import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
26 import org.apache.commons.math.stat.descriptive.moment.Mean;
27 import org.apache.commons.math.stat.descriptive.moment.Skewness;
28 import org.apache.commons.math.stat.descriptive.moment.Variance;
29 import org.apache.commons.math.stat.descriptive.rank.Max;
30 import org.apache.commons.math.stat.descriptive.rank.Min;
31 import org.apache.commons.math.stat.descriptive.rank.Percentile;
32 import org.apache.commons.math.stat.descriptive.summary.Sum;
33 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
34 import org.apache.commons.math.util.ResizableDoubleArray;
35
36
37 /**
38 * Maintains a dataset of values of a single variable and computes descriptive
39 * statistics based on stored data. The {@link #getWindowSize() windowSize}
40 * property sets a limit on the number of values that can be stored in the
41 * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of
42 * the dataset. This value should be used with caution, as the backing store
43 * will grow without bound in this case. For very large datasets,
44 * {@link SummaryStatistics}, which does not store the dataset, should be used
45 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
46 * more values are added than can be stored in the dataset, new values are
47 * added in a "rolling" manner, with new values replacing the "oldest" values
48 * in the dataset.
49 *
50 * <p>Note: this class is not threadsafe. Use
51 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
52 * threads is required.</p>
53 *
54 * @version $Revision: 620318 $ $Date: 2008-02-10 13:17:24 -0700 (Sun, 10 Feb 2008) $
55 */
56 public class DescriptiveStatistics implements StatisticalSummary, Serializable {
57
58 /** Serialization UID */
59 private static final long serialVersionUID = -2734185686570407433L;
60
61 /** hold the window size **/
62 protected int windowSize = INFINITE_WINDOW;
63
64 /**
65 * Stored data values
66 */
67 protected ResizableDoubleArray eDA = new ResizableDoubleArray();
68
69 /** Mean statistic implementation - can be reset by setter. */
70 private UnivariateStatistic meanImpl = new Mean();
71
72 /** Geometric mean statistic implementation - can be reset by setter. */
73 private UnivariateStatistic geometricMeanImpl = new GeometricMean();
74
75 /** Kurtosis statistic implementation - can be reset by setter. */
76 private UnivariateStatistic kurtosisImpl = new Kurtosis();
77
78 /** Maximum statistic implementation - can be reset by setter. */
79 private UnivariateStatistic maxImpl = new Max();
80
81 /** Minimum statistic implementation - can be reset by setter. */
82 private UnivariateStatistic minImpl = new Min();
83
84 /** Percentile statistic implementation - can be reset by setter. */
85 private UnivariateStatistic percentileImpl = new Percentile();
86
87 /** Skewness statistic implementation - can be reset by setter. */
88 private UnivariateStatistic skewnessImpl = new Skewness();
89
90 /** Variance statistic implementation - can be reset by setter. */
91 private UnivariateStatistic varianceImpl = new Variance();
92
93 /** Sum of squares statistic implementation - can be reset by setter. */
94 private UnivariateStatistic sumsqImpl = new SumOfSquares();
95
96 /** Sum statistic implementation - can be reset by setter. */
97 private UnivariateStatistic sumImpl = new Sum();
98
99 /**
100 * Construct a DescriptiveStatistics instance with an infinite window
101 */
102 public DescriptiveStatistics() {
103 }
104
105 /**
106 * Construct a DescriptiveStatistics instance with the specified window
107 *
108 * @param window the window size.
109 */
110 public DescriptiveStatistics(int window) {
111 super();
112 setWindowSize(window);
113 }
114
115 /**
116 * Create an instance of a <code>DescriptiveStatistics</code>
117 * @param cls the type of <code>DescriptiveStatistics</code> object to
118 * create.
119 * @return a new instance.
120 * @throws InstantiationException is thrown if the object can not be
121 * created.
122 * @throws IllegalAccessException is thrown if the type's default
123 * constructor is not accessible.
124 * @deprecated to be removed in commons-math 2.0
125 */
126 public static DescriptiveStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException {
127 return (DescriptiveStatistics)cls.newInstance();
128 }
129
130 /**
131 * Create an instance of a <code>DescriptiveStatistics</code>
132 * @return a new DescriptiveStatistics instance.
133 * @deprecated to be removed in commons-math 2.0
134 */
135 public static DescriptiveStatistics newInstance() {
136 DescriptiveStatistics factory = null;
137 try {
138 DiscoverClass dc = new DiscoverClass();
139 factory = (DescriptiveStatistics) dc.newInstance(
140 DescriptiveStatistics.class,
141 "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl");
142 } catch(Throwable t) {
143 return new DescriptiveStatisticsImpl();
144 }
145 return factory;
146 }
147
148 /**
149 * Represents an infinite window size. When the {@link #getWindowSize()}
150 * returns this value, there is no limit to the number of data values
151 * that can be stored in the dataset.
152 */
153 public static final int INFINITE_WINDOW = -1;
154
155 /**
156 * Adds the value to the dataset. If the dataset is at the maximum size
157 * (i.e., the number of stored elements equals the currently configured
158 * windowSize), the first (oldest) element in the dataset is discarded
159 * to make room for the new value.
160 *
161 * @param v the value to be added
162 */
163 public void addValue(double v) {
164 if (windowSize != INFINITE_WINDOW) {
165 if (getN() == windowSize) {
166 eDA.addElementRolling(v);
167 } else if (getN() < windowSize) {
168 eDA.addElement(v);
169 }
170 } else {
171 eDA.addElement(v);
172 }
173 }
174
175 /**
176 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
177 * arithmetic mean </a> of the available values
178 * @return The mean or Double.NaN if no values have been added.
179 */
180 public double getMean() {
181 return apply(meanImpl);
182 }
183
184 /**
185 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
186 * geometric mean </a> of the available values
187 * @return The geometricMean, Double.NaN if no values have been added,
188 * or if the productof the available values is less than or equal to 0.
189 */
190 public double getGeometricMean() {
191 return apply(geometricMeanImpl);
192 }
193
194 /**
195 * Returns the variance of the available values.
196 * @return The variance, Double.NaN if no values have been added
197 * or 0.0 for a single value set.
198 */
199 public double getVariance() {
200 return apply(varianceImpl);
201 }
202
203 /**
204 * Returns the standard deviation of the available values.
205 * @return The standard deviation, Double.NaN if no values have been added
206 * or 0.0 for a single value set.
207 */
208 public double getStandardDeviation() {
209 double stdDev = Double.NaN;
210 if (getN() > 0) {
211 if (getN() > 1) {
212 stdDev = Math.sqrt(getVariance());
213 } else {
214 stdDev = 0.0;
215 }
216 }
217 return (stdDev);
218 }
219
220 /**
221 * Returns the skewness of the available values. Skewness is a
222 * measure of the asymmetry of a given distribution.
223 * @return The skewness, Double.NaN if no values have been added
224 * or 0.0 for a value set <=2.
225 */
226 public double getSkewness() {
227 return apply(skewnessImpl);
228 }
229
230 /**
231 * Returns the Kurtosis of the available values. Kurtosis is a
232 * measure of the "peakedness" of a distribution
233 * @return The kurtosis, Double.NaN if no values have been added, or 0.0
234 * for a value set <=3.
235 */
236 public double getKurtosis() {
237 return apply(kurtosisImpl);
238 }
239
240 /**
241 * Returns the maximum of the available values
242 * @return The max or Double.NaN if no values have been added.
243 */
244 public double getMax() {
245 return apply(maxImpl);
246 }
247
248 /**
249 * Returns the minimum of the available values
250 * @return The min or Double.NaN if no values have been added.
251 */
252 public double getMin() {
253 return apply(minImpl);
254 }
255
256 /**
257 * Returns the number of available values
258 * @return The number of available values
259 */
260 public long getN() {
261 return eDA.getNumElements();
262 }
263
264 /**
265 * Returns the sum of the values that have been added to Univariate.
266 * @return The sum or Double.NaN if no values have been added
267 */
268 public double getSum() {
269 return apply(sumImpl);
270 }
271
272 /**
273 * Returns the sum of the squares of the available values.
274 * @return The sum of the squares or Double.NaN if no
275 * values have been added.
276 */
277 public double getSumsq() {
278 return apply(sumsqImpl);
279 }
280
281 /**
282 * Resets all statistics and storage
283 */
284 public void clear() {
285 eDA.clear();
286 }
287
288
289 /**
290 * Returns the maximum number of values that can be stored in the
291 * dataset, or INFINITE_WINDOW (-1) if there is no limit.
292 *
293 * @return The current window size or -1 if its Infinite.
294 */
295 public int getWindowSize() {
296 return windowSize;
297 }
298
299 /**
300 * WindowSize controls the number of values which contribute
301 * to the reported statistics. For example, if
302 * windowSize is set to 3 and the values {1,2,3,4,5}
303 * have been added <strong> in that order</strong>
304 * then the <i>available values</i> are {3,4,5} and all
305 * reported statistics will be based on these values
306 * @param windowSize sets the size of the window.
307 */
308 public void setWindowSize(int windowSize) {
309 if (windowSize < 1) {
310 if (windowSize != INFINITE_WINDOW) {
311 throw new IllegalArgumentException("window size must be positive.");
312 }
313 }
314
315 this.windowSize = windowSize;
316
317 // We need to check to see if we need to discard elements
318 // from the front of the array. If the windowSize is less than
319 // the current number of elements.
320 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
321 eDA.discardFrontElements(eDA.getNumElements() - windowSize);
322 }
323 }
324
325 /**
326 * Returns the current set of values in an array of double primitives.
327 * The order of addition is preserved. The returned array is a fresh
328 * copy of the underlying data -- i.e., it is not a reference to the
329 * stored data.
330 *
331 * @return returns the current set of numbers in the order in which they
332 * were added to this set
333 */
334 public double[] getValues() {
335 double[] copiedArray = new double[eDA.getNumElements()];
336 System.arraycopy(eDA.getElements(), 0, copiedArray,
337 0, eDA.getNumElements());
338 return copiedArray;
339 }
340
341 /**
342 * Returns the current set of values in an array of double primitives,
343 * sorted in ascending order. The returned array is a fresh
344 * copy of the underlying data -- i.e., it is not a reference to the
345 * stored data.
346 * @return returns the current set of
347 * numbers sorted in ascending order
348 */
349 public double[] getSortedValues() {
350 double[] sort = getValues();
351 Arrays.sort(sort);
352 return sort;
353 }
354
355 /**
356 * Returns the element at the specified index
357 * @param index The Index of the element
358 * @return return the element at the specified index
359 */
360 public double getElement(int index) {
361 return eDA.getElement(index);
362 }
363
364 /**
365 * Returns an estimate for the pth percentile of the stored values.
366 * <p>
367 * The implementation provided here follows the first estimation procedure presented
368 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
369 * </p><p>
370 * <strong>Preconditions</strong>:<ul>
371 * <li><code>0 < p < 100</code> (otherwise an
372 * <code>IllegalArgumentException</code> is thrown)</li>
373 * <li>at least one value must be stored (returns <code>Double.NaN
374 * </code> otherwise)</li>
375 * </ul></p>
376 *
377 * @param p the requested percentile (scaled from 0 - 100)
378 * @return An estimate for the pth percentile of the stored data
379 * @throws IllegalStateException if percentile implementation has been
380 * overridden and the supplied implementation does not support setQuantile
381 * values
382 */
383 public double getPercentile(double p) {
384 if (percentileImpl instanceof Percentile) {
385 ((Percentile) percentileImpl).setQuantile(p);
386 } else {
387 try {
388 percentileImpl.getClass().getMethod("setQuantile",
389 new Class[] {Double.TYPE}).invoke(percentileImpl,
390 new Object[] {new Double(p)});
391 } catch (NoSuchMethodException e1) { // Setter guard should prevent
392 throw new IllegalArgumentException(
393 "Percentile implementation does not support setQuantile");
394 } catch (IllegalAccessException e2) {
395 throw new IllegalArgumentException(
396 "IllegalAccessException setting quantile");
397 } catch (InvocationTargetException e3) {
398 throw new IllegalArgumentException(
399 "Error setting quantile" + e3.toString());
400 }
401 }
402 return apply(percentileImpl);
403 }
404
405 /**
406 * Generates a text report displaying univariate statistics from values
407 * that have been added. Each statistic is displayed on a separate
408 * line.
409 *
410 * @return String with line feeds displaying statistics
411 */
412 public String toString() {
413 StringBuffer outBuffer = new StringBuffer();
414 outBuffer.append("DescriptiveStatistics:\n");
415 outBuffer.append("n: " + getN() + "\n");
416 outBuffer.append("min: " + getMin() + "\n");
417 outBuffer.append("max: " + getMax() + "\n");
418 outBuffer.append("mean: " + getMean() + "\n");
419 outBuffer.append("std dev: " + getStandardDeviation() + "\n");
420 outBuffer.append("median: " + getPercentile(50) + "\n");
421 outBuffer.append("skewness: " + getSkewness() + "\n");
422 outBuffer.append("kurtosis: " + getKurtosis() + "\n");
423 return outBuffer.toString();
424 }
425
426 /**
427 * Apply the given statistic to the data associated with this set of statistics.
428 * @param stat the statistic to apply
429 * @return the computed value of the statistic.
430 */
431 public double apply(UnivariateStatistic stat) {
432 return stat.evaluate(eDA.getValues(), eDA.start(), eDA.getNumElements());
433 }
434
435 // Implementation getters and setter
436
437 /**
438 * Returns the currently configured mean implementation.
439 *
440 * @return the UnivariateStatistic implementing the mean
441 * @since 1.2
442 */
443 public synchronized UnivariateStatistic getMeanImpl() {
444 return meanImpl;
445 }
446
447 /**
448 * <p>Sets the implementation for the mean.</p>
449 *
450 * @param meanImpl the UnivariateStatistic instance to use
451 * for computing the mean
452 * @since 1.2
453 */
454 public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
455 this.meanImpl = meanImpl;
456 }
457
458 /**
459 * Returns the currently configured geometric mean implementation.
460 *
461 * @return the UnivariateStatistic implementing the geometric mean
462 * @since 1.2
463 */
464 public synchronized UnivariateStatistic getGeometricMeanImpl() {
465 return geometricMeanImpl;
466 }
467
468 /**
469 * <p>Sets the implementation for the gemoetric mean.</p>
470 *
471 * @param geometricMeanImpl the UnivariateStatistic instance to use
472 * for computing the geometric mean
473 * @since 1.2
474 */
475 public synchronized void setGeometricMeanImpl(
476 UnivariateStatistic geometricMeanImpl) {
477 this.geometricMeanImpl = geometricMeanImpl;
478 }
479
480 /**
481 * Returns the currently configured kurtosis implementation.
482 *
483 * @return the UnivariateStatistic implementing the kurtosis
484 * @since 1.2
485 */
486 public synchronized UnivariateStatistic getKurtosisImpl() {
487 return kurtosisImpl;
488 }
489
490 /**
491 * <p>Sets the implementation for the kurtosis.</p>
492 *
493 * @param kurtosisImpl the UnivariateStatistic instance to use
494 * for computing the kurtosis
495 * @since 1.2
496 */
497 public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
498 this.kurtosisImpl = kurtosisImpl;
499 }
500
501 /**
502 * Returns the currently configured maximum implementation.
503 *
504 * @return the UnivariateStatistic implementing the maximum
505 * @since 1.2
506 */
507 public synchronized UnivariateStatistic getMaxImpl() {
508 return maxImpl;
509 }
510
511 /**
512 * <p>Sets the implementation for the maximum.</p>
513 *
514 * @param maxImpl the UnivariateStatistic instance to use
515 * for computing the maximum
516 * @since 1.2
517 */
518 public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
519 this.maxImpl = maxImpl;
520 }
521
522 /**
523 * Returns the currently configured minimum implementation.
524 *
525 * @return the UnivariateStatistic implementing the minimum
526 * @since 1.2
527 */
528 public synchronized UnivariateStatistic getMinImpl() {
529 return minImpl;
530 }
531
532 /**
533 * <p>Sets the implementation for the minimum.</p>
534 *
535 * @param minImpl the UnivariateStatistic instance to use
536 * for computing the minimum
537 * @since 1.2
538 */
539 public synchronized void setMinImpl(UnivariateStatistic minImpl) {
540 this.minImpl = minImpl;
541 }
542
543 /**
544 * Returns the currently configured percentile implementation.
545 *
546 * @return the UnivariateStatistic implementing the percentile
547 * @since 1.2
548 */
549 public synchronized UnivariateStatistic getPercentileImpl() {
550 return percentileImpl;
551 }
552
553 /**
554 * Sets the implementation to be used by {@link #getPercentile(double)}.
555 * The supplied <code>UnivariateStatistic</code> must provide a
556 * <code>setQuantile(double)</code> method; otherwise
557 * <code>IllegalArgumentException</code> is thrown.
558 *
559 * @param percentileImpl the percentileImpl to set
560 * @throws IllegalArgumentException if the supplied implementation does not
561 * provide a <code>setQuantile</code> method
562 * @since 1.2
563 */
564 public synchronized void setPercentileImpl(
565 UnivariateStatistic percentileImpl) {
566 try {
567 percentileImpl.getClass().getMethod("setQuantile",
568 new Class[] {Double.TYPE}).invoke(percentileImpl,
569 new Object[] {new Double(50.0d)});
570 } catch (NoSuchMethodException e1) {
571 throw new IllegalArgumentException(
572 "Percentile implementation does not support setQuantile");
573 } catch (IllegalAccessException e2) {
574 throw new IllegalArgumentException(
575 "IllegalAccessException setting quantile");
576 } catch (InvocationTargetException e3) {
577 throw new IllegalArgumentException(
578 "Error setting quantile" + e3.toString());
579 }
580 this.percentileImpl = percentileImpl;
581 }
582
583 /**
584 * Returns the currently configured skewness implementation.
585 *
586 * @return the UnivariateStatistic implementing the skewness
587 * @since 1.2
588 */
589 public synchronized UnivariateStatistic getSkewnessImpl() {
590 return skewnessImpl;
591 }
592
593 /**
594 * <p>Sets the implementation for the skewness.</p>
595 *
596 * @param skewnessImpl the UnivariateStatistic instance to use
597 * for computing the skewness
598 * @since 1.2
599 */
600 public synchronized void setSkewnessImpl(
601 UnivariateStatistic skewnessImpl) {
602 this.skewnessImpl = skewnessImpl;
603 }
604
605 /**
606 * Returns the currently configured variance implementation.
607 *
608 * @return the UnivariateStatistic implementing the variance
609 * @since 1.2
610 */
611 public synchronized UnivariateStatistic getVarianceImpl() {
612 return varianceImpl;
613 }
614
615 /**
616 * <p>Sets the implementation for the variance.</p>
617 *
618 * @param varianceImpl the UnivariateStatistic instance to use
619 * for computing the variance
620 * @since 1.2
621 */
622 public synchronized void setVarianceImpl(
623 UnivariateStatistic varianceImpl) {
624 this.varianceImpl = varianceImpl;
625 }
626
627 /**
628 * Returns the currently configured sum of squares implementation.
629 *
630 * @return the UnivariateStatistic implementing the sum of squares
631 * @since 1.2
632 */
633 public synchronized UnivariateStatistic getSumsqImpl() {
634 return sumsqImpl;
635 }
636
637 /**
638 * <p>Sets the implementation for the sum of squares.</p>
639 *
640 * @param sumsqImpl the UnivariateStatistic instance to use
641 * for computing the sum of squares
642 * @since 1.2
643 */
644 public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
645 this.sumsqImpl = sumsqImpl;
646 }
647
648 /**
649 * Returns the currently configured sum implementation.
650 *
651 * @return the UnivariateStatistic implementing the sum
652 * @since 1.2
653 */
654 public synchronized UnivariateStatistic getSumImpl() {
655 return sumImpl;
656 }
657
658 /**
659 * <p>Sets the implementation for the sum.</p>
660 *
661 * @param sumImpl the UnivariateStatistic instance to use
662 * for computing the sum
663 * @since 1.2
664 */
665 public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
666 this.sumImpl = sumImpl;
667 }
668 }