1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat;
18
19 import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
20 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
21 import org.apache.commons.math.stat.descriptive.moment.Mean;
22 import org.apache.commons.math.stat.descriptive.moment.Variance;
23 import org.apache.commons.math.stat.descriptive.rank.Max;
24 import org.apache.commons.math.stat.descriptive.rank.Min;
25 import org.apache.commons.math.stat.descriptive.rank.Percentile;
26 import org.apache.commons.math.stat.descriptive.summary.Product;
27 import org.apache.commons.math.stat.descriptive.summary.Sum;
28 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
29 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
30
31 /**
32 * StatUtils provides static methods for computing statistics based on data
33 * stored in double[] arrays.
34 *
35 * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
36 */
37 public final class StatUtils {
38
39 /** sum */
40 private static UnivariateStatistic sum = new Sum();
41
42 /** sumSq */
43 private static UnivariateStatistic sumSq = new SumOfSquares();
44
45 /** prod */
46 private static UnivariateStatistic prod = new Product();
47
48 /** sumLog */
49 private static UnivariateStatistic sumLog = new SumOfLogs();
50
51 /** min */
52 private static UnivariateStatistic min = new Min();
53
54 /** max */
55 private static UnivariateStatistic max = new Max();
56
57 /** mean */
58 private static UnivariateStatistic mean = new Mean();
59
60 /** variance */
61 private static Variance variance = new Variance();
62
63 /** percentile */
64 private static Percentile percentile = new Percentile();
65
66 /** geometric mean */
67 private static GeometricMean geometricMean = new GeometricMean();
68
69 /**
70 * Private Constructor
71 */
72 private StatUtils() {
73 }
74
75 /**
76 * Returns the sum of the values in the input array, or
77 * <code>Double.NaN</code> if the array is empty.
78 * <p>
79 * Throws <code>IllegalArgumentException</code> if the input array
80 * is null.</p>
81 *
82 * @param values array of values to sum
83 * @return the sum of the values or <code>Double.NaN</code> if the array
84 * is empty
85 * @throws IllegalArgumentException if the array is null
86 */
87 public static double sum(final double[] values) {
88 return sum.evaluate(values);
89 }
90
91 /**
92 * Returns the sum of the entries in the specified portion of
93 * the input array, or <code>Double.NaN</code> if the designated subarray
94 * is empty.
95 * <p>
96 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
97 *
98 * @param values the input array
99 * @param begin index of the first array element to include
100 * @param length the number of elements to include
101 * @return the sum of the values or Double.NaN if length = 0
102 * @throws IllegalArgumentException if the array is null or the array index
103 * parameters are not valid
104 */
105 public static double sum(final double[] values, final int begin,
106 final int length) {
107 return sum.evaluate(values, begin, length);
108 }
109
110 /**
111 * Returns the sum of the squares of the entries in the input array, or
112 * <code>Double.NaN</code> if the array is empty.
113 * <p>
114 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
115 *
116 * @param values input array
117 * @return the sum of the squared values or <code>Double.NaN</code> if the
118 * array is empty
119 * @throws IllegalArgumentException if the array is null
120 */
121 public static double sumSq(final double[] values) {
122 return sumSq.evaluate(values);
123 }
124
125 /**
126 * Returns the sum of the squares of the entries in the specified portion of
127 * the input array, or <code>Double.NaN</code> if the designated subarray
128 * is empty.
129 * <p>
130 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
131 *
132 * @param values the input array
133 * @param begin index of the first array element to include
134 * @param length the number of elements to include
135 * @return the sum of the squares of the values or Double.NaN if length = 0
136 * @throws IllegalArgumentException if the array is null or the array index
137 * parameters are not valid
138 */
139 public static double sumSq(final double[] values, final int begin,
140 final int length) {
141 return sumSq.evaluate(values, begin, length);
142 }
143
144 /**
145 * Returns the product of the entries in the input array, or
146 * <code>Double.NaN</code> if the array is empty.
147 * <p>
148 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
149 *
150 * @param values the input array
151 * @return the product of the values or Double.NaN if the array is empty
152 * @throws IllegalArgumentException if the array is null
153 */
154 public static double product(final double[] values) {
155 return prod.evaluate(values);
156 }
157
158 /**
159 * Returns the product of the entries in the specified portion of
160 * the input array, or <code>Double.NaN</code> if the designated subarray
161 * is empty.
162 * <p>
163 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
164 *
165 * @param values the input array
166 * @param begin index of the first array element to include
167 * @param length the number of elements to include
168 * @return the product of the values or Double.NaN if length = 0
169 * @throws IllegalArgumentException if the array is null or the array index
170 * parameters are not valid
171 */
172 public static double product(final double[] values, final int begin,
173 final int length) {
174 return prod.evaluate(values, begin, length);
175 }
176
177 /**
178 * Returns the sum of the natural logs of the entries in the input array, or
179 * <code>Double.NaN</code> if the array is empty.
180 * <p>
181 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
182 * <p>
183 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
184 * </p>
185 *
186 * @param values the input array
187 * @return the sum of the natural logs of the values or Double.NaN if
188 * the array is empty
189 * @throws IllegalArgumentException if the array is null
190 */
191 public static double sumLog(final double[] values) {
192 return sumLog.evaluate(values);
193 }
194
195 /**
196 * Returns the sum of the natural logs of the entries in the specified portion of
197 * the input array, or <code>Double.NaN</code> if the designated subarray
198 * is empty.
199 * <p>
200 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
201 * <p>
202 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
203 * </p>
204 *
205 * @param values the input array
206 * @param begin index of the first array element to include
207 * @param length the number of elements to include
208 * @return the sum of the natural logs of the values or Double.NaN if
209 * length = 0
210 * @throws IllegalArgumentException if the array is null or the array index
211 * parameters are not valid
212 */
213 public static double sumLog(final double[] values, final int begin,
214 final int length) {
215 return sumLog.evaluate(values, begin, length);
216 }
217
218 /**
219 * Returns the arithmetic mean of the entries in the input array, or
220 * <code>Double.NaN</code> if the array is empty.
221 * <p>
222 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
223 * <p>
224 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
225 * details on the computing algorithm.</p>
226 *
227 * @param values the input array
228 * @return the mean of the values or Double.NaN if the array is empty
229 * @throws IllegalArgumentException if the array is null
230 */
231 public static double mean(final double[] values) {
232 return mean.evaluate(values);
233 }
234
235 /**
236 * Returns the arithmetic mean of the entries in the specified portion of
237 * the input array, or <code>Double.NaN</code> if the designated subarray
238 * is empty.
239 * <p>
240 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
241 * <p>
242 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
243 * details on the computing algorithm.</p>
244 *
245 * @param values the input array
246 * @param begin index of the first array element to include
247 * @param length the number of elements to include
248 * @return the mean of the values or Double.NaN if length = 0
249 * @throws IllegalArgumentException if the array is null or the array index
250 * parameters are not valid
251 */
252 public static double mean(final double[] values, final int begin,
253 final int length) {
254 return mean.evaluate(values, begin, length);
255 }
256
257 /**
258 * Returns the geometric mean of the entries in the input array, or
259 * <code>Double.NaN</code> if the array is empty.
260 * <p>
261 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
262 * <p>
263 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
264 * for details on the computing algorithm.</p>
265 *
266 * @param values the input array
267 * @return the geometric mean of the values or Double.NaN if the array is empty
268 * @throws IllegalArgumentException if the array is null
269 */
270 public static double geometricMean(final double[] values) {
271 return geometricMean.evaluate(values);
272 }
273
274 /**
275 * Returns the geometric mean of the entries in the specified portion of
276 * the input array, or <code>Double.NaN</code> if the designated subarray
277 * is empty.
278 * <p>
279 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
280 * <p>
281 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
282 * for details on the computing algorithm.</p>
283 *
284 * @param values the input array
285 * @param begin index of the first array element to include
286 * @param length the number of elements to include
287 * @return the geometric mean of the values or Double.NaN if length = 0
288 * @throws IllegalArgumentException if the array is null or the array index
289 * parameters are not valid
290 */
291 public static double geometricMean(final double[] values, final int begin,
292 final int length) {
293 return geometricMean.evaluate(values, begin, length);
294 }
295
296
297 /**
298 * Returns the variance of the entries in the input array, or
299 * <code>Double.NaN</code> if the array is empty.
300 * <p>
301 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
302 * details on the computing algorithm.</p>
303 * <p>
304 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
305 * <p>
306 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
307 *
308 * @param values the input array
309 * @return the variance of the values or Double.NaN if the array is empty
310 * @throws IllegalArgumentException if the array is null
311 */
312 public static double variance(final double[] values) {
313 return variance.evaluate(values);
314 }
315
316 /**
317 * Returns the variance of the entries in the specified portion of
318 * the input array, or <code>Double.NaN</code> if the designated subarray
319 * is empty.
320 * <p>
321 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
322 * details on the computing algorithm.</p>
323 * <p>
324 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
325 * <p>
326 * Throws <code>IllegalArgumentException</code> if the array is null or the
327 * array index parameters are not valid.</p>
328 *
329 * @param values the input array
330 * @param begin index of the first array element to include
331 * @param length the number of elements to include
332 * @return the variance of the values or Double.NaN if length = 0
333 * @throws IllegalArgumentException if the array is null or the array index
334 * parameters are not valid
335 */
336 public static double variance(final double[] values, final int begin,
337 final int length) {
338 return variance.evaluate(values, begin, length);
339 }
340
341 /**
342 * Returns the variance of the entries in the specified portion of
343 * the input array, using the precomputed mean value. Returns
344 * <code>Double.NaN</code> if the designated subarray is empty.
345 * <p>
346 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
347 * details on the computing algorithm.</p>
348 * <p>
349 * The formula used assumes that the supplied mean value is the arithmetic
350 * mean of the sample data, not a known population parameter. This method
351 * is supplied only to save computation when the mean has already been
352 * computed.</p>
353 * <p>
354 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
355 * <p>
356 * Throws <code>IllegalArgumentException</code> if the array is null or the
357 * array index parameters are not valid.</p>
358 *
359 * @param values the input array
360 * @param mean the precomputed mean value
361 * @param begin index of the first array element to include
362 * @param length the number of elements to include
363 * @return the variance of the values or Double.NaN if length = 0
364 * @throws IllegalArgumentException if the array is null or the array index
365 * parameters are not valid
366 */
367 public static double variance(final double[] values, final double mean,
368 final int begin, final int length) {
369 return variance.evaluate(values, mean, begin, length);
370 }
371
372 /**
373 * Returns the variance of the entries in the input array, using the
374 * precomputed mean value. Returns <code>Double.NaN</code> if the array
375 * is empty.
376 * <p>
377 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
378 * details on the computing algorithm.</p>
379 * <p>
380 * The formula used assumes that the supplied mean value is the arithmetic
381 * mean of the sample data, not a known population parameter. This method
382 * is supplied only to save computation when the mean has already been
383 * computed.</p>
384 * <p>
385 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
386 * <p>
387 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
388 *
389 * @param values the input array
390 * @param mean the precomputed mean value
391 * @return the variance of the values or Double.NaN if the array is empty
392 * @throws IllegalArgumentException if the array is null
393 */
394 public static double variance(final double[] values, final double mean) {
395 return variance.evaluate(values, mean);
396 }
397
398 /**
399 * Returns the maximum of the entries in the input array, or
400 * <code>Double.NaN</code> if the array is empty.
401 * <p>
402 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
403 * <p>
404 * <ul>
405 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
406 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
407 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
408 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
409 * </ul></p>
410 *
411 * @param values the input array
412 * @return the maximum of the values or Double.NaN if the array is empty
413 * @throws IllegalArgumentException if the array is null
414 */
415 public static double max(final double[] values) {
416 return max.evaluate(values);
417 }
418
419 /**
420 * Returns the maximum of the entries in the specified portion of
421 * the input array, or <code>Double.NaN</code> if the designated subarray
422 * is empty.
423 * <p>
424 * Throws <code>IllegalArgumentException</code> if the array is null or
425 * the array index parameters are not valid.</p>
426 * <p>
427 * <ul>
428 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
429 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
430 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
431 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
432 * </ul></p>
433 *
434 * @param values the input array
435 * @param begin index of the first array element to include
436 * @param length the number of elements to include
437 * @return the maximum of the values or Double.NaN if length = 0
438 * @throws IllegalArgumentException if the array is null or the array index
439 * parameters are not valid
440 */
441 public static double max(final double[] values, final int begin,
442 final int length) {
443 return max.evaluate(values, begin, length);
444 }
445
446 /**
447 * Returns the minimum of the entries in the input array, or
448 * <code>Double.NaN</code> if the array is empty.
449 * <p>
450 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
451 * <p>
452 * <ul>
453 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
454 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
455 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
456 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
457 * </ul> </p>
458 *
459 * @param values the input array
460 * @return the minimum of the values or Double.NaN if the array is empty
461 * @throws IllegalArgumentException if the array is null
462 */
463 public static double min(final double[] values) {
464 return min.evaluate(values);
465 }
466
467 /**
468 * Returns the minimum of the entries in the specified portion of
469 * the input array, or <code>Double.NaN</code> if the designated subarray
470 * is empty.
471 * <p>
472 * Throws <code>IllegalArgumentException</code> if the array is null or
473 * the array index parameters are not valid.</p>
474 * <p>
475 * <ul>
476 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
477 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
478 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
479 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
480 * </ul></p>
481 *
482 * @param values the input array
483 * @param begin index of the first array element to include
484 * @param length the number of elements to include
485 * @return the minimum of the values or Double.NaN if length = 0
486 * @throws IllegalArgumentException if the array is null or the array index
487 * parameters are not valid
488 */
489 public static double min(final double[] values, final int begin,
490 final int length) {
491 return min.evaluate(values, begin, length);
492 }
493
494 /**
495 * Returns an estimate of the <code>p</code>th percentile of the values
496 * in the <code>values</code> array.
497 * <p>
498 * <ul>
499 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
500 * <code>0</code></li></p>
501 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
502 * if <code>values</code> has length <code>1</code></li>
503 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
504 * is null or p is not a valid quantile value (p must be greater than 0
505 * and less than or equal to 100)</li>
506 * </ul></p>
507 * <p>
508 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
509 * a description of the percentile estimation algorithm used.</p>
510 *
511 * @param values input array of values
512 * @param p the percentile value to compute
513 * @return the percentile value or Double.NaN if the array is empty
514 * @throws IllegalArgumentException if <code>values</code> is null
515 * or p is invalid
516 */
517 public static double percentile(final double[] values, final double p) {
518 return percentile.evaluate(values,p);
519 }
520
521 /**
522 * Returns an estimate of the <code>p</code>th percentile of the values
523 * in the <code>values</code> array, starting with the element in (0-based)
524 * position <code>begin</code> in the array and including <code>length</code>
525 * values.
526 * <p>
527 * <ul>
528 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
529 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
530 * if <code>length = 1 </code></li>
531 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
532 * is null , <code>begin</code> or <code>length</code> is invalid, or
533 * <code>p</code> is not a valid quantile value (p must be greater than 0
534 * and less than or equal to 100)</li>
535 * </ul></p>
536 * <p>
537 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
538 * a description of the percentile estimation algorithm used.</p>
539 *
540 * @param values array of input values
541 * @param p the percentile to compute
542 * @param begin the first (0-based) element to include in the computation
543 * @param length the number of array elements to include
544 * @return the percentile value
545 * @throws IllegalArgumentException if the parameters are not valid or the
546 * input array is null
547 */
548 public static double percentile(final double[] values, final int begin,
549 final int length, final double p) {
550 return percentile.evaluate(values, begin, length, p);
551 }
552
553 /**
554 * Returns the sum of the (signed) differences between corresponding elements of the
555 * input arrays -- i.e., sum(sample1[i] - sample2[i]).
556 *
557 * @param sample1 the first array
558 * @param sample2 the second array
559 * @return sum of paired differences
560 * @throws IllegalArgumentException if the arrays do not have the same
561 * (positive) length
562 */
563 public static double sumDifference(final double[] sample1, final double[] sample2)
564 throws IllegalArgumentException {
565 int n = sample1.length;
566 if (n != sample2.length || n < 1) {
567 throw new IllegalArgumentException
568 ("Input arrays must have the same (positive) length.");
569 }
570 double result = 0;
571 for (int i = 0; i < n; i++) {
572 result += sample1[i] - sample2[i];
573 }
574 return result;
575 }
576
577 /**
578 * Returns the mean of the (signed) differences between corresponding elements of the
579 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
580 *
581 * @param sample1 the first array
582 * @param sample2 the second array
583 * @return mean of paired differences
584 * @throws IllegalArgumentException if the arrays do not have the same
585 * (positive) length
586 */
587 public static double meanDifference(final double[] sample1, final double[] sample2)
588 throws IllegalArgumentException {
589 return sumDifference(sample1, sample2) / (double) sample1.length;
590 }
591
592 /**
593 * Returns the variance of the (signed) differences between corresponding elements of the
594 * input arrays -- i.e., var(sample1[i] - sample2[i]).
595 *
596 * @param sample1 the first array
597 * @param sample2 the second array
598 * @param meanDifference the mean difference between corresponding entries
599 * @see #meanDifference(double[],double[])
600 * @return variance of paired differences
601 * @throws IllegalArgumentException if the arrays do not have the same
602 * length or their common length is less than 2.
603 */
604 public static double varianceDifference(final double[] sample1, final double[] sample2,
605 double meanDifference) throws IllegalArgumentException {
606 double sum1 = 0d;
607 double sum2 = 0d;
608 double diff = 0d;
609 int n = sample1.length;
610 if (n < 2 || n != sample2.length) {
611 throw new IllegalArgumentException("Input array lengths must be equal and at least 2.");
612 }
613 for (int i = 0; i < n; i++) {
614 diff = sample1[i] - sample2[i];
615 sum1 += (diff - meanDifference) *(diff - meanDifference);
616 sum2 += diff - meanDifference;
617 }
618 return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1);
619 }
620
621 }