1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.descriptive.rank;
18
19 import java.io.Serializable;
20 import java.util.Arrays;
21 import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
22
23 /**
24 * Provides percentile computation.
25 * <p>
26 * There are several commonly used methods for estimating percentiles (a.k.a.
27 * quantiles) based on sample data. For large samples, the different methods
28 * agree closely, but when sample sizes are small, different methods will give
29 * significantly different results. The algorithm implemented here works as follows:
30 * <ol>
 * <li>Let <code>n</code> be the length of the (sorted) array and
 * <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
33 * <li>If <code> n = 1 </code> return the unique array element (regardless of
34 * the value of <code>p</code>); otherwise </li>
 * <li>Compute the estimated percentile position
 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
 * part of <code>pos</code>). If <code>pos &lt; 1</code> return the smallest
 * element in the array; if <code>pos &gt;= n</code> return the largest
 * element in the array; otherwise</li>
 * <li>Let <code>lower</code> be the element in (1-based) position
 * <code>floor(pos)</code> in the sorted array and let <code>upper</code> be
 * the next element in the array. Return <code>lower + d * (upper - lower)</code>
 * </li>
44 * </ol></p>
45 * <p>
46 * To compute percentiles, the data must be (totally) ordered. Input arrays
47 * are copied and then sorted using {@link java.util.Arrays#sort(double[])}.
48 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
49 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
50 * <code>Double.NaN</code> larger than any other value (including
51 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
52 * (50th percentile) of
53 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
54 * <p>
55 * Since percentile estimation usually involves interpolation between array
56 * elements, arrays containing <code>NaN</code> or infinite values will often
 * result in <code>NaN</code> or infinite values returned.</p>
58 * <p>
59 * <strong>Note that this implementation is not synchronized.</strong> If
60 * multiple threads access an instance of this class concurrently, and at least
61 * one of the threads invokes the <code>increment()</code> or
62 * <code>clear()</code> method, it must be synchronized externally.</p>
63 *
64 * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
65 */
66 public class Percentile extends AbstractUnivariateStatistic implements Serializable {
67
68 /** Serializable version identifier */
69 private static final long serialVersionUID = -8091216485095130416L;
70
71 /** Determines what percentile is computed when evaluate() is activated
72 * with no quantile argument */
73 private double quantile = 0.0;
74
75 /**
76 * Constructs a Percentile with a default quantile
77 * value of 50.0.
78 */
79 public Percentile() {
80 this(50.0);
81 }
82
83 /**
84 * Constructs a Percentile with the specific quantile value.
85 * @param p the quantile
86 * @throws IllegalArgumentException if p is not greater than 0 and less
87 * than or equal to 100
88 */
89 public Percentile(final double p) {
90 setQuantile(p);
91 }
92
93 /**
94 * Returns an estimate of the <code>p</code>th percentile of the values
95 * in the <code>values</code> array.
96 * <p>
97 * Calls to this method do not modify the internal <code>quantile</code>
98 * state of this statistic.</p>
99 * <p>
100 * <ul>
101 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
102 * <code>0</code></li>
103 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
104 * if <code>values</code> has length <code>1</code></li>
105 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
106 * is null or p is not a valid quantile value (p must be greater than 0
107 * and less than or equal to 100) </li>
108 * </ul></p>
109 * <p>
110 * See {@link Percentile} for a description of the percentile estimation
111 * algorithm used.</p>
112 *
113 * @param values input array of values
114 * @param p the percentile value to compute
115 * @return the percentile value or Double.NaN if the array is empty
116 * @throws IllegalArgumentException if <code>values</code> is null
117 * or p is invalid
118 */
119 public double evaluate(final double[] values, final double p) {
120 test(values, 0, 0);
121 return evaluate(values, 0, values.length, p);
122 }
123
124 /**
125 * Returns an estimate of the <code>quantile</code>th percentile of the
126 * designated values in the <code>values</code> array. The quantile
127 * estimated is determined by the <code>quantile</code> property.
128 * <p>
129 * <ul>
130 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
131 * <li>Returns (for any value of <code>quantile</code>)
132 * <code>values[begin]</code> if <code>length = 1 </code></li>
133 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
134 * is null, or <code>start</code> or <code>length</code>
135 * is invalid</li>
136 * </ul></p>
137 * <p>
138 * See {@link Percentile} for a description of the percentile estimation
139 * algorithm used.</p>
140 *
141 * @param values the input array
142 * @param start index of the first array element to include
143 * @param length the number of elements to include
144 * @return the percentile value
145 * @throws IllegalArgumentException if the parameters are not valid
146 *
147 */
148 public double evaluate( final double[] values, final int start, final int length) {
149 return evaluate(values, start, length, quantile);
150 }
151
152 /**
153 * Returns an estimate of the <code>p</code>th percentile of the values
154 * in the <code>values</code> array, starting with the element in (0-based)
155 * position <code>begin</code> in the array and including <code>length</code>
156 * values.
157 * <p>
158 * Calls to this method do not modify the internal <code>quantile</code>
159 * state of this statistic.</p>
160 * <p>
161 * <ul>
162 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
163 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
164 * if <code>length = 1 </code></li>
165 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
166 * is null , <code>begin</code> or <code>length</code> is invalid, or
167 * <code>p</code> is not a valid quantile value (p must be greater than 0
168 * and less than or equal to 100)</li>
169 * </ul></p>
170 * <p>
171 * See {@link Percentile} for a description of the percentile estimation
172 * algorithm used.</p>
173 *
174 * @param values array of input values
175 * @param p the percentile to compute
176 * @param begin the first (0-based) element to include in the computation
177 * @param length the number of array elements to include
178 * @return the percentile value
179 * @throws IllegalArgumentException if the parameters are not valid or the
180 * input array is null
181 */
182 public double evaluate(final double[] values, final int begin,
183 final int length, final double p) {
184
185 test(values, begin, length);
186
187 if ((p > 100) || (p <= 0)) {
188 throw new IllegalArgumentException("invalid quantile value: " + p);
189 }
190 if (length == 0) {
191 return Double.NaN;
192 }
193 if (length == 1) {
194 return values[begin]; // always return single value for n = 1
195 }
196 double n = (double) length;
197 double pos = p * (n + 1) / 100;
198 double fpos = Math.floor(pos);
199 int intPos = (int) fpos;
200 double dif = pos - fpos;
201 double[] sorted = new double[length];
202 System.arraycopy(values, begin, sorted, 0, length);
203 Arrays.sort(sorted);
204
205 if (pos < 1) {
206 return sorted[0];
207 }
208 if (pos >= n) {
209 return sorted[length - 1];
210 }
211 double lower = sorted[intPos - 1];
212 double upper = sorted[intPos];
213 return lower + dif * (upper - lower);
214 }
215
216 /**
217 * Returns the value of the quantile field (determines what percentile is
218 * computed when evaluate() is called with no quantile argument).
219 *
220 * @return quantile
221 */
222 public double getQuantile() {
223 return quantile;
224 }
225
226 /**
227 * Sets the value of the quantile field (determines what percentile is
228 * computed when evaluate() is called with no quantile argument).
229 *
230 * @param p a value between 0 < p <= 100
231 * @throws IllegalArgumentException if p is not greater than 0 and less
232 * than or equal to 100
233 */
234 public void setQuantile(final double p) {
235 if (p <= 0 || p > 100) {
236 throw new IllegalArgumentException("Illegal quantile value: " + p);
237 }
238 quantile = p;
239 }
240
241 }