%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
org.apache.commons.validator.UrlValidator |
|
|
1 | /* |
|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
|
3 | * contributor license agreements. See the NOTICE file distributed with |
|
4 | * this work for additional information regarding copyright ownership. |
|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
|
6 | * (the "License"); you may not use this file except in compliance with |
|
7 | * the License. You may obtain a copy of the License at |
|
8 | * |
|
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
10 | * |
|
11 | * Unless required by applicable law or agreed to in writing, software |
|
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 | * See the License for the specific language governing permissions and |
|
15 | * limitations under the License. |
|
16 | */ |
|
17 | package org.apache.commons.validator; |
|
18 | ||
19 | import java.io.Serializable; |
|
20 | import java.util.Arrays; |
|
21 | import java.util.HashSet; |
|
22 | import java.util.Set; |
|
23 | ||
24 | import org.apache.commons.validator.util.Flags; |
|
25 | import org.apache.oro.text.perl.Perl5Util; |
|
26 | ||
27 | /** |
|
28 | * <p>Validates URLs.</p> |
|
29 | * Behaviour of validation is modified by passing in options: |
|
30 | * <li>ALLOW_2_SLASHES - [FALSE] Allows double '/' characters in the path |
|
31 | * component.</li> |
|
32 | * <li>NO_FRAGMENTS - [FALSE] By default fragments are allowed, if this option is |
|
33 | * included then fragments are flagged as illegal.</li> |
|
34 | * <li>ALLOW_ALL_SCHEMES - [FALSE] By default only http, https, and ftp are |
|
35 | * considered valid schemes. Enabling this option will let any scheme pass validation.</li> |
|
36 | * |
|
37 | * <p>Originally based on a PHP script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02, |
|
38 | * http://javascript.internet.com. However, this validation now bears little resemblance |
|
39 | * to the php original.</p> |
|
40 | * <pre> |
|
41 | * Example of usage: |
|
42 | * Construct a UrlValidator with valid schemes of "http", and "https". |
|
43 | * |
|
44 | * String[] schemes = {"http","https"}; |
|
45 | * UrlValidator urlValidator = new UrlValidator(schemes); |
|
46 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
47 | * System.out.println("url is valid"); |
|
48 | * } else { |
|
49 | * System.out.println("url is invalid"); |
|
50 | * } |
|
51 | * |
|
52 | * prints "url is invalid" |
|
53 | * If instead the default constructor is used. |
|
54 | * |
|
55 | * UrlValidator urlValidator = new UrlValidator(); |
|
56 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
57 | * System.out.println("url is valid"); |
|
58 | * } else { |
|
59 | * System.out.println("url is invalid"); |
|
60 | * } |
|
61 | * |
|
62 | * prints out "url is valid" |
|
63 | * </pre> |
|
64 | * |
|
65 | * @see |
|
66 | * <a href='http://www.ietf.org/rfc/rfc2396.txt' > |
|
67 | * Uniform Resource Identifiers (URI): Generic Syntax |
|
68 | * </a> |
|
69 | * |
|
70 | * @version $Revision: 478334 $ $Date: 2006-11-22 21:31:54 +0000 (Wed, 22 Nov 2006) $ |
|
71 | * @since Validator 1.1 |
|
72 | */ |
|
73 | public class UrlValidator implements Serializable { |
|
74 | ||
75 | /** |
|
76 | * Allows all validly formatted schemes to pass validation instead of |
|
77 | * supplying a set of valid schemes. |
|
78 | */ |
|
79 | public static final int ALLOW_ALL_SCHEMES = 1 << 0; |
|
80 | ||
81 | /** |
|
82 | * Allow two slashes in the path component of the URL. |
|
83 | */ |
|
84 | public static final int ALLOW_2_SLASHES = 1 << 1; |
|
85 | ||
86 | /** |
|
87 | * Enabling this option disallows any URL fragments. |
|
88 | */ |
|
89 | public static final int NO_FRAGMENTS = 1 << 2; |
|
90 | ||
91 | private static final String ALPHA_CHARS = "a-zA-Z"; |
|
92 | ||
93 | private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d"; |
|
94 | ||
95 | private static final String SPECIAL_CHARS = ";/@&=,.?:+$"; |
|
96 | ||
97 | private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]"; |
|
98 | ||
99 | private static final String SCHEME_CHARS = ALPHA_CHARS; |
|
100 | ||
101 | // Drop numeric, and "+-." for now |
|
102 | private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\."; |
|
103 | ||
104 | private static final String ATOM = VALID_CHARS + '+'; |
|
105 | ||
106 | /** |
|
107 | * This expression derived/taken from the BNF for URI (RFC2396). |
|
108 | */ |
|
109 | private static final String URL_PATTERN = |
|
110 | "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/"; |
|
111 | // 12 3 4 5 6 7 8 9 |
|
112 | ||
113 | /** |
|
114 | * Scheme/Protocol (e.g. http:, ftp:, file:, etc). |
|
115 | */ |
|
116 | private static final int PARSE_URL_SCHEME = 2; |
|
117 | ||
118 | /** |
|
119 | * Includes hostname/ip and port number. |
|
120 | */ |
|
121 | private static final int PARSE_URL_AUTHORITY = 4; |
|
122 | ||
123 | private static final int PARSE_URL_PATH = 5; |
|
124 | ||
125 | private static final int PARSE_URL_QUERY = 7; |
|
126 | ||
127 | private static final int PARSE_URL_FRAGMENT = 9; |
|
128 | ||
129 | /** |
|
130 | * Protocol (e.g. http:, ftp:, https:). |
|
131 | */ |
|
132 | private static final String SCHEME_PATTERN = "/^[" + SCHEME_CHARS + "]/"; |
|
133 | ||
134 | private static final String AUTHORITY_PATTERN = |
|
135 | "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/"; |
|
136 | // 1 2 3 4 |
|
137 | ||
138 | private static final int PARSE_AUTHORITY_HOST_IP = 1; |
|
139 | ||
140 | private static final int PARSE_AUTHORITY_PORT = 2; |
|
141 | ||
142 | /** |
|
143 | * Should always be empty. |
|
144 | */ |
|
145 | private static final int PARSE_AUTHORITY_EXTRA = 3; |
|
146 | ||
147 | private static final String PATH_PATTERN = "/^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$/"; |
|
148 | ||
149 | private static final String QUERY_PATTERN = "/^(.*)$/"; |
|
150 | ||
151 | private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/"; |
|
152 | ||
153 | private static final String IP_V4_DOMAIN_PATTERN = |
|
154 | "/^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$/"; |
|
155 | ||
156 | private static final String DOMAIN_PATTERN = |
|
157 | "/^" + ATOM + "(\\." + ATOM + ")*$/"; |
|
158 | ||
159 | private static final String PORT_PATTERN = "/^:(\\d{1,5})$/"; |
|
160 | ||
161 | private static final String ATOM_PATTERN = "/(" + ATOM + ")/"; |
|
162 | ||
163 | private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/"; |
|
164 | ||
165 | /** |
|
166 | * Holds the set of current validation options. |
|
167 | */ |
|
168 | 20 | private Flags options = null; |
169 | ||
170 | /** |
|
171 | * The set of schemes that are allowed to be in a URL. |
|
172 | */ |
|
173 | 20 | private Set allowedSchemes = new HashSet(); |
174 | ||
175 | /** |
|
176 | * If no schemes are provided, default to this set. |
|
177 | */ |
|
178 | 20 | protected String[] defaultSchemes = {"http", "https", "ftp"}; |
179 | ||
180 | /** |
|
181 | * Create a UrlValidator with default properties. |
|
182 | */ |
|
183 | public UrlValidator() { |
|
184 | 15 | this(null); |
185 | 15 | } |
186 | ||
187 | /** |
|
188 | * Behavior of validation is modified by passing in the set of valid schemes: |
|
189 | * @param schemes Pass in one or more url schemes to consider valid, passing in |
|
190 | * a null will default to "http,https,ftp" being valid. |
|
191 | * If a non-null schemes is specified then all valid schemes must |
|
192 | * be specified. Setting the ALLOW_ALL_SCHEMES option will |
|
193 | * ignore the contents of schemes. |
|
194 | */ |
|
195 | public UrlValidator(String[] schemes) { |
|
196 | 16 | this(schemes, 0); |
197 | 16 | } |
198 | ||
199 | /** |
|
200 | * Initialize a UrlValidator with the given validation options. |
|
201 | * @param options The options should be set using the public constants declared in |
|
202 | * this class. To set multiple options you simply add them together. For example, |
|
203 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
204 | */ |
|
205 | public UrlValidator(int options) { |
|
206 | 0 | this(null, options); |
207 | 0 | } |
208 | ||
209 | /** |
|
210 | * Behaviour of validation is modified by passing in options: |
|
211 | * @param schemes The set of valid schemes. |
|
212 | * @param options The options should be set using the public constants declared in |
|
213 | * this class. To set multiple options you simply add them together. For example, |
|
214 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
215 | */ |
|
216 | 20 | public UrlValidator(String[] schemes, int options) { |
217 | 20 | this.options = new Flags(options); |
218 | ||
219 | 20 | if (this.options.isOn(ALLOW_ALL_SCHEMES)) { |
220 | 2 | return; |
221 | } |
|
222 | ||
223 | 18 | if (schemes == null) { |
224 | 15 | schemes = this.defaultSchemes; |
225 | } |
|
226 | ||
227 | 18 | this.allowedSchemes.addAll(Arrays.asList(schemes)); |
228 | 18 | } |
229 | ||
230 | /** |
|
231 | * <p>Checks if a field has a valid url address.</p> |
|
232 | * |
|
233 | * @param value The value validation is being performed on. A <code>null</code> |
|
234 | * value is considered invalid. |
|
235 | * @return true if the url is valid. |
|
236 | */ |
|
237 | public boolean isValid(String value) { |
|
238 | 75606 | if (value == null) { |
239 | 0 | return false; |
240 | } |
|
241 | ||
242 | 75606 | Perl5Util matchUrlPat = new Perl5Util(); |
243 | 75606 | Perl5Util matchAsciiPat = new Perl5Util(); |
244 | ||
245 | 75606 | if (!matchAsciiPat.match(LEGAL_ASCII_PATTERN, value)) { |
246 | 0 | return false; |
247 | } |
|
248 | ||
249 | // Check the whole url address structure |
|
250 | 75606 | if (!matchUrlPat.match(URL_PATTERN, value)) { |
251 | 0 | return false; |
252 | } |
|
253 | ||
254 | 75606 | if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) { |
255 | 28350 | return false; |
256 | } |
|
257 | ||
258 | 47256 | if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) { |
259 | 39375 | return false; |
260 | } |
|
261 | ||
262 | 7881 | if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) { |
263 | 2520 | return false; |
264 | } |
|
265 | ||
266 | 5361 | if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) { |
267 | 0 | return false; |
268 | } |
|
269 | ||
270 | 5361 | if (!isValidFragment(matchUrlPat.group(PARSE_URL_FRAGMENT))) { |
271 | 630 | return false; |
272 | } |
|
273 | ||
274 | 4731 | return true; |
275 | } |
|
276 | ||
277 | /** |
|
278 | * Validate scheme. If schemes[] was initialized to a non null, |
|
279 | * then only those scheme's are allowed. Note this is slightly different |
|
280 | * than for the constructor. |
|
281 | * @param scheme The scheme to validate. A <code>null</code> value is considered |
|
282 | * invalid. |
|
283 | * @return true if valid. |
|
284 | */ |
|
285 | protected boolean isValidScheme(String scheme) { |
|
286 | 75610 | if (scheme == null) { |
287 | 18900 | return false; |
288 | } |
|
289 | ||
290 | 56710 | Perl5Util schemeMatcher = new Perl5Util(); |
291 | 56710 | if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) { |
292 | 9450 | return false; |
293 | } |
|
294 | ||
295 | 47260 | if (this.options.isOff(ALLOW_ALL_SCHEMES)) { |
296 | ||
297 | 6 | if (!this.allowedSchemes.contains(scheme)) { |
298 | 3 | return false; |
299 | } |
|
300 | } |
|
301 | ||
302 | 47257 | return true; |
303 | } |
|
304 | ||
305 | /** |
|
306 | * Returns true if the authority is properly formatted. An authority is the combination |
|
307 | * of hostname and port. A <code>null</code> authority value is considered invalid. |
|
308 | * @param authority Authority value to validate. |
|
309 | * @return true if authority (hostname and port) is valid. |
|
310 | */ |
|
311 | protected boolean isValidAuthority(String authority) { |
|
312 | 47256 | if (authority == null) { |
313 | 18831 | return false; |
314 | } |
|
315 | ||
316 | 28425 | Perl5Util authorityMatcher = new Perl5Util(); |
317 | 28425 | Perl5Util matchIPV4Pat = new Perl5Util(); |
318 | ||
319 | 28425 | if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) { |
320 | 0 | return false; |
321 | } |
|
322 | ||
323 | 28425 | boolean ipV4Address = false; |
324 | 28425 | boolean hostname = false; |
325 | // check if authority is IP address or hostname |
|
326 | 28425 | String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP); |
327 | 28425 | ipV4Address = matchIPV4Pat.match(IP_V4_DOMAIN_PATTERN, hostIP); |
328 | ||
329 | 28425 | if (ipV4Address) { |
330 | // this is an IP address so check components |
|
331 | 17325 | for (int i = 1; i <= 4; i++) { |
332 | 14175 | String ipSegment = matchIPV4Pat.group(i); |
333 | 14175 | if (ipSegment == null || ipSegment.length() <= 0) { |
334 | 0 | return false; |
335 | } |
|
336 | ||
337 | try { |
|
338 | 14175 | if (Integer.parseInt(ipSegment) > 255) { |
339 | 1575 | return false; |
340 | } |
|
341 | 12600 | } catch(NumberFormatException e) { |
342 | 0 | return false; |
343 | } |
|
344 | ||
345 | } |
|
346 | } else { |
|
347 | // Domain is a hostname |
|
348 | 23700 | Perl5Util domainMatcher = new Perl5Util(); |
349 | 23700 | hostname = domainMatcher.match(DOMAIN_PATTERN, hostIP); |
350 | } |
|
351 | ||
352 | //rightmost hostname will never start with a digit. |
|
353 | 26850 | if (hostname) { |
354 | // LOW-TECH FIX FOR VALIDATOR-202 |
|
355 | // TODO: Rewrite to use ArrayList and .add semantics: see VALIDATOR-203 |
|
356 | 15789 | char[] chars = hostIP.toCharArray(); |
357 | 15789 | int size = 1; |
358 | 118890 | for(int i=0; i<chars.length; i++) { |
359 | 103101 | if(chars[i] == '.') { |
360 | 22119 | size++; |
361 | } |
|
362 | } |
|
363 | 15789 | String[] domainSegment = new String[size]; |
364 | 15789 | boolean match = true; |
365 | 15789 | int segmentCount = 0; |
366 | 15789 | int segmentLength = 0; |
367 | 15789 | Perl5Util atomMatcher = new Perl5Util(); |
368 | ||
369 | 85275 | while (match) { |
370 | 53697 | match = atomMatcher.match(ATOM_PATTERN, hostIP); |
371 | 53697 | if (match) { |
372 | 37908 | domainSegment[segmentCount] = atomMatcher.group(1); |
373 | 37908 | segmentLength = domainSegment[segmentCount].length() + 1; |
374 | 37908 | hostIP = |
375 | (segmentLength >= hostIP.length()) |
|
376 | ? "" |
|
377 | : hostIP.substring(segmentLength); |
|
378 | ||
379 | 37908 | segmentCount++; |
380 | } |
|
381 | } |
|
382 | 15789 | String topLevel = domainSegment[segmentCount - 1]; |
383 | 15789 | if (topLevel.length() < 2 || topLevel.length() > 4) { |
384 | 4749 | return false; |
385 | } |
|
386 | ||
387 | // First letter of the top-level domain must be an alpha character |
|
388 | 11040 | Perl5Util alphaMatcher = new Perl5Util(); |
389 | 11040 | if (!alphaMatcher.match(ALPHA_PATTERN, topLevel.substring(0, 1))) { |
390 | 1575 | return false; |
391 | } |
|
392 | ||
393 | // Make sure there's a host name preceding the top-level domain. |
|
394 | 9465 | if (segmentCount < 2) { |
395 | 1584 | return false; |
396 | } |
|
397 | } |
|
398 | ||
399 | 18942 | if (!hostname && !ipV4Address) { |
400 | 7911 | return false; |
401 | } |
|
402 | ||
403 | 11031 | String port = authorityMatcher.group(PARSE_AUTHORITY_PORT); |
404 | 11031 | if (port != null) { |
405 | 9450 | Perl5Util portMatcher = new Perl5Util(); |
406 | 9450 | if (!portMatcher.match(PORT_PATTERN, port)) { |
407 | 1575 | return false; |
408 | } |
|
409 | } |
|
410 | ||
411 | 9456 | String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA); |
412 | 9456 | if (!GenericValidator.isBlankOrNull(extra)) { |
413 | 1575 | return false; |
414 | } |
|
415 | ||
416 | 7881 | return true; |
417 | } |
|
418 | ||
419 | /** |
|
420 | * Returns true if the path is valid. A <code>null</code> value is considered invalid. |
|
421 | * @param path Path value to validate. |
|
422 | * @return true if path is valid. |
|
423 | */ |
|
424 | protected boolean isValidPath(String path) { |
|
425 | 7881 | if (path == null) { |
426 | 0 | return false; |
427 | } |
|
428 | ||
429 | 7881 | Perl5Util pathMatcher = new Perl5Util(); |
430 | ||
431 | 7881 | if (!pathMatcher.match(PATH_PATTERN, path)) { |
432 | 0 | return false; |
433 | } |
|
434 | ||
435 | 7881 | int slash2Count = countToken("//", path); |
436 | 7881 | if (this.options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) { |
437 | 630 | return false; |
438 | } |
|
439 | ||
440 | 7251 | int slashCount = countToken("/", path); |
441 | 7251 | int dot2Count = countToken("..", path); |
442 | 7251 | if (dot2Count > 0) { |
443 | 1890 | if ((slashCount - slash2Count - 1) <= dot2Count) { |
444 | 1890 | return false; |
445 | } |
|
446 | } |
|
447 | ||
448 | 5361 | return true; |
449 | } |
|
450 | ||
451 | /** |
|
452 | * Returns true if the query is null or it's a properly formatted query string. |
|
453 | * @param query Query value to validate. |
|
454 | * @return true if query is valid. |
|
455 | */ |
|
456 | protected boolean isValidQuery(String query) { |
|
457 | 5361 | if (query == null) { |
458 | 2211 | return true; |
459 | } |
|
460 | ||
461 | 3150 | Perl5Util queryMatcher = new Perl5Util(); |
462 | 3150 | return queryMatcher.match(QUERY_PATTERN, query); |
463 | } |
|
464 | ||
465 | /** |
|
466 | * Returns true if the given fragment is null or fragments are allowed. |
|
467 | * @param fragment Fragment value to validate. |
|
468 | * @return true if fragment is valid. |
|
469 | */ |
|
470 | protected boolean isValidFragment(String fragment) { |
|
471 | 5361 | if (fragment == null) { |
472 | 4731 | return true; |
473 | } |
|
474 | ||
475 | 630 | return this.options.isOff(NO_FRAGMENTS); |
476 | } |
|
477 | ||
478 | /** |
|
479 | * Returns the number of times the token appears in the target. |
|
480 | * @param token Token value to be counted. |
|
481 | * @param target Target value to count tokens in. |
|
482 | * @return the number of tokens. |
|
483 | */ |
|
484 | protected int countToken(String token, String target) { |
|
485 | 22383 | int tokenIndex = 0; |
486 | 22383 | int count = 0; |
487 | 81014 | while (tokenIndex != -1) { |
488 | 36248 | tokenIndex = target.indexOf(token, tokenIndex); |
489 | 36248 | if (tokenIndex > -1) { |
490 | 13865 | tokenIndex++; |
491 | 13865 | count++; |
492 | } |
|
493 | } |
|
494 | 22383 | return count; |
495 | } |
|
496 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |