View Javadoc

1   /*
2    * $Id: TextUtil.java 720222 2008-11-24 16:41:07Z musachy $
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  package org.apache.struts2.views.util;
23  
24  
/**
 * Escapes text for inclusion in HTML.
 *
 * <p>Characters below {@link #MAX_LENGTH} are looked up in a table of entity
 * replacements; the euro sign (0x20AC) is handled as a special case. Every
 * other character is copied through unchanged. If nothing needs escaping the
 * input string itself is returned and no buffer is allocated.</p>
 *
 * <p>NOTE(review): the single quote (0x27) has no mapping and is therefore not
 * escaped. Callers that emit values inside single-quoted HTML attributes should
 * confirm this is acceptable.</p>
 */
public class TextUtil {

    /** Size of the lookup table; only characters below this value are looked up. */
    protected static final int MAX_LENGTH = 300;

    /**
     * Lookup table indexed by character code. A non-null entry is the entity
     * that replaces that character. Entries are char arrays rather than Strings
     * because they are appended directly to a StringBuilder.
     */
    protected static final char[][] _stringChars = new char[MAX_LENGTH][];

    /** Code of the euro sign, the only character at or above MAX_LENGTH that is escaped. */
    private static final int EURO_SIGN = 0x20AC;

    /** Entity emitted for the euro sign. */
    private static final char[] EURO_ENTITY = "&euro;".toCharArray();

    static {
        // Initialize the mapping table
        initMapping();
    }

    /**
     * Escapes HTML by calling {@code escapeHTML(s, false)}.
     *
     * @param s string to be escaped, may be null
     * @return the escaped string, {@code s} itself if nothing had to be escaped,
     *         or null if {@code s} is null
     */
    public static final String escapeHTML(String s) {
        return escapeHTML(s, false);
    }

    /**
     * Escapes HTML.
     *
     * <p>A null or zero-length string is always returned as is.</p>
     *
     * @param s           string to be escaped, may be null
     * @param escapeEmpty if false, a string that is empty after {@code trim()},
     *                    or whose trimmed form is exactly two double-quote
     *                    characters, is returned unchanged; if true, such
     *                    strings go through normal escaping
     * @return the escaped string, {@code s} itself if nothing had to be escaped,
     *         or null if {@code s} is null
     */
    public static final String escapeHTML(String s, boolean escapeEmpty) {
        if (s == null) {
            return null;
        }

        int len = s.length();

        if (len == 0) {
            return s;
        }

        if (!escapeEmpty) {
            String trimmed = s.trim();

            if ((trimmed.length() == 0) || ("\"\"").equals(trimmed)) {
                return s;
            }
        }

        // Scan first without copying anything: most strings need no escaping
        int first = firstEscapableIndex(s, len);

        if (first == len) {
            return s;
        }

        // Something has to be escaped. Work on a char[] copy so that unescaped
        // runs can be appended in bulk with array indexing instead of charAt.
        StringBuilder sb = new StringBuilder(len + 40);
        char[] chars = new char[len];
        s.getChars(0, len, chars, 0);

        // 'last' is the start of the pending run of characters not yet copied
        int last = 0;

        for (int i = first; i < len; i++) {
            char[] subst = entityFor(chars[i]);

            if (subst != null) {
                if (i > last) {
                    sb.append(chars, last, i - last);
                }

                sb.append(subst);
                last = i + 1;
            }
        }

        // Flush whatever follows the final escaped character
        if (len > last) {
            sb.append(chars, last, len - last);
        }

        return sb.toString();
    }

    /**
     * Returns the index of the first character of {@code s} that has an entity
     * replacement, or {@code len} if there is none.
     */
    private static int firstEscapableIndex(String s, int len) {
        for (int i = 0; i < len; i++) {
            if (entityFor(s.charAt(i)) != null) {
                return i;
            }
        }

        return len;
    }

    /**
     * Returns the entity that replaces {@code c}, or null if {@code c} is
     * copied through unchanged.
     */
    private static char[] entityFor(char c) {
        if (c < MAX_LENGTH) {
            return _stringChars[c];
        }

        // Outside the table only the euro sign is escaped. This could become a
        // second lookup table if more characters in that area need converting.
        return (c == EURO_SIGN) ? EURO_ENTITY : null;
    }

    /**
     * Stores {@code txt} as the replacement for character code {@code c}.
     *
     * @param c       character code, used as index into {@code strings}
     * @param txt     replacement text for that character
     * @param strings table being filled
     */
    protected static void addMapping(int c, String txt, String[] strings) {
        strings[c] = txt;
    }

    /**
     * Fills {@link #_stringChars} with the entity replacements known to this
     * class: the four HTML markup characters and a set of Latin-1 characters.
     */
    protected static void initMapping() {
        String[] strings = new String[MAX_LENGTH];

        // HTML markup characters
        addMapping(0x22, "&quot;", strings); // "
        addMapping(0x26, "&amp;", strings); // &
        addMapping(0x3c, "&lt;", strings); // <
        addMapping(0x3e, "&gt;", strings); // >

        // Latin-1 punctuation and symbols
        addMapping(0xa1, "&iexcl;", strings);
        addMapping(0xa2, "&cent;", strings);
        addMapping(0xa3, "&pound;", strings);
        addMapping(0xa9, "&copy;", strings);
        addMapping(0xae, "&reg;", strings);
        addMapping(0xbf, "&iquest;", strings);

        // Latin-1 range 0xC0-0xCF
        addMapping(0xc0, "&Agrave;", strings);
        addMapping(0xc1, "&Aacute;", strings);
        addMapping(0xc2, "&Acirc;", strings);
        addMapping(0xc3, "&Atilde;", strings);
        addMapping(0xc4, "&Auml;", strings);
        addMapping(0xc5, "&Aring;", strings);
        addMapping(0xc6, "&AElig;", strings);
        addMapping(0xc7, "&Ccedil;", strings);
        addMapping(0xc8, "&Egrave;", strings);
        addMapping(0xc9, "&Eacute;", strings);
        addMapping(0xca, "&Ecirc;", strings);
        addMapping(0xcb, "&Euml;", strings);
        addMapping(0xcc, "&Igrave;", strings);
        addMapping(0xcd, "&Iacute;", strings);
        addMapping(0xce, "&Icirc;", strings);
        addMapping(0xcf, "&Iuml;", strings);

        // Latin-1 range 0xD0-0xDF
        addMapping(0xd0, "&ETH;", strings);
        addMapping(0xd1, "&Ntilde;", strings);
        addMapping(0xd2, "&Ograve;", strings);
        addMapping(0xd3, "&Oacute;", strings);
        addMapping(0xd4, "&Ocirc;", strings);
        addMapping(0xd5, "&Otilde;", strings);
        addMapping(0xd6, "&Ouml;", strings);
        addMapping(0xd7, "&times;", strings);
        addMapping(0xd8, "&Oslash;", strings);
        addMapping(0xd9, "&Ugrave;", strings);
        addMapping(0xda, "&Uacute;", strings);
        addMapping(0xdb, "&Ucirc;", strings);
        addMapping(0xdc, "&Uuml;", strings);
        addMapping(0xdd, "&Yacute;", strings);
        addMapping(0xde, "&THORN;", strings);
        addMapping(0xdf, "&szlig;", strings);

        // Latin-1 range 0xE0-0xEF
        addMapping(0xe0, "&agrave;", strings);
        addMapping(0xe1, "&aacute;", strings);
        addMapping(0xe2, "&acirc;", strings);
        addMapping(0xe3, "&atilde;", strings);
        addMapping(0xe4, "&auml;", strings);
        addMapping(0xe5, "&aring;", strings);
        addMapping(0xe6, "&aelig;", strings);
        addMapping(0xe7, "&ccedil;", strings);
        addMapping(0xe8, "&egrave;", strings);
        addMapping(0xe9, "&eacute;", strings);
        addMapping(0xea, "&ecirc;", strings);
        addMapping(0xeb, "&euml;", strings);
        addMapping(0xec, "&igrave;", strings);
        addMapping(0xed, "&iacute;", strings);
        addMapping(0xee, "&icirc;", strings);
        addMapping(0xef, "&iuml;", strings);

        // Latin-1 range 0xF0-0xFF
        addMapping(0xf0, "&eth;", strings);
        addMapping(0xf1, "&ntilde;", strings);
        addMapping(0xf2, "&ograve;", strings);
        addMapping(0xf3, "&oacute;", strings);
        addMapping(0xf4, "&ocirc;", strings);
        addMapping(0xf5, "&otilde;", strings);
        addMapping(0xf6, "&ouml;", strings);
        addMapping(0xf7, "&divide;", strings);
        addMapping(0xf8, "&oslash;", strings);
        addMapping(0xf9, "&ugrave;", strings);
        addMapping(0xfa, "&uacute;", strings);
        addMapping(0xfb, "&ucirc;", strings);
        addMapping(0xfc, "&uuml;", strings);
        addMapping(0xfd, "&yacute;", strings);
        addMapping(0xfe, "&thorn;", strings);
        addMapping(0xff, "&yuml;", strings);

        // Convert to char[] once so escaping never has to do it per call
        for (int i = 0; i < strings.length; i++) {
            String str = strings[i];

            if (str != null) {
                _stringChars[i] = str.toCharArray();
            }
        }
    }
}