1 package org.apache.turbine.services.mimetype.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import java.io.File;
20 import java.io.FileInputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23
24 import java.util.HashMap;
25 import java.util.Hashtable;
26 import java.util.Locale;
27 import java.util.Map;
28 import java.util.Properties;
29
/**
 * This class maintains a set of mappers defining mappings
 * between locales and the corresponding charsets. The mappings
 * are defined as properties between locale and charset names.
 * The definitions can be listed in property files located in user's
 * home directory, Java home directory or the current class jar.
 * In addition, this class maintains static default mappings
 * and constructors support application specific mappings.
 *
 * <p>Mappers are consulted in priority order: lookup cache,
 * application specific mappings, user home file, Java home file,
 * class path resource and finally the built-in common mappings.
 *
 * <p>The setter methods are synchronized; the lookup methods are not.
 *
 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
 * @version $Id: CharSetMap.java 264148 2005-08-29 14:21:04Z henning $
 */
public class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /**
     * Priorities of available mappers. A lower index is consulted first.
     */
    private static final int MAP_CACHE = 0;
    private static final int MAP_PROG = 1;
    private static final int MAP_HOME = 2;
    private static final int MAP_SYS = 3;
    private static final int MAP_JAR = 4;
    private static final int MAP_COM = 5;

    /**
     * A common charset mapper for languages. This map itself is never
     * modified after class initialization; instances replace their own
     * MAP_COM slot with a modified copy instead.
     */
    private static final HashMap commonMapper = new HashMap();

    static
    {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * An array of available charset mappers, indexed by the MAP_*
     * priorities. A slot is null when that source is not available.
     */
    private final Map[] mappers = new Map[6];

    /**
     * Loads mappings from a stream. The stream is not closed by this
     * method; closing it remains the responsibility of the caller.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
        throws IOException
    {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file is opened and closed
     * by this method.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file)
        throws IOException
    {
        InputStream input = new FileInputStream(file);
        try
        {
            return loadStream(input);
        }
        finally
        {
            // Properties.load() leaves the stream open, so release
            // the file handle here on both success and failure.
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path)
        throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a resource. The resource is looked up
     * relative to this class and its stream is closed by this method.
     *
     * @param name a resource name.
     * @return the mappings, or null if the resource does not exist
     *         or cannot be read.
     */
    protected static Map loadResource(String name)
    {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null)
        {
            return null;
        }

        try
        {
            return loadStream(input);
        }
        catch (IOException x)
        {
            // An unreadable resource is treated like a missing one.
            return null;
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Closes a stream, ignoring a failure to do so, so that closing
     * never replaces the result or exception of the preceding read.
     *
     * @param input the stream to close.
     */
    private static void closeQuietly(InputStream input)
    {
        try
        {
            input.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done about a failed close.
        }
    }

    /**
     * Constructs a new charset map with default mappers. The optional
     * property files in the user's home directory and in the "lib"
     * directory of the Java home are loaded on a best-effort basis;
     * a missing or unreadable file simply leaves that mapper unset.
     */
    public CharSetMap()
    {
        String path;
        try
        {
            // Check whether the user directory contains mappings.
            path = System.getProperty("user.home");
            if (path != null)
            {
                path = path + File.separator + CHARSET_RESOURCE;
                mappers[MAP_HOME] = loadPath(path);
            }
        }
        catch (Exception ignored)
        {
            // Optional source: any failure leaves MAP_HOME unset.
        }

        try
        {
            // Check whether the system directory contains mappings.
            path = System.getProperty("java.home") +
                File.separator + "lib" + File.separator + CHARSET_RESOURCE;
            mappers[MAP_SYS] = loadPath(path);
        }
        catch (Exception ignored)
        {
            // Optional source: any failure leaves MAP_SYS unset.
        }

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props)
    {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream. The stream is
     * not closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping. The application specific mapper
     * is copied, the copy is modified and installed in place of the
     * previous one, and the lookup cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key,
                                        String charset)
    {
        Map current = mappers[MAP_PROG];
        Map updated = (current != null) ? new HashMap(current) : new HashMap();
        updated.put(key, charset);
        mappers[MAP_PROG] = updated;
        mappers[MAP_CACHE].clear();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale.
     * @return the charset.
     */
    public String getCharSet(Locale locale)
    {
        // Check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            // The locale string is empty here, so build a key
            // from the variant alone.
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                return DEFAULT_CHARSET;
            }
        }
        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[3];
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     * A null or empty variant delegates to {@link #getCharSet(Locale)}.
     *
     * @param locale the locale.
     * @param variant a variant field.
     * @return the charset.
     */
    public String getCharSet(Locale locale,
                             String variant)
    {
        // Check first if a variant was specified.
        if ((variant != null) &&
            (variant.length() > 0))
        {
            String key = locale.toString();
            if (key.length() == 0)
            {
                key = "__" + locale.getVariant();
                if (key.length() > 2)
                {
                    key += '_' + variant;
                }
                else
                {
                    key += variant;
                }
            }
            else if (locale.getCountry().length() == 0)
            {
                key += "__" + variant;
            }
            else
            {
                key += '_' + variant;
            }
            String charset = searchCharSet(key);
            if (charset.length() == 0)
            {
                // Not found, perform a full search and update the cache.
                String[] items = new String[4];
                items[3] = variant;
                items[2] = locale.getVariant();
                items[1] = locale.getCountry();
                items[0] = locale.getLanguage();
                charset = searchCharSet(items);
                if (charset.length() == 0)
                {
                    charset = DEFAULT_CHARSET;
                }
                mappers[MAP_CACHE].put(key, charset);
            }
            return charset;
        }
        else
        {
            return getCharSet(locale);
        }
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key,
                             String def)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Searches for a charset for a specified locale. The search starts
     * from the last item and proceeds towards the first one, using a
     * fresh key buffer for each starting item.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items)
    {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--)
        {
            charset = searchCharSet(items, sb, i);
            if (charset.length() > 0)
            {
                return charset;
            }
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale.
     * The item at index count - 1 is prepended to the base buffer and
     * combinations with the preceding items are tried before the
     * buffer content itself is looked up.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items,
                                 StringBuffer base,
                                 int count)
    {
        if ((--count >= 0) &&
            (items[count] != null) &&
            (items[count].length() > 0))
        {
            String charset;
            base.insert(0, items[count]);
            int length = base.length();
            for (int i = count; i > 0; i--)
            {
                if ((i == count) ||
                    (i <= 1))
                {
                    base.insert(0, '_');
                    length++;
                }
                charset = searchCharSet(items, base, i);
                if (charset.length() > 0)
                {
                    return charset;
                }
                // Drop whatever the recursive call prepended.
                base.delete(0, base.length() - length);
            }
            return searchCharSet(base.toString());
        }
        else
        {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key. Mappers are checked
     * in priority order and the outcome, including a miss (stored as
     * an empty string), is recorded in the cache mapper.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key)
    {
        if ((key != null) &&
            (key.length() > 0))
        {
            // Go through mappers.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++)
            {
                mapper = mappers[i];
                if (mapper != null)
                {
                    charset = (String) mapper.get(key);
                    if (charset != null)
                    {
                        // Update the cache.
                        if (i > MAP_CACHE)
                        {
                            mappers[MAP_CACHE].put(key, charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key, "");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping. The common mapper of this
     * instance is copied, the copy is modified and installed in place
     * of the previous one, and the lookup cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
                                                 String charset)
    {
        Map updated = new HashMap(mappers[MAP_COM]);
        updated.put(key, charset);
        mappers[MAP_COM] = updated;
        mappers[MAP_CACHE].clear();
    }
}