Imported GNU Classpath 0.90
[official-gcc.git] / libjava / classpath / gnu / java / awt / font / opentype / NameDecoder.java
blobbc0c0df099553239fbdda878db920b74e4f7df62
1 /* NameDecoder.java -- Decodes names of OpenType and TrueType fonts.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu.java.awt.font.opentype;
40 import java.io.UnsupportedEncodingException;
41 import java.nio.ByteBuffer;
42 import java.util.Locale;
45 /**
46 * A utility class that helps with decoding the names of OpenType
47 * and TrueType fonts.
49 * @author Sascha Brawer (brawer@dandelis.ch)
51 class NameDecoder
53 public static final int NAME_COPYRIGHT = 0;
56 /**
57 * Specifies the name of the family to which a font belongs, for
58 * example “Univers”.
60 public static final int NAME_FAMILY = 1;
63 /**
64 * Specified the name of the font inside its family, for
65 * example “Light”.
67 public static final int NAME_SUBFAMILY = 2;
70 public static final int NAME_UNIQUE = 3;
73 /**
74 * Specifies the full human-readable name of a font, for example
75 * “Univers Light”
77 public static final int NAME_FULL = 4;
80 public static final int NAME_VERSION = 5;
83 /**
84 * Specifies the PostScript name of a font, for example
85 * “Univers-Light”.
87 public static final int NAME_POSTSCRIPT = 6;
90 public static final int NAME_TRADEMARK = 7;
91 public static final int NAME_MANUFACTURER = 8;
92 public static final int NAME_DESIGNER = 9;
93 public static final int NAME_DESCRIPTION = 10;
94 public static final int NAME_VENDOR_URL = 11;
95 public static final int NAME_DESIGNER_URL = 12;
96 public static final int NAME_LICENSE = 13;
97 public static final int NAME_LICENSE_URL = 14;
98 public static final int NAME_PREFERRED_FAMILY = 16;
99 public static final int NAME_PREFERRED_SUBFAMILY = 17;
100 public static final int NAME_FULL_MACCOMPATIBLE = 18;
101 public static final int NAME_SAMPLE_TEXT = 19;
102 public static final int NAME_POSTSCRIPT_CID = 20;
105 private static final int PLATFORM_MACINTOSH = 1;
106 private static final int PLATFORM_MICROSOFT = 3;
109 public static String getName(ByteBuffer nameTable,
110 int name, Locale locale)
112 int numRecords;
113 int macLanguage, msLanguage;
114 int offset;
115 int namePlatform, nameEncoding, nameLanguage, nameID, nameLen;
116 int nameStart;
117 String result;
118 boolean match;
120 if (nameTable == null)
121 return null;
123 nameTable.position(0);
124 /* We understand only format 0 of the name table. */
125 if (nameTable.getChar() != 0)
126 return null;
128 macLanguage = getMacLanguageCode(locale);
129 msLanguage = getMicrosoftLanguageCode(locale);
130 numRecords = nameTable.getChar();
131 offset = nameTable.getChar();
133 for (int i = 0; i < numRecords; i++)
135 namePlatform = nameTable.getChar();
136 nameEncoding = nameTable.getChar();
137 nameLanguage = nameTable.getChar();
138 nameID = nameTable.getChar();
139 nameLen = nameTable.getChar();
140 nameStart = offset + nameTable.getChar();
143 if (nameID != name)
144 continue;
146 match = false;
147 switch (namePlatform)
149 case PLATFORM_MACINTOSH:
150 if ((nameLanguage == macLanguage) || (locale == null))
151 match = true;
152 else
154 switch (macLanguage)
156 case 49: /* Azerbaijani/Cyrillic */
157 match = (nameLanguage == /* Azerbaijani/Arabic */ 50)
158 || (nameLanguage == /* Azerbaijani/Roman */ 150);
159 break;
161 case 57: /* Mongolian/Mongolian */
162 match = (nameLanguage == /* Mongolian/Cyrillic */ 58);
163 break;
165 case 83: /* Malay/Roman */
166 match = (nameLanguage == /* Malay/Arabic */ 84);
167 break;
170 break;
172 case PLATFORM_MICROSOFT:
173 if (((nameLanguage & 0xff) == msLanguage) || (locale == null))
174 match = true;
175 break;
179 if (match)
181 result = decodeName(namePlatform, nameEncoding, nameLanguage,
182 nameTable, nameStart, nameLen);
183 if (result != null)
184 return result;
188 return null;
193 * The language codes used by the Macintosh operating system. MacOS
194 * defines numeric language identifiers in the range [0 .. 95] and
195 * [128 .. 150]. To map this numeric identifier into an ISO 639
196 * language code, multiply it by two and take the substring at that
197 * position.
199 * <p>ISO 639 has revised the code for some languages, namely
200 * <code>he</code> for Hebrew (formerly <code>iw</code>),
201 * <code>yi</code> (formerly <code>ji</code>), and <code>id</code>
202 * for Indonesian (formerly <code>in</code>). In those cases, this
203 * table intentionally contains the older, obsolete code. The
204 * reason is that this is the code which
205 * java.util.Locale.getLanguage() is specified to return. The
206 * implementation of {@link #getMacLanguageCode} depends on this.
208 * @see <a href=
209 * "http://www.unicode.org/unicode/onlinedat/languages.html"
210 * >Language Codes: ISO 639, Microsoft and Macintosh</a>
212 private static final String macLanguageCodes
213 // 0 1 2
214 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
215 = "enfrdeitnlsvesdaptnoiwjaarfielismttrhrzhurhithkoltplhuetlv "
217 // 3 4 5
218 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
219 + "fofaruzhnlgdsqrocssksljisrmkbgukbeuzkkazazhykamokytgtkmnmnps"
221 // 6 7 8
222 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
223 + "kukssdbonesamrbnasgupaormlkntatesimykmloviintlmsmsamti sosw"
225 // 9 10 11
226 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
227 + "rwrn mgeo "
229 // 12 13 14
230 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
231 + " cyeucalaqugnayttugtsjwsuglafbriugdgvgatoelkl"
233 // 15
234 // 0
235 + "az";
239 * The primary language IDs used by the Microsoft operating systems.
241 * <p>ISO 639 has revised the code for some languages, namely
242 * <code>he</code> for Hebrew (formerly <code>iw</code>),
243 * <code>yi</code> (formerly <code>ji</code>), and <code>id</code>
244 * for Indonesian (formerly <code>in</code>). In those cases, this
245 * table intentionally contains the older, obsolete code. The
246 * reason is that this is the code which
247 * java.util.Locale.getLanguage() is specified to return. The
248 * implementation of {@link #getMicrosoftLanguageCode} depends on
249 * this.
251 * @see <a href=
252 * "http://www.unicode.org/unicode/onlinedat/languages.html"
253 * >Language Codes: ISO 639, Microsoft and Macintosh</a>
255 private static final String microsoftLanguageCodes
256 // 0 1 2
257 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
258 = " arbgcazhcsdadeelenesfifriwhuisitjakonlnoplptrmrorushsksqsv"
260 // 3 4 5
261 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
262 + "thtrurinukbesletlvlttgfavihyazeu mk ts xhzuafkafohimt "
264 // 6 7 8
265 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
266 + "gajimskkkyswtkuzttbnpaguortateknmlasmrsamnbocykmlomygl sd"
268 // 9 10 11
269 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
270 + " si iuam ksnefypstl ha yo omtign laso";
274 * Maps a Java Locale into a MacOS language code.
276 * <p>For languages that are written in several script systems,
277 * MacOS defines multiple language codes. Java Locales have a
278 * variant which could be used for that purpose, but a small
279 * test program revealed that with Sun's JDK 1.4.1_01, only two
280 * of 134 available Locales have a variant tag (namely no_NO_NY
281 * and th_TH_TH).</p>
283 * <p>The following cases are problematic:
285 * <ul> <li>Azerbaijani (az): The MacOS language code is 49 if
286 * Azerbaijani is written in the Cyrillic script; 50 if written in
287 * the Arabic script; 150 if written in the Roman script. This
288 * method will always return 49 for the Azerbaijani locale.</li>
290 * <li>Mongolian (mn): The MacOS language code is 57 if Mongolian is
291 * written in the Mongolian script; 58 if written in the Cyrillic
292 * script. This method will always return 57 for the Mongolian
293 * locale.</li>
295 * <li>Malay (ms): The MacOS language code is 83 if Malay is written
296 * in the Roman script; 84 if written in the Arabic script. This
297 * method will always return 83 for the Malay locale.</li> </ul>
299 * @return a MacOS language code, or -1 if there is no such code for
300 * <code>loc</code>&#x2019;s language.
302 private static int getMacLanguageCode(Locale loc)
304 int code;
306 if (loc == null)
307 return -1;
309 code = findLanguageCode(loc.getLanguage(), macLanguageCodes);
310 switch (code)
312 case 19:
313 /* Traditional Chinese (MacOS language #19) and and Simplified
314 * Chinese (MacOS language #33) both have "zh" as their ISO 639
315 * code.
317 if (loc.equals(Locale.SIMPLIFIED_CHINESE))
318 code = 33;
319 break;
321 // Other special cases would be 49, 57 and 83, but we do not
322 // know what do do about them. See the method documentation for
323 // details.
326 return code;
331 * Maps a Java Locale into a Microsoft language code.
333 private static int getMicrosoftLanguageCode(Locale locale)
335 String isoCode;
336 int code;
338 if (locale == null)
339 return -1;
341 isoCode = locale.getLanguage();
342 code = findLanguageCode(isoCode, microsoftLanguageCodes);
343 if (code == -1)
345 if (isoCode.equals("hr") || isoCode.equals("sr"))
347 /* Microsoft uses code 26 for "sh" (Serbo-Croatian),
348 * "hr" (Croatian) and "sr" (Serbian). Our table contains
349 * "sh".
351 code = 26;
353 else if (isoCode.equals("gd"))
355 /* Microsoft uses code 60 for "gd" (Scottish Gaelic) and
356 * "ga" (Irish Gaelic). Out table contains "ga".
358 code = 60;
361 return code;
365 private static int findLanguageCode(String lang, String langCodes)
367 int index;
368 if (lang == null)
369 return -1;
371 if (lang.length() != 2)
372 return -1;
374 index = 0;
377 index = langCodes.indexOf(lang, index);
379 /* The index must be even to be considered a match. Otherwise, we
380 * could match with the second letter of one language and the
381 * first of antoher one.
384 while (!((index < 0) || ((index & 1) == 0)));
385 if (index < 0)
386 return -1;
388 index = index / 2;
389 return index;
393 private static String decodeName(int platform, int encoding, int language,
394 ByteBuffer buffer, int offset, int len)
396 byte[] byteBuf;
397 String charsetName;
398 int oldPosition;
400 charsetName = getCharsetName(platform, language, encoding);
401 if (charsetName == null)
402 return null;
404 byteBuf = new byte[len];
405 oldPosition = buffer.position();
408 buffer.position(offset);
409 buffer.get(byteBuf);
412 return new String(byteBuf, charsetName);
414 catch (UnsupportedEncodingException uex)
418 finally
420 buffer.position(oldPosition);
423 return null;
428 * Maps a MacOS language code into a Java Locale.
430 * @param macLanguageCode the MacOS language code for
431 * the language whose Java locale is to be retrieved.
433 * @return an suitable Locale, or <code>null</code> if
434 * the mapping cannot be performed.
436 private static Locale getMacLocale(int macLanguageCode)
438 String isoCode;
440 switch (macLanguageCode)
442 case 0: return Locale.ENGLISH;
443 case 1: return Locale.FRENCH;
444 case 2: return Locale.GERMAN;
445 case 3: return Locale.ITALIAN;
446 case 11: return Locale.JAPANESE;
447 case 23: return Locale.KOREAN;
448 case 19: return Locale.TRADITIONAL_CHINESE;
449 case 33: return Locale.SIMPLIFIED_CHINESE;
452 if ((macLanguageCode < 0) || (macLanguageCode > 150))
453 return null;
455 isoCode = macLanguageCodes.substring(macLanguageCode << 1,
456 (macLanguageCode + 1) << 1);
457 if (isoCode.charAt(0) == ' ')
458 return null;
460 return new Locale(isoCode);
466 * Maps a Windows LCID into a Java Locale.
468 * @param lcid the Windows language ID whose Java locale
469 * is to be retrieved.
471 * @return an suitable Locale, or <code>null</code> if
472 * the mapping cannot be performed.
474 private static Locale getWindowsLocale(int lcid)
476 /* FIXME: This is grossly incomplete. */
477 switch (lcid)
479 case 0x0407: return Locale.GERMAN;
480 case 0x0408: return new Locale("el", "GR");
481 case 0x0409: return Locale.ENGLISH;
482 case 0x040b: return new Locale("fi");
483 case 0x040c: return Locale.FRENCH;
484 case 0x0416: return new Locale("pt");
485 case 0x0807: return new Locale("de", "CH");
486 case 0x0809: return new Locale("en", "UK");
487 case 0x080c: return new Locale("fr", "BE");
488 case 0x0816: return new Locale("pt", "BR");
489 case 0x0c07: return new Locale("de", "AT");
490 case 0x0c09: return new Locale("en", "AU");
491 case 0x0c0c: return new Locale("fr", "CA");
492 case 0x1007: return new Locale("de", "LU");
493 case 0x1009: return new Locale("en", "CA");
494 case 0x100c: return new Locale("fr", "CH");
495 case 0x1407: return new Locale("de", "LI");
496 case 0x1409: return new Locale("en", "NZ");
497 case 0x140c: return new Locale("fr", "LU");
498 case 0x1809: return new Locale("en", "IE");
500 default:
501 return null;
507 * Maps a Macintosh Script Manager code to the name of the
508 * corresponding Java Charset.
510 * @param macScript a MacOS ScriptCode, for example
511 * 6 for <code>smGreek</code>.
513 * @return a String that can be used to retrieve a Java
514 * CharsetDecorder, for example <code>MacGreek</code>, or
515 * <code>null</code> if <code>macScript</code> has an
516 * unsupported value.
518 private static String getMacCharsetName(int macScript)
520 switch (macScript)
522 case 0: return "MacRoman";
523 case 1: return "MacJapanese";
524 case 2: return "MacKorean";
525 case 3: return "MacTradChinese";
526 case 4: return "MacArabic";
527 case 5: return "MacHebrew";
528 case 6: return "MacGreek";
529 case 7: return "MacCyrillic";
530 case 8: return "MacRSymbol";
531 case 9: return "MacDevanagari";
532 case 10: return "MacGurmukhi";
533 case 11: return "MacGujarati";
534 case 12: return "MacOriya";
535 case 13: return "MacBengali";
536 case 14: return "MacTamil";
537 case 15: return "MacTelugu";
538 case 16: return "MacKannada";
539 case 17: return "MacMalayalam";
540 case 18: return "MacSinhalese";
541 case 19: return "MacBurmese";
542 case 20: return "MacKhmer";
543 case 21: return "MacThai";
544 case 22: return "MacLao";
545 case 23: return "MacGeorgian";
546 case 24: return "MacArmenian";
547 case 25: return "MacSimpChinese";
548 case 26: return "MacTibetan";
549 case 27: return "MacMongolian";
550 case 28: return "MacEthiopic";
551 case 29: return "MacCentralEurope";
552 case 30: return "MacVietnamese";
553 case 31: return "MacExtArabic";
555 default: return null;
561 * Maps a Microsoft locale ID (LCID) to the name of the
562 * corresponding Java Charset.
564 * @param lcid the Microsoft locale ID.
566 * @return a String that can be used to retrieve a Java
567 * CharsetDecoder, for example <code>windows-1252</code>, or
568 * <code>null</code> if <code>lcid</code> has an unsupported value.
// Maps a Microsoft locale ID to a charset name. The primary language
// (the lowest ten bits of lcid) selects a one-character key from a lookup
// string; the key '?' defers the decision to a switch over the complete
// LCID. The final switch turns the key into a charset name, and any key
// without a case there (such as a blank) yields null.
570 private static String getMicrosoftCharsetName(int lcid)
572 int lang;
// The key starts out as '?', so languages of 100 and above, which are not
// looked up in the string, are decided by the LCID switch alone.
573 char codePage = '?';
575 /* Extract the language code from the LCID. */
576 lang = lcid & 0x3ff;
578 /* In the majority of cases, the language alone determines the
579 * codepage.
// NOTE(review): the two literals below add up to 68 characters in this
// copy, yet indexes up to 99 are read. Presumably runs of blanks were
// collapsed when the text was extracted, which would shift the keys of
// most languages and make charAt throw for large indexes. Restore the
// string from a pristine copy of the file; do not re-pad it by guessing.
581 if (lang < 100)
582 codePage = (" 612D022322225022EC2202201?002A462110777 68 ?2 1 "
583 + " 2 2 2112 ?1 1 2 2 ")
584 .charAt(lang);
586 /* There are a few exceptions, however, where multiple code pages
587 * are used for the same language. */
588 if (codePage == '?')
590 switch (lcid)
592 case 0x041a: // Croatian --> Windows-1250 (Central Europe)
593 case 0x081a: // Serbian (Latin) --> Windows-1250 (Central Europe)
594 codePage = '0';
595 break;
597 case 0x42c: // Azeri (Latin) --> Windows-1254 (Turkish)
598 case 0x443: // Uzbek (Latin) --> Windows-1254 (Turkish)
599 codePage = '4';
600 break;
602 case 0x82c: // Azeri (Cyrillic) --> Windows-1251 (Cyrillic)
603 case 0x843: // Uzbek (Cyrillic) --> Windows-1251 (Cyrillic)
604 case 0xc1a: // Serbian (Cyrillic) --> Windows-1251 (Cyrillic)
605 codePage = '1';
606 break;
// Translate the key into a charset name. A key that is still '?' here
// belongs to an LCID that is not listed above and ends up in the default
// case.
610 switch (codePage)
612 case '0': return "windows-1250"; // Central Europe
613 case '1': return "windows-1251"; // Cyrillic
614 case '2': return "windows-1252"; // Latin 1
615 case '3': return "windows-1253"; // Greek
616 case '4': return "windows-1254"; // Turkish
617 case '5': return "windows-1255"; // Hebrew
618 case '6': return "windows-1256"; // Arabic
619 case '7': return "windows-1257"; // Baltic
620 case '8': return "windows-1258"; // Vietnam
621 case 'A': return "windows-874"; // Thai
622 case 'B': return "windows-936"; // Simplified Chinese, GBK
623 case 'C': return "windows-949"; // Korean
624 case 'D': return "windows-950"; // Traditional Chinese, Big5
625 case 'E': return "windows-932"; // Japanese Shift-JIS
626 default: return null;
632 * Returns the Locale of an OpenType name.
634 * @param platform the OpenType platform ID.
636 * @param language the language tag of the OpenType name. If
637 * <code>platform</code> is 1, this is the MacOS language code.
639 * @param encoding the encoding tag of the OpenType name. If
640 * <code>platform</code> is 1, this is the MacOS script code.
642 public static Locale getLocale(int platform, int language, int encoding)
644 switch (platform)
646 case 1: /* Apple Macintosh */
647 return getMacLocale(language);
649 case 3: /* Microsoft Windows */
650 return getWindowsLocale(language);
652 default:
653 return null;
659 * Determines the name of the charset for an OpenType font name.
661 * @param platform the OpenType platform ID.
663 * @param language the language tag of the OpenType name. If
664 * <code>platform</code> is 1, this is the MacOS language code.
666 * @param encoding the encoding tag of the OpenType name. If
667 * <code>platform</code> is 1, this is the MacOS script code.
669 * @return a charset name such as <code>&quot;MacRoman&quot;</code>,
670 * or <code>null</code> if the combination is not known.
672 public static String getCharsetName(int platform, int language, int encoding)
674 switch (platform)
676 case 1: /* Apple Macintosh */
677 return getMacCharsetName(encoding);
679 case 3: /* Microsoft Windows */
680 return getMicrosoftCharsetName(language);
682 default:
683 return null;