1 /* NameDecoder.java -- Decodes names of OpenType and TrueType fonts.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu
.java
.awt
.font
.opentype
;
40 import java
.io
.UnsupportedEncodingException
;
41 import java
.nio
.ByteBuffer
;
42 import java
.util
.Locale
;
46 * A utility class that helps with decoding the names of OpenType
49 * @author Sascha Brawer (brawer@dandelis.ch)
/*
 * Identifiers of individual names.  Constants without a description
 * of their own are presumably the OpenType 'name' table IDs that
 * their names suggest -- confirm against the OpenType specification.
 * Note that no constant is defined for the value 15.
 */
public static final int NAME_COPYRIGHT = 0;

/**
 * Specifies the name of the family to which a font belongs, for
 * example “Univers”.
 */
public static final int NAME_FAMILY = 1;

/**
 * Specifies the name of the font inside its family, for
 * example “Light”.
 */
public static final int NAME_SUBFAMILY = 2;

public static final int NAME_UNIQUE = 3;

/**
 * Specifies the full human-readable name of a font, for example
 * “Univers Light”.
 */
public static final int NAME_FULL = 4;

public static final int NAME_VERSION = 5;

/**
 * Specifies the PostScript name of a font, for example
 * “Univers-Light”.
 */
public static final int NAME_POSTSCRIPT = 6;

public static final int NAME_TRADEMARK = 7;
public static final int NAME_MANUFACTURER = 8;
public static final int NAME_DESIGNER = 9;
public static final int NAME_DESCRIPTION = 10;
public static final int NAME_VENDOR_URL = 11;
public static final int NAME_DESIGNER_URL = 12;
public static final int NAME_LICENSE = 13;
public static final int NAME_LICENSE_URL = 14;
public static final int NAME_PREFERRED_FAMILY = 16;
public static final int NAME_PREFERRED_SUBFAMILY = 17;
public static final int NAME_FULL_MACCOMPATIBLE = 18;
public static final int NAME_SAMPLE_TEXT = 19;
public static final int NAME_POSTSCRIPT_CID = 20;

/* Platform IDs that getName compares against the platform field of
 * each name record: 1 selects the Macintosh language matching,
 * 3 the Microsoft language matching.
 */
private static final int PLATFORM_MACINTOSH = 1;
private static final int PLATFORM_MICROSOFT = 3;
109 public static String
getName(ByteBuffer nameTable
,
110 int name
, Locale locale
)
113 int macLanguage
, msLanguage
;
115 int namePlatform
, nameEncoding
, nameLanguage
, nameID
, nameLen
;
120 if (nameTable
== null)
123 nameTable
.position(0);
124 /* We understand only format 0 of the name table. */
125 if (nameTable
.getChar() != 0)
128 macLanguage
= getMacLanguageCode(locale
);
129 msLanguage
= getMicrosoftLanguageCode(locale
);
130 numRecords
= nameTable
.getChar();
131 offset
= nameTable
.getChar();
133 for (int i
= 0; i
< numRecords
; i
++)
135 namePlatform
= nameTable
.getChar();
136 nameEncoding
= nameTable
.getChar();
137 nameLanguage
= nameTable
.getChar();
138 nameID
= nameTable
.getChar();
139 nameLen
= nameTable
.getChar();
140 nameStart
= offset
+ nameTable
.getChar();
147 switch (namePlatform
)
149 case PLATFORM_MACINTOSH
:
150 if ((nameLanguage
== macLanguage
) || (locale
== null))
156 case 49: /* Azerbaijani/Cyrillic */
157 match
= (nameLanguage
== /* Azerbaijani/Arabic */ 50)
158 || (nameLanguage
== /* Azerbaijani/Roman */ 150);
161 case 57: /* Mongolian/Mongolian */
162 match
= (nameLanguage
== /* Mongolian/Cyrillic */ 58);
165 case 83: /* Malay/Roman */
166 match
= (nameLanguage
== /* Malay/Arabic */ 84);
172 case PLATFORM_MICROSOFT
:
173 if (((nameLanguage
& 0xff) == msLanguage
) || (locale
== null))
181 result
= decodeName(namePlatform
, nameEncoding
, nameLanguage
,
182 nameTable
, nameStart
, nameLen
);
193 * The language codes used by the Macintosh operating system. MacOS
194 * defines numeric language identifiers in the range [0 .. 95] and
195 * [128 .. 150]. To map this numeric identifier into an ISO 639
196 * language code, multiply it by two and take the substring at that
199 * <p>ISO 639 has revised the code for some languages, namely
200 * <code>he</code> for Hebrew (formerly <code>iw</code>),
201 * <code>yi</code> (formerly <code>ji</code>), and <code>id</code>
202 * for Indonesian (formerly <code>in</code>). In those cases, this
203 * table intentionally contains the older, obsolete code. The
204 * reason is that this is the code which
205 * java.util.Locale.getLanguage() is specified to return. The
206 * implementation of {@link #getMacLanguageCode} depends on this.
209 * "http://www.unicode.org/unicode/onlinedat/languages.html"
210 * >Language Codes: ISO 639, Microsoft and Macintosh</a>
212 private static final String macLanguageCodes
214 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
215 = "enfrdeitnlsvesdaptnoiwjaarfielismttrhrzhurhithkoltplhuetlv "
218 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
219 + "fofaruzhnlgdsqrocssksljisrmkbgukbeuzkkazazhykamokytgtkmnmnps"
222 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
223 + "kukssdbonesamrbnasgupaormlkntatesimykmloviintlmsmsamti sosw"
226 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
230 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
231 + " cyeucalaqugnayttugtsjwsuglafbriugdgvgatoelkl"
239 * The primary language IDs used by the Microsoft operating systems.
241 * <p>ISO 639 has revised the code for some languages, namely
242 * <code>he</code> for Hebrew (formerly <code>iw</code>),
243 * <code>yi</code> (formerly <code>ji</code>), and <code>id</code>
244 * for Indonesian (formerly <code>in</code>). In those cases, this
245 * table intentionally contains the older, obsolete code. The
246 * reason is that this is the code which
247 * java.util.Locale.getLanguage() is specified to return. The
248 * implementation of {@link #getMicrosoftLanguageCode} depends on
252 * "http://www.unicode.org/unicode/onlinedat/languages.html"
253 * >Language Codes: ISO 639, Microsoft and Macintosh</a>
255 private static final String microsoftLanguageCodes
257 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
258 = " arbgcazhcsdadeelenesfifriwhuisitjakonlnoplptrmrorushsksqsv"
261 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
262 + "thtrurinukbesletlvlttgfavihyazeu mk ts xhzuafkafohimt "
265 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
266 + "gajimskkkyswtkuzttbnpaguortateknmlasmrsamnbocykmlomygl sd"
269 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
270 + " si iuam ksnefypstl ha yo omtign laso";
274 * Maps a Java Locale into a MacOS language code.
276 * <p>For languages that are written in several script systems,
277 * MacOS defines multiple language codes. Java Locales have a
278 * variant which could be used for that purpose, but a small
279 * test program revealed that with Sun's JDK 1.4.1_01, only two
280 * of 134 available Locales have a variant tag (namely no_NO_NY
283 * <p>The following cases are problematic:
285 * <ul> <li>Azerbaijani (az): The MacOS language code is 49 if
286 * Azerbaijani is written in the Cyrillic script; 50 if written in
287 * the Arabic script; 150 if written in the Roman script. This
288 * method will always return 49 for the Azerbaijani locale.</li>
290 * <li>Mongolian (mn): The MacOS language code is 57 if Mongolian is
291 * written in the Mongolian script; 58 if written in the Cyrillic
292 * script. This method will always return 57 for the Mongolian
295 * <li>Malay (ms): The MacOS language code is 83 if Malay is written
296 * in the Roman script; 84 if written in the Arabic script. This
297 * method will always return 83 for the Malay locale.</li> </ul>
299 * @return a MacOS language code, or -1 if there is no such code for
300 * <code>loc</code>’s language.
302 private static int getMacLanguageCode(Locale loc
)
309 code
= findLanguageCode(loc
.getLanguage(), macLanguageCodes
);
313 /* Traditional Chinese (MacOS language #19) and and Simplified
314 * Chinese (MacOS language #33) both have "zh" as their ISO 639
317 if (loc
.equals(Locale
.SIMPLIFIED_CHINESE
))
321 // Other special cases would be 49, 57 and 83, but we do not
322 // know what do do about them. See the method documentation for
331 * Maps a Java Locale into a Microsoft language code.
333 private static int getMicrosoftLanguageCode(Locale locale
)
341 isoCode
= locale
.getLanguage();
342 code
= findLanguageCode(isoCode
, microsoftLanguageCodes
);
345 if (isoCode
.equals("hr") || isoCode
.equals("sr"))
347 /* Microsoft uses code 26 for "sh" (Serbo-Croatian),
348 * "hr" (Croatian) and "sr" (Serbian). Our table contains
353 else if (isoCode
.equals("gd"))
355 /* Microsoft uses code 60 for "gd" (Scottish Gaelic) and
356 * "ga" (Irish Gaelic). Out table contains "ga".
365 private static int findLanguageCode(String lang
, String langCodes
)
371 if (lang
.length() != 2)
377 index
= langCodes
.indexOf(lang
, index
);
379 /* The index must be even to be considered a match. Otherwise, we
380 * could match with the second letter of one language and the
381 * first of antoher one.
384 while (!((index
< 0) || ((index
& 1) == 0)));
393 private static String
decodeName(int platform
, int encoding
, int language
,
394 ByteBuffer buffer
, int offset
, int len
)
400 charsetName
= getCharsetName(platform
, language
, encoding
);
401 if (charsetName
== null)
404 byteBuf
= new byte[len
];
405 oldPosition
= buffer
.position();
408 buffer
.position(offset
);
412 return new String(byteBuf
, charsetName
);
414 catch (UnsupportedEncodingException uex
)
420 buffer
.position(oldPosition
);
428 * Maps a MacOS language code into a Java Locale.
430 * @param macLanguageCode the MacOS language code for
431 * the language whose Java locale is to be retrieved.
433 * @return an suitable Locale, or <code>null</code> if
434 * the mapping cannot be performed.
436 private static Locale
getMacLocale(int macLanguageCode
)
440 switch (macLanguageCode
)
442 case 0: return Locale
.ENGLISH
;
443 case 1: return Locale
.FRENCH
;
444 case 2: return Locale
.GERMAN
;
445 case 3: return Locale
.ITALIAN
;
446 case 11: return Locale
.JAPANESE
;
447 case 23: return Locale
.KOREAN
;
448 case 19: return Locale
.TRADITIONAL_CHINESE
;
449 case 33: return Locale
.SIMPLIFIED_CHINESE
;
452 if ((macLanguageCode
< 0) || (macLanguageCode
> 150))
455 isoCode
= macLanguageCodes
.substring(macLanguageCode
<< 1,
456 (macLanguageCode
+ 1) << 1);
457 if (isoCode
.charAt(0) == ' ')
460 return new Locale(isoCode
);
466 * Maps a Windows LCID into a Java Locale.
468 * @param lcid the Windows language ID whose Java locale
469 * is to be retrieved.
471 * @return an suitable Locale, or <code>null</code> if
472 * the mapping cannot be performed.
474 private static Locale
getWindowsLocale(int lcid
)
476 /* FIXME: This is grossly incomplete. */
479 case 0x0407: return Locale
.GERMAN
;
480 case 0x0408: return new Locale("el", "GR");
481 case 0x0409: return Locale
.ENGLISH
;
482 case 0x040b: return new Locale("fi");
483 case 0x040c: return Locale
.FRENCH
;
484 case 0x0416: return new Locale("pt");
485 case 0x0807: return new Locale("de", "CH");
486 case 0x0809: return new Locale("en", "UK");
487 case 0x080c: return new Locale("fr", "BE");
488 case 0x0816: return new Locale("pt", "BR");
489 case 0x0c07: return new Locale("de", "AT");
490 case 0x0c09: return new Locale("en", "AU");
491 case 0x0c0c: return new Locale("fr", "CA");
492 case 0x1007: return new Locale("de", "LU");
493 case 0x1009: return new Locale("en", "CA");
494 case 0x100c: return new Locale("fr", "CH");
495 case 0x1407: return new Locale("de", "LI");
496 case 0x1409: return new Locale("en", "NZ");
497 case 0x140c: return new Locale("fr", "LU");
498 case 0x1809: return new Locale("en", "IE");
507 * Maps a Macintosh Script Manager code to the name of the
508 * corresponding Java Charset.
510 * @param macScript a MacOS ScriptCode, for example
511 * 6 for <code>smGreek</code>.
513 * @return a String that can be used to retrieve a Java
514 * CharsetDecorder, for example <code>MacGreek</code>, or
515 * <code>null</code> if <code>macScript</code> has an
518 private static String
getMacCharsetName(int macScript
)
522 case 0: return "MacRoman";
523 case 1: return "MacJapanese";
524 case 2: return "MacKorean";
525 case 3: return "MacTradChinese";
526 case 4: return "MacArabic";
527 case 5: return "MacHebrew";
528 case 6: return "MacGreek";
529 case 7: return "MacCyrillic";
530 case 8: return "MacRSymbol";
531 case 9: return "MacDevanagari";
532 case 10: return "MacGurmukhi";
533 case 11: return "MacGujarati";
534 case 12: return "MacOriya";
535 case 13: return "MacBengali";
536 case 14: return "MacTamil";
537 case 15: return "MacTelugu";
538 case 16: return "MacKannada";
539 case 17: return "MacMalayalam";
540 case 18: return "MacSinhalese";
541 case 19: return "MacBurmese";
542 case 20: return "MacKhmer";
543 case 21: return "MacThai";
544 case 22: return "MacLao";
545 case 23: return "MacGeorgian";
546 case 24: return "MacArmenian";
547 case 25: return "MacSimpChinese";
548 case 26: return "MacTibetan";
549 case 27: return "MacMongolian";
550 case 28: return "MacEthiopic";
551 case 29: return "MacCentralEurope";
552 case 30: return "MacVietnamese";
553 case 31: return "MacExtArabic";
555 default: return null;
561 * Maps a Microsoft locale ID (LCID) to the name of the
562 * corresponding Java Charset.
564 * @param lcid the Microsoft locale ID.
566 * @return a String that can be used to retrieve a Java
567 * CharsetDecoder, for example <code>windows-1252</code>, or
568 * <code>null</code> if <code>lcid</code> has an unsupported value.
570 private static String
getMicrosoftCharsetName(int lcid
)
575 /* Extract the language code from the LCID. */
578 /* In the majority of cases, the language alone determines the
582 codePage
= (" 612D022322225022EC2202201?002A462110777 68 ?2 1 "
583 + " 2 2 2112 ?1 1 2 2 ")
586 /* There are a few exceptions, however, where multiple code pages
587 * are used for the same language. */
592 case 0x041a: // Croatian --> Windows-1250 (Central Europe)
593 case 0x081a: // Serbian (Latin) --> Windows-1250 (Central Europe)
597 case 0x42c: // Azeri (Latin) --> Windows-1254 (Turkish)
598 case 0x443: // Uzbek (Latin) --> Windows-1254 (Turkish)
602 case 0x82c: // Azeri (Cyrillic) --> Windows-1251 (Cyrillic)
603 case 0x843: // Uzbek (Cyrillic) --> Windows-1251 (Cyrillic)
604 case 0xc1a: // Serbian (Cyrillic) --> Windows-1251 (Cyrillic)
612 case '0': return "windows-1250"; // Central Europe
613 case '1': return "windows-1251"; // Cyrillic
614 case '2': return "windows-1252"; // Latin 1
615 case '3': return "windows-1253"; // Greek
616 case '4': return "windows-1254"; // Turkish
617 case '5': return "windows-1255"; // Hebrew
618 case '6': return "windows-1256"; // Arabic
619 case '7': return "windows-1257"; // Baltic
620 case '8': return "windows-1258"; // Vietnam
621 case 'A': return "windows-874"; // Thai
622 case 'B': return "windows-936"; // Simplified Chinese, GBK
623 case 'C': return "windows-949"; // Korean
624 case 'D': return "windows-950"; // Traditional Chinese, Big5
625 case 'E': return "windows-932"; // Japanese Shift-JIS
626 default: return null;
632 * Returns the Locale of an OpenType name.
634 * @param platform the OpenType platform ID.
636 * @param language the language tag of the OpenType name. If
637 * <code>platform</code> is 1, this is the MacOS language code.
639 * @param encoding the encoding tag of the OpenType name. If
640 * <code>platform</code> is 1, this is the MacOS script code.
642 public static Locale
getLocale(int platform
, int language
, int encoding
)
646 case 1: /* Apple Macintosh */
647 return getMacLocale(language
);
649 case 3: /* Microsoft Windows */
650 return getWindowsLocale(language
);
659 * Determines the name of the charset for an OpenType font name.
661 * @param platform the OpenType platform ID.
663 * @param language the language tag of the OpenType name. If
664 * <code>platform</code> is 1, this is the MacOS language code.
666 * @param encoding the encoding tag of the OpenType name. If
667 * <code>platform</code> is 1, this is the MacOS script code.
669 * @return a charset name such as <code>"MacRoman"</code>,
670 * or <code>null</code> if the combination is not known.
672 public static String
getCharsetName(int platform
, int language
, int encoding
)
676 case 1: /* Apple Macintosh */
677 return getMacCharsetName(encoding
);
679 case 3: /* Microsoft Windows */
680 return getMicrosoftCharsetName(language
);