3 * Copyright (C) 1999-2004 A.J. van Os; Released under GNU GPL
6 * Translate Word characters to local representation
12 #if defined(__STDC_ISO_10646__)
14 #endif /* __STDC_ISO_10646__ */
17 static const USHORT usCp850
[] = { /* DOS implementation of Latin1 */
18 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
19 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
20 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
21 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192,
22 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
23 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
24 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0,
25 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510,
26 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
27 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
28 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce,
29 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
30 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
31 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
32 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
33 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0,
36 static const USHORT usCp1250
[] = { /* Windows implementation of Latin2 */
37 0x20ac, 0x003f, 0x201a, 0x003f, 0x201e, 0x2026, 0x2020, 0x2021,
38 0x003f, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179,
39 0x003f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
40 0x003f, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a,
41 0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7,
42 0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b,
43 0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
44 0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c,
45 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
46 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
47 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
48 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
49 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
50 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
51 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
52 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
55 static const USHORT usCp1251
[] = { /* Windows implementation of Cyrillic */
56 0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021,
57 0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f,
58 0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
59 0x00f3, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
60 0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7,
61 0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407,
62 0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7,
63 0x0451, 0x2116, 0x0454, 0x00bb, 0x0458, 0x0405, 0x0455, 0x0457,
64 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
65 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
66 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
67 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
68 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
69 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
70 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
71 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
74 static const USHORT usCp1252
[] = { /* Windows implementation of Latin1 */
75 0x20ac, 0x003f, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
76 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003f, 0x017d, 0x003f,
77 0x003f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
78 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x003f, 0x017e, 0x0178,
79 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
80 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
81 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
82 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
83 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
84 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
85 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
86 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
87 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
88 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
89 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
90 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
93 static const USHORT usMacRoman
[] = { /* Apple implementation of Latin1 */
94 0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1,
95 0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8,
96 0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3,
97 0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc,
98 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
99 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
100 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
101 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x2126, 0x00e6, 0x00f8,
102 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
103 0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153,
104 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
105 0x00ff, 0x0178, 0x2044, 0x00a4, 0x2039, 0x203a, 0xfb01, 0xfb02,
106 0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1,
107 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4,
108 0x003f, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc,
109 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7,
112 static const USHORT usPrivateArea
[] = {
113 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220d,
114 0x0028, 0x0029, 0x2217, 0x002b, 0x002c, 0x2212, 0x002e, 0x002f,
115 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
116 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x2019, 0x003e, 0x003f,
117 0x201d, 0x201c, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
118 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
119 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
120 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0x005f,
121 0x003f, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
122 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
123 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
124 0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x223c, 0x003f,
125 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
126 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
127 0x003f, 0x003f, 0x003f, 0x2022, 0x003f, 0x003f, 0x003f, 0x003f,
128 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
129 0x20ac, 0x03d2, 0x2032, 0x2264, 0x2044, 0x221e, 0x0192, 0x2663,
130 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193,
131 0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022,
132 0x00f7, 0x2260, 0x2261, 0x2248, 0x2026, 0x007c, 0x23af, 0x21b5,
133 0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229,
134 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
135 0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5,
136 0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3,
137 0x22c4, 0x3008, 0x00ae, 0x00a9, 0x2122, 0x2211, 0x239b, 0x239c,
138 0x239d, 0x23a1, 0x23a2, 0x23a3, 0x23a7, 0x23a8, 0x23a9, 0x23aa,
139 0x003f, 0x3009, 0x222b, 0x2320, 0x23ae, 0x2321, 0x239e, 0x239f,
140 0x23a0, 0x23a4, 0x23a5, 0x23a6, 0x23ab, 0x23ac, 0x23ad, 0x003f,
/*
 * Record type of the character mapping table. The members are not visible
 * in this view; the functions below read and write ucLocal (the local
 * character) and usUnicode (the Unicode value) of each record.
 */
143 typedef struct char_table_tag
{
/* The character mapping table: room for 256 records */
148 static char_table_type atCharTable
[256];
/*
 * Number of records stored in atCharTable and also the index of the next
 * free record; zero means that no mapping has been loaded.
 */
149 static size_t tNextPosFree
= 0;
153 * iCompare - compare two records
155 * Compares two records. For use by qsort(3C) and bsearch(3C).
157 * returns -1 if rec1 < rec2, 0 if rec1 == rec2, 1 if rec1 > rec2
160 iCompare(const void *pvRecord1
, const void *pvRecord2
)
162 USHORT usUnicode1
, usUnicode2
;
164 usUnicode1
= ((char_table_type
*)pvRecord1
)->usUnicode
;
165 usUnicode2
= ((char_table_type
*)pvRecord2
)->usUnicode
;
167 if (usUnicode1
< usUnicode2
) {
170 if (usUnicode1
> usUnicode2
) {
174 } /* end of iCompare */
177 * pGetCharTableRecord - get the character table record
179 * returns a pointer to the record when found, otherwise NULL
181 static const char_table_type
*
182 pGetCharTableRecord(USHORT usUnicode
)
184 char_table_type tKey
;
186 if (tNextPosFree
== 0) {
189 tKey
.usUnicode
= usUnicode
;
191 return (char_table_type
*)bsearch(&tKey
,
193 tNextPosFree
, sizeof(atCharTable
[0]),
195 } /* end of pGetCharTableRecord */
198 * ucGetBulletCharacter - get the local representation of the bullet
/*
 * Selects the local character that represents a bullet for the given
 * conversion type and encoding. The RISC OS build takes a separate path;
 * the values returned when no table record is used are not visible in
 * this view.
 */
201 ucGetBulletCharacter(conversion_type eConversionType
, encoding_type eEncoding
)
203 #if defined(__riscos)
206 const char_table_type
*pRec
;
/* NOTE(review): presumably flags UTF-8 as a caller error here - confirm against the fail macro */
208 fail(eEncoding
== encoding_utf_8
);
/* Latin1 PostScript and PDF output gets a special case */
210 if (eEncoding
== encoding_latin_1
&&
211 (eConversionType
== conversion_ps
||
212 eConversionType
== conversion_pdf
)) {
213 /* Ugly, but it makes the PostScript and PDF look better */
/* Output other than plain or formatted text: try the bullet, then the bullet operator */
216 if (eConversionType
!= conversion_text
&&
217 eConversionType
!= conversion_fmt_text
) {
218 pRec
= pGetCharTableRecord(UNICODE_BULLET
);
220 return pRec
->ucLocal
;
222 pRec
= pGetCharTableRecord(UNICODE_BULLET_OPERATOR
);
224 return pRec
->ucLocal
;
/* Last table lookup: the middle dot */
226 pRec
= pGetCharTableRecord(UNICODE_MIDDLE_DOT
);
228 return pRec
->ucLocal
;
232 #endif /* __riscos */
233 } /* end of ucGetBulletCharacter */
236 * ucGetNbspCharacter - get the local representation of the non-breaking space
/*
 * Looks up the local character for the Unicode non-breaking space (0x00a0)
 * in the character table. The debug message and the comment below show
 * that a fallback value is used when no record is found; that value is
 * not visible in this view.
 */
239 ucGetNbspCharacter(void)
241 const char_table_type
*pRec
;
243 pRec
= pGetCharTableRecord(0x00a0); /* Unicode non-breaking space */
245 DBG_MSG("Non-breaking space record not found");
246 /* No value found, use the best guess */
/* Normal case: the local character stored in the record */
249 return pRec
->ucLocal
;
250 } /* end of ucGetNbspCharacter */
253 * bReadCharacterMappingTable - read the mapping table
255 * Read the character mapping table from file and have the contents sorted
257 * returns TRUE if successful, otherwise FALSE
/*
 * Fills atCharTable from the lines of pFile. Each data line is parsed as
 * a hexadecimal local character followed by a hexadecimal Unicode value;
 * whatever follows on the line is skipped by the scan format.
 */
260 bReadCharacterMappingTable(FILE *pFile
)
272 /* Clean the table first */
273 (void)memset(atCharTable
, 0, sizeof(atCharTable
));
/*
 * NOTE(review): only the table contents are cleared in the lines visible
 * here. Confirm that tNextPosFree is reset to zero as well; otherwise a
 * second call keeps appending after the old count and can store past the
 * end of atCharTable.
 */
276 while (fgets(szLine
, (int)sizeof(szLine
), pFile
)) {
277 if (szLine
[0] == '#' ||
280 /* Comment or empty line */
/* Two converted fields are expected: local character and Unicode value */
283 iFields
= sscanf(szLine
, "%x %lx %*s", &uiLocal
, &ulUnicode
);
/* The line end characters are located before the line is reported */
285 pcTmp
= strchr(szLine
, '\r');
289 pcTmp
= strchr(szLine
, '\n');
293 werr(0, "Syntax error in: '%s'", szLine
);
/* Values must fit the table fields: 8 bits local, 16 bits Unicode */
296 if (uiLocal
> 0xff || ulUnicode
> 0xffff) {
297 werr(0, "Syntax error in: '%02x %04lx'",
301 /* Store only the relevant entries */
/* Relevant means: the two values differ or the local character is 0x80 or higher */
302 if (uiLocal
!= ulUnicode
|| uiLocal
>= 0x80) {
303 atCharTable
[tNextPosFree
].ucLocal
= (UCHAR
)uiLocal
;
304 atCharTable
[tNextPosFree
].usUnicode
= (USHORT
)ulUnicode
;
/* Stop reading when the table is full */
307 if (tNextPosFree
>= elementsof(atCharTable
)) {
308 werr(0, "Too many entries in the character mapping "
309 "file. Ignoring the rest.");
/* With at least one entry: show first and last Unicode value before and after sorting */
314 if (tNextPosFree
!= 0) {
315 DBG_HEX(atCharTable
[0].usUnicode
);
316 DBG_HEX(atCharTable
[tNextPosFree
- 1].usUnicode
);
319 tNextPosFree
, sizeof(atCharTable
[0]),
322 DBG_HEX(atCharTable
[0].usUnicode
);
323 DBG_HEX(atCharTable
[tNextPosFree
- 1].usUnicode
);
327 } /* end of bReadCharacterMappingTable */
330 * ulTranslateCharacters - Translate characters to local representation
332 * Translate all characters to local representation
334 * returns the translated character
/*
 * Overview of the steps visible in this function:
 * 1. pick a code page table (Macintosh, DOS or Windows) and use it to map
 *    0x80..0x9f, and for Word versions below 8 also 0xa0..0xff, to Unicode
 * 2. map the Microsoft private area 0xf020..0xf0ff through usPrivateArea
 * 3. handle the characters that have a special meaning in Word
 * 4. UTF-8 output keeps the Unicode value; the other encodings go through
 *    the character table lookup and the US ASCII substitutions
 * Many of the returned values are not visible in this view.
 */
337 ulTranslateCharacters(USHORT usChar
, ULONG ulFileOffset
, int iWordVersion
,
338 conversion_type eConversionType
, encoding_type eEncoding
,
341 const char_table_type
*pTmp
;
342 const USHORT
*usCharSet
;
/* Select the table for the implementation defined characters */
345 if (bUseMacCharSet
) {
346 /* Macintosh character set */
347 usCharSet
= usMacRoman
;
348 } else if (iWordVersion
== 0) {
349 /* DOS character set */
352 /* Windows character set */
354 case encoding_latin_2
:
355 usCharSet
= usCp1250
;
357 case encoding_cyrillic
:
358 usCharSet
= usCp1251
;
360 case encoding_latin_1
:
362 usCharSet
= usCp1252
;
366 fail(usCharSet
== NULL
);
/* The tables start at 0x80, hence the offset in both branches */
367 if (usChar
>= 0x80 && usChar
<= 0x9f) {
368 /* Translate implementation defined characters */
369 usChar
= usCharSet
[usChar
- 0x80];
370 } else if (iWordVersion
< 8 && usChar
>= 0xa0 && usChar
<= 0xff) {
371 /* Translate old character set to Unicode */
372 usChar
= usCharSet
[usChar
- 0x80];
375 /* Microsoft Unicode to real Unicode */
376 if (usChar
>= 0xf020 && usChar
<= 0xf0ff) {
377 DBG_HEX_C(usPrivateArea
[usChar
- 0xf020] == 0x003f, usChar
);
378 usChar
= usPrivateArea
[usChar
- 0xf020];
381 /* Characters with a special meaning in Word */
383 case IGNORE_CHARACTER
:
384 case FOOTNOTE_SEPARATOR
:
385 case FOOTNOTE_CONTINUATION
:
389 case WORD_SOFT_HYPHEN
:
390 case UNICODE_HYPHENATION_POINT
:
391 return IGNORE_CHARACTER
;
393 case TABLE_SEPARATOR
:
399 return (ULONG
)usChar
;
/* The kind of note is looked up with the file offset */
400 case FOOTNOTE_OR_ENDNOTE
:
401 NO_DBG_HEX(ulFileOffset
);
402 switch (eGetNotetype(ulFileOffset
)) {
403 case notetype_is_footnote
:
404 return FOOTNOTE_CHAR
;
405 case notetype_is_endnote
:
408 return UNKNOWN_NOTE_CHAR
;
410 case WORD_UNBREAKABLE_JOIN
:
411 return (ULONG
)OUR_UNBREAKABLE_JOIN
;
416 if (eEncoding
!= encoding_utf_8
) {
417 /* Latin characters in an oriental text */
418 if (usChar
>= 0xff01 && usChar
<= 0xff5e) {
423 if (eEncoding
== encoding_latin_1
&&
424 (eConversionType
== conversion_ps
||
425 eConversionType
== conversion_pdf
)) {
426 /* Ugly, but it makes the PostScript and PDF look better */
428 case UNICODE_ELLIPSIS
:
430 case UNICODE_TRADEMARK_SIGN
:
432 case UNICODE_PER_MILLE_SIGN
:
435 case UNICODE_BULLET_OPERATOR
:
436 case UNICODE_BLACK_CLUB_SUIT
:
438 case UNICODE_LEFT_SINGLE_QMARK
:
440 case UNICODE_RIGHT_SINGLE_QMARK
:
442 case UNICODE_SINGLE_LEFT_ANGLE_QMARK
:
444 case UNICODE_SINGLE_RIGHT_ANGLE_QMARK
:
446 case UNICODE_LEFT_DOUBLE_QMARK
:
448 case UNICODE_RIGHT_DOUBLE_QMARK
:
450 case UNICODE_DOUBLE_LOW_9_QMARK
:
452 case UNICODE_EN_DASH
:
454 case UNICODE_EM_DASH
:
456 case UNICODE_MINUS_SIGN
:
458 case UNICODE_CAPITAL_LIGATURE_OE
:
460 case UNICODE_SMALL_LIGATURE_OE
:
464 case UNICODE_DOUBLE_DAGGER
:
466 case UNICODE_SMALL_LIGATURE_FI
:
468 case UNICODE_SMALL_LIGATURE_FL
:
/* Extra cases that apply to PDF output only, split by encoding */
475 if (eConversionType
== conversion_pdf
) {
476 if (eEncoding
== encoding_latin_1
) {
478 case UNICODE_EURO_SIGN
:
483 } else if (eEncoding
== encoding_latin_2
) {
485 case UNICODE_CAPITAL_D_WITH_STROKE
:
486 case UNICODE_SMALL_D_WITH_STROKE
:
496 if (usChar
< 0x20 || usChar
== 0x7f) {
497 /* Ignore control characters */
500 return IGNORE_CHARACTER
;
502 return (ULONG
)usChar
;
505 if (eEncoding
== encoding_utf_8
) {
506 /* No need to convert Unicode characters */
507 return (ULONG
)usChar
;
510 /* Unicode to local representation */
511 pTmp
= pGetCharTableRecord(usChar
);
513 DBG_HEX_C(usChar
>= 0x7f && usChar
<= 0x9f, usChar
);
514 return (ULONG
)pTmp
->ucLocal
;
517 /* Fancy characters to simple US ASCII */
519 case UNICODE_SMALL_F_HOOK
:
521 case UNICODE_GREEK_CAPITAL_CHI
:
523 case UNICODE_GREEK_SMALL_UPSILON
:
525 case UNICODE_MODIFIER_CIRCUMFLEX
:
526 case UNICODE_UPWARDS_ARROW
:
528 case UNICODE_SMALL_TILDE
:
529 case UNICODE_TILDE_OPERATOR
:
531 case UNICODE_EN_QUAD
:
532 case UNICODE_EM_QUAD
:
533 case UNICODE_EN_SPACE
:
534 case UNICODE_EM_SPACE
:
535 case UNICODE_THREE_PER_EM_SPACE
:
536 case UNICODE_FOUR_PER_EM_SPACE
:
537 case UNICODE_SIX_PER_EM_SPACE
:
538 case UNICODE_FIGURE_SPACE
:
539 case UNICODE_PUNCTUATION_SPACE
:
540 case UNICODE_THIN_SPACE
:
541 case UNICODE_NARROW_NO_BREAK_SPACE
:
542 case UNICODE_LIGHT_SHADE
:
543 case UNICODE_MEDIUM_SHADE
:
544 case UNICODE_DARK_SHADE
:
546 case UNICODE_LEFT_DOUBLE_QMARK
:
547 case UNICODE_RIGHT_DOUBLE_QMARK
:
548 case UNICODE_DOUBLE_LOW_9_QMARK
:
549 case UNICODE_DOUBLE_HIGH_REV_9_QMARK
:
550 case UNICODE_DOUBLE_PRIME
:
552 case UNICODE_LEFT_SINGLE_QMARK
:
553 case UNICODE_RIGHT_SINGLE_QMARK
:
554 case UNICODE_SINGLE_LOW_9_QMARK
:
555 case UNICODE_SINGLE_HIGH_REV_9_QMARK
:
559 case UNICODE_NON_BREAKING_HYPHEN
:
560 case UNICODE_FIGURE_DASH
:
561 case UNICODE_EN_DASH
:
562 case UNICODE_EM_DASH
:
563 case UNICODE_HORIZONTAL_BAR
:
564 case UNICODE_MINUS_SIGN
:
565 case UNICODE_BD_LIGHT_HORIZONTAL
:
566 case UNICODE_BD_DOUBLE_HORIZONTAL
:
568 case UNICODE_DOUBLE_VERTICAL_LINE
:
569 case UNICODE_BD_LIGHT_VERTICAL
:
570 case UNICODE_BD_DOUBLE_VERTICAL
:
572 case UNICODE_DOUBLE_LOW_LINE
:
576 case UNICODE_DOUBLE_DAGGER
:
579 case UNICODE_BULLET_OPERATOR
:
580 case UNICODE_BLACK_CLUB_SUIT
:
581 return (ULONG
)ucGetBulletCharacter(eConversionType
, eEncoding
);
582 case UNICODE_ONE_DOT_LEADER
:
583 case UNICODE_TWO_DOT_LEADER
:
585 case UNICODE_ELLIPSIS
:
586 #if defined(__riscos)
587 return (ULONG
)OUR_ELLIPSIS
;
589 if (ulFileOffset
== 0) {
590 return (ULONG
)OUR_ELLIPSIS
;
592 return UNICODE_ELLIPSIS
;
593 #endif /* __riscos */
594 case UNICODE_DOUBLE_LEFT_ANGLE_QMARK
:
595 case UNICODE_TRIANGULAR_BULLET
:
596 case UNICODE_SINGLE_LEFT_ANGLE_QMARK
:
597 case UNICODE_LEFTWARDS_ARROW
:
599 case UNICODE_DOUBLE_RIGHT_ANGLE_QMARK
:
600 case UNICODE_SINGLE_RIGHT_ANGLE_QMARK
:
601 case UNICODE_RIGHTWARDS_ARROW
:
603 case UNICODE_UNDERTIE
:
605 case UNICODE_N_ARY_SUMMATION
:
607 case UNICODE_EURO_SIGN
:
612 case UNICODE_DIAMOND
:
613 return (ULONG
)OUR_DIAMOND
;
614 case UNICODE_NUMERO_SIGN
:
616 case UNICODE_KELVIN_SIGN
:
618 case UNICODE_DOWNWARDS_ARROW
:
620 case UNICODE_FRACTION_SLASH
:
621 case UNICODE_DIVISION_SLASH
:
623 case UNICODE_ASTERISK_OPERATOR
:
627 case UNICODE_BD_LIGHT_DOWN_RIGHT
:
628 case UNICODE_BD_LIGHT_DOWN_AND_LEFT
:
629 case UNICODE_BD_LIGHT_UP_AND_RIGHT
:
630 case UNICODE_BD_LIGHT_UP_AND_LEFT
:
631 case UNICODE_BD_LIGHT_VERTICAL_AND_RIGHT
:
632 case UNICODE_BD_LIGHT_VERTICAL_AND_LEFT
:
633 case UNICODE_BD_LIGHT_DOWN_AND_HORIZONTAL
:
634 case UNICODE_BD_LIGHT_UP_AND_HORIZONTAL
:
635 case UNICODE_BD_LIGHT_VERTICAL_AND_HORIZONTAL
:
636 case UNICODE_BD_DOUBLE_DOWN_AND_RIGHT
:
637 case UNICODE_BD_DOUBLE_DOWN_AND_LEFT
:
638 case UNICODE_BD_DOUBLE_UP_AND_RIGHT
:
639 case UNICODE_BD_DOUBLE_UP_AND_LEFT
:
640 case UNICODE_BD_DOUBLE_VERTICAL_AND_RIGHT
:
641 case UNICODE_BD_DOUBLE_VERTICAL_AND_LEFT
:
642 case UNICODE_BD_DOUBLE_DOWN_AND_HORIZONTAL
:
643 case UNICODE_BD_DOUBLE_UP_AND_HORIZONTAL
:
644 case UNICODE_BD_DOUBLE_VERTICAL_AND_HORIZONTAL
:
645 case UNICODE_BLACK_SQUARE
:
/* Spaces without width and directional marks are dropped */
647 case UNICODE_HAIR_SPACE
:
648 case UNICODE_ZERO_WIDTH_SPACE
:
649 case UNICODE_ZERO_WIDTH_NON_JOINER
:
650 case UNICODE_ZERO_WIDTH_JOINER
:
651 case UNICODE_LEFT_TO_RIGHT_MARK
:
652 case UNICODE_RIGHT_TO_LEFT_MARK
:
653 case UNICODE_LEFT_TO_RIGHT_EMBEDDING
:
654 case UNICODE_RIGHT_TO_LEFT_EMBEDDING
:
655 case UNICODE_POP_DIRECTIONAL_FORMATTING
:
656 case UNICODE_LEFT_TO_RIGHT_OVERRIDE
:
657 case UNICODE_RIGHT_TO_LEFT_OVERRIDE
:
658 case UNICODE_ZERO_WIDTH_NO_BREAK_SPACE
:
659 return IGNORE_CHARACTER
;
664 if (usChar
== UNICODE_TRADEMARK_SIGN
) {
666 * No local representation, it doesn't look like anything in
667 * US-ASCII and a question mark does more harm than good.
669 return IGNORE_CHARACTER
;
672 if (usChar
>= 0xa0 && usChar
<= 0xff) {
673 /* Before Word 97, Word did't use Unicode */
674 return (ULONG
)usChar
;
677 DBG_HEX_C(usChar
< 0x3000 || usChar
>= 0xd800, ulFileOffset
);
678 DBG_HEX_C(usChar
< 0x3000 || usChar
>= 0xd800, usChar
);
679 DBG_MSG_C(usChar
>= 0xe000 && usChar
< 0xf900, "Private Use Area");
681 /* Untranslated Unicode character */
683 } /* end of ulTranslateCharacters */
686 * ulToUpper - convert letter to upper case
688 * This function converts a letter to upper case. Unlike toupper(3) this
689 * function is independent from the settings of locale. This comes in handy
690 * for people who have to read Word documents in more than one language or
691 * contain more than one language.
693 * returns the converted letter, or ulChar if the conversion was not possible.
/*
 * Converts a character value to upper case in up to three stages: the
 * standard toupper for US ASCII, a bit operation for the lower case
 * accented characters 0xe0..0xfe, and towupper where wchar_t is known
 * to be ISO 10646. The conditions guarding the first and last stage are
 * not visible in this view.
 */
696 ulToUpper(ULONG ulChar
)
/* NOTE(review): toupper follows the current locale - confirm that it is only reached for US ASCII values */
699 /* US ASCII: use standard function */
700 return (ULONG
)toupper((int)ulChar
);
702 if (ulChar
>= 0xe0 && ulChar
<= 0xfe && ulChar
!= 0xf7) {
704 * Lower case accented characters
705 * 0xf7 is Division sign; 0xd7 is Multiplication sign
706 * 0xff is y with diaeresis; 0xdf is Sharp s
/* Clearing bit 0x20 turns 0xe0..0xfe into 0xc0..0xde */
708 return ulChar
& ~0x20;
710 #if defined(__STDC_ISO_10646__)
712 * If this is ISO C99 and all locales have wchar_t = ISO 10646
713 * (e.g., glibc 2.2 or newer), then use standard function
716 return (ULONG
)towupper((wint_t)ulChar
);
718 #endif /* __STDC_ISO_10646__ */
720 } /* end of ulToUpper */