3 * Copyright (C) 1999,2000 A.J. van Os
6 * Translate characters to ISO-8859-1
14 * iTranslateCharacters - Translate characters to ISO-8859-1
16 * Translate Macintosh characters, Windows characters and some Unicode
17 * characters into ISO-8859-1 (aka Latin1).
19 * returns the translated character or EOF if the input character should
/*
 * Maps the character code iChar onto a replacement constant; the visible
 * results are OUR_* codes, FOOTNOTE_CHAR and UNKNOWN_NOTE_CHAR.
 * iFileOffset is handed to eGetNotetype() and to the debug macros.
 * NOTE(review): this listing is lossy - the return type, the switch
 * headers, the braces and several return statements are not visible, so
 * the notes below describe only what the remaining lines show.
 */
23 iTranslateCharacters(int iChar
, int iFileOffset
, BOOL bMacFile
)
26 /* Translate special Macintosh characters */
/*
 * Every MAC_* label below returns the OUR_* constant of the same name.
 * NOTE(review): presumably this section is guarded by bMacFile - the guard
 * itself is not visible here; confirm against the full file.
 */
28 case MAC_CAPTAL_A_DIAERESIS
:
29 return OUR_CAPTAL_A_DIAERESIS
;
30 case MAC_CAPTAL_A_RING_ABOVE
:
31 return OUR_CAPTAL_A_RING_ABOVE
;
32 case MAC_CAPTAL_C_CEDILLA
:
33 return OUR_CAPTAL_C_CEDILLA
;
34 case MAC_CAPTAL_O_ACUTE_ACCENT
:
35 return OUR_CAPTAL_O_ACUTE_ACCENT
;
36 case MAC_CAPTAL_N_TILDE
:
37 return OUR_CAPTAL_N_TILDE
;
38 case MAC_CAPTAL_O_DIAERESIS
:
39 return OUR_CAPTAL_O_DIAERESIS
;
40 case MAC_CAPTAL_U_DIAERESIS
:
41 return OUR_CAPTAL_U_DIAERESIS
;
42 case MAC_SMALL_A_ACUTE_ACCENT
:
43 return OUR_SMALL_A_ACUTE_ACCENT
;
44 case MAC_SMALL_A_GRAVE_ACCENT
:
45 return OUR_SMALL_A_GRAVE_ACCENT
;
46 case MAC_SMALL_A_CIRCUMFLEX_ACCENT
:
47 return OUR_SMALL_A_CIRCUMFLEX_ACCENT
;
48 case MAC_SMALL_A_DIAERESIS
:
49 return OUR_SMALL_A_DIAERESIS
;
50 case MAC_SMALL_A_TILDE
:
51 return OUR_SMALL_A_TILDE
;
52 case MAC_SMALL_A_RING_ABOVE
:
53 return OUR_SMALL_A_RING_ABOVE
;
54 case MAC_SMALL_C_CEDILLA
:
55 return OUR_SMALL_C_CEDILLA
;
56 case MAC_SMALL_E_ACUTE_ACCENT
:
57 return OUR_SMALL_E_ACUTE_ACCENT
;
58 case MAC_SMALL_E_GRAVE_ACCENT
:
59 return OUR_SMALL_E_GRAVE_ACCENT
;
60 case MAC_SMALL_E_CIRCUMFLEX_ACCENT
:
61 return OUR_SMALL_E_CIRCUMFLEX_ACCENT
;
62 case MAC_SMALL_E_DIAERESIS
:
63 return OUR_SMALL_E_DIAERESIS
;
64 case MAC_SMALL_I_ACUTE_ACCENT
:
65 return OUR_SMALL_I_ACUTE_ACCENT
;
66 case MAC_SMALL_I_GRAVE_ACCENT
:
67 return OUR_SMALL_I_GRAVE_ACCENT
;
68 case MAC_SMALL_I_CIRCUMFLEX_ACCENT
:
69 return OUR_SMALL_I_CIRCUMFLEX_ACCENT
;
70 case MAC_SMALL_I_DIAERESIS
:
71 return OUR_SMALL_I_DIAERESIS
;
72 case MAC_SMALL_N_TILDE
:
73 return OUR_SMALL_N_TILDE
;
74 case MAC_SMALL_O_ACUTE_ACCENT
:
75 return OUR_SMALL_O_ACUTE_ACCENT
;
76 case MAC_SMALL_O_GRAVE_ACCENT
:
77 return OUR_SMALL_O_GRAVE_ACCENT
;
78 case MAC_SMALL_O_CIRCUMFLEX_ACCENT
:
79 return OUR_SMALL_O_CIRCUMFLEX_ACCENT
;
80 case MAC_SMALL_O_DIAERESIS
:
81 return OUR_SMALL_O_DIAERESIS
;
82 case MAC_SMALL_O_TILDE
:
83 return OUR_SMALL_O_TILDE
;
84 case MAC_SMALL_U_ACUTE_ACCENT
:
85 return OUR_SMALL_U_ACUTE_ACCENT
;
86 case MAC_SMALL_U_GRAVE_ACCENT
:
87 return OUR_SMALL_U_GRAVE_ACCENT
;
88 case MAC_SMALL_U_CIRCUMFLEX_ACCENT
:
89 return OUR_SMALL_U_CIRCUMFLEX_ACCENT
;
90 case MAC_SMALL_U_DIAERESIS
:
91 return OUR_SMALL_U_DIAERESIS
;
92 case MAC_SMALL_SHARP_S
:
93 return OUR_SMALL_SHARP_S
;
94 case MAC_LEFT_DOUBLE_QMARK
:
95 return OUR_LEFT_DOUBLE_QMARK
;
96 case MAC_RIGHT_DOUBLE_QMARK
:
97 return OUR_RIGHT_DOUBLE_QMARK
;
102 case MAC_OPENING_DOUBLE_QUOTE
:
103 return OUR_OPENING_DOUBLE_QUOTE
;
104 case MAC_CLOSING_DOUBLE_QUOTE
:
105 return OUR_CLOSING_DOUBLE_QUOTE
;
106 case MAC_LEFT_SINGLE_QUOTE
:
107 return OUR_LEFT_SINGLE_QUOTE
;
108 case MAC_RIGHT_SINGLE_QUOTE
:
109 return OUR_RIGHT_SINGLE_QUOTE
;
115 /* Translate characters to ISO-8859-1 */
/*
 * WORD_* and UNICODE_* labels that share a return are grouped together.
 * NOTE(review): some labels in this section (e.g. WORD_SOFT_HYPHEN,
 * TABLE_SEPARATOR, UNICODE_ELLIPSIS, the dash and bullet codes) have no
 * visible return of their own in this listing - do not assume they fall
 * through to the next visible return; check the full file.
 */
120 case WORD_SOFT_HYPHEN
:
121 case UNICODE_HYPHENATION_POINT
:
124 case TABLE_SEPARATOR
:
/*
 * The note kind is looked up from the file offset via eGetNotetype():
 * a footnote yields FOOTNOTE_CHAR. The value returned for an endnote is
 * not visible here; UNKNOWN_NOTE_CHAR is presumably the fallback branch.
 */
131 case FOOTNOTE_OR_ENDNOTE
:
132 NO_DBG_HEX(iFileOffset
);
133 switch (eGetNotetype(iFileOffset
)) {
134 case notetype_is_footnote
:
135 return FOOTNOTE_CHAR
;
136 case notetype_is_endnote
:
139 return UNKNOWN_NOTE_CHAR
;
141 case WORD_UNBREAKABLE_JOIN
:
142 case UNICODE_NON_BREAKING_HYPHEN
:
143 return OUR_UNBREAKABLE_JOIN
;
145 case UNICODE_EURO_SIGN
:
146 return OUR_EURO_SIGN
;
149 case WORD_DUTCH_GUILDER_SIGN
:
150 return OUR_DUTCH_GUILDER_SIGN
;
151 case WORD_LOW_DOUBLE_QUOTE
:
152 case UNICODE_LOW_DOUBLE_QUOTE
:
153 return OUR_LOW_DOUBLE_QUOTE
;
155 case UNICODE_ELLIPSIS
:
160 case WORD_DOUBLE_DAGGER
:
161 case UNICODE_DOUBLE_DAGGER
:
162 return OUR_DOUBLE_DAGGER
;
163 case WORD_NON_SPACING_CIRCUMFLEX_ACCENT
:
164 return OUR_NON_SPACING_CIRCUMFLEX_ACCENT
;
165 case WORD_PER_MILLE_SIGN
:
166 case UNICODE_PER_MILLE_SIGN
:
167 return OUR_PER_MILLE_SIGN
;
168 case WORD_LEFT_SINGLE_QMARK
:
169 case UNICODE_LEFT_SINGLE_QMARK
:
170 return OUR_LEFT_SINGLE_QMARK
;
171 case WORD_CAPITAL_LIGATURE_OE
:
172 return OUR_CAPITAL_LIGATURE_OE
;
173 case WORD_LEFT_SINGLE_QUOTE
:
174 case UNICODE_LEFT_SINGLE_QUOTE
:
175 case UNICODE_LEFT_SINGLE_QUOTE_ALT
:
176 return OUR_LEFT_SINGLE_QUOTE
;
177 case WORD_RIGHT_SINGLE_QUOTE
:
178 case UNICODE_RIGHT_SINGLE_QUOTE
:
179 case UNICODE_RIGHT_SINGLE_QUOTE_ALT
:
180 return OUR_RIGHT_SINGLE_QUOTE
;
181 case WORD_OPENING_DOUBLE_QUOTE
:
182 case UNICODE_OPENING_DOUBLE_QUOTE
:
183 return OUR_OPENING_DOUBLE_QUOTE
;
184 case WORD_CLOSING_DOUBLE_QUOTE
:
185 case UNICODE_CLOSING_DOUBLE_QUOTE
:
186 return OUR_CLOSING_DOUBLE_QUOTE
;
189 case UNICODE_BULLET_ALT
:
192 case UNICODE_EM_DASH
:
193 case UNICODE_HORIZONTAL_BAR
:
196 case UNICODE_EN_DASH
:
197 case UNICODE_FIGURE_DASH
:
199 case WORD_NON_SPACING_TILDE
:
200 return OUR_NON_SPACING_TILDE
;
202 case UNICODE_TRADEMARK
:
203 return OUR_TRADEMARK
;
204 case WORD_RIGHT_SINGLE_QMARK
:
205 case UNICODE_RIGHT_SINGLE_QMARK
:
206 return OUR_RIGHT_SINGLE_QMARK
;
207 case WORD_SMALL_LIGATURE_OE
:
208 return OUR_SMALL_LIGATURE_OE
;
209 case UNICODE_CAPITAL_W_CIRCUMFLEX_ACCENT
:
210 return OUR_CAPITAL_W_CIRCUMFLEX_ACCENT
;
211 case UNICODE_SMALL_W_CIRCUMFLEX_ACCENT
:
212 return OUR_SMALL_W_CIRCUMFLEX_ACCENT
;
213 case UNICODE_CAPITAL_Y_CIRCUMFLEX_ACCENT
:
214 return OUR_CAPITAL_Y_CIRCUMFLEX_ACCENT
;
215 case UNICODE_SMALL_Y_CIRCUMFLEX_ACCENT
:
216 return OUR_SMALL_Y_CIRCUMFLEX_ACCENT
;
219 case UNICODE_DOUBLE_VERTICAL_LINE
:
220 return OUR_DOUBLE_VERTICAL_LINE
;
221 case UNICODE_DOUBLE_LOW_LINE
:
222 return OUR_DOUBLE_LOW_LINE
;
223 case UNICODE_FRACTION_SLASH
:
224 return OUR_FRACTION_SLASH
;
225 case UNICODE_WHITE_SMILING_FACE
:
226 return OUR_WHITE_SMILING_FACE
;
227 case UNICODE_BLACK_SMILING_FACE
:
228 return OUR_BLACK_SMILING_FACE
;
229 case UNICODE_DIAMOND
:
231 case UNICODE_COPYRIGHT
:
232 return OUR_COPYRIGHT
;
/*
 * Diagnostics for codes that reached this point: the first argument of
 * each DBG_HEX_C call is a range test (0x80..0x9f, or outside 0x20..0xff)
 * and the second is the value of interest (file offset, then the code).
 * NOTE(review): DBG_HEX_C is defined elsewhere; presumably it reports the
 * second argument only when the first is true - confirm.
 */
234 DBG_HEX_C(iChar
>= 0x80 && iChar
<= 0x9f, iFileOffset
);
235 DBG_HEX_C(iChar
>= 0x80 && iChar
<= 0x9f, iChar
);
236 DBG_HEX_C(iChar
< 0x20 || iChar
> 0xff, iFileOffset
);
237 DBG_HEX_C(iChar
< 0x20 || iChar
> 0xff, iChar
);
239 /* A control character slipped through */
243 /* Untranslated Unicode character */
248 } /* end of iTranslateCharacters */
/*
 * iToUpper - convert letter to upper case
 *
 * This function converts an ISO-8859-1 letter to upper case. Unlike
 * toupper(3) this function is independent from the settings of locale.
 * This comes in handy for people who have to read Word documents in more
 * than one language, or documents that contain more than one language.
 *
 * iChar is the character code to convert; any int is accepted.
 *
 * returns the converted letter, or iChar if the conversion was not possible.
 */
int
iToUpper(int iChar)
{
	/*
	 * US ASCII small letters a..z. The codes are spelled out as numbers
	 * and shifted by a fixed offset instead of calling toupper(3): that
	 * function follows LC_CTYPE and may map a letter such as 'i' to a
	 * non-ASCII code in some locales, which would break the promise of
	 * locale independence made above.
	 */
	if (iChar >= 0x61 && iChar <= 0x7a) {
		return iChar - 0x20;
	}
	if (iChar >= 0xe0 && iChar <= 0xfe && iChar != 0xf7) {
		/*
		 * Lower case accented characters
		 * 0xf7 is Division sign; 0xd7 is Multiplication sign
		 * 0xff is y with diaeresis; 0xdf is Sharp s
		 * Neither 0xff nor 0xdf has an upper case form in this
		 * character set, so both stay outside the range.
		 */
		return iChar & ~0x20;
	}
	/* Not a lower case letter (or not convertible): leave unchanged */
	return iChar;
} /* end of iToUpper */