Imported from antiword-0.37.tar.gz.
[antiword.git] / wordlib.c
blobd8471d62c3e50c253216a3567b667a47c6a66f51
/*
 * wordlib.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * Deal with the internals of a MS Word file
 */
#include "antiword.h"
11 static BOOL bOldMacFile = FALSE;
15 * Common part of the file checking functions
17 static BOOL
18 bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes)
20 int iIndex, iChar;
22 fail(pFile == NULL || aucBytes == NULL || tBytes == 0);
24 rewind(pFile);
26 for (iIndex = 0; iIndex < (int)tBytes; iIndex++) {
27 iChar = getc(pFile);
28 if (iChar == EOF || iChar != (int)aucBytes[iIndex]) {
29 NO_DBG_HEX(iChar);
30 NO_DBG_HEX(aucBytes[iIndex]);
31 return FALSE;
34 return TRUE;
35 } /* end of bCheckBytes */
38 * This function checks whether the given file is or is not a "Word for DOS"
39 * document
41 BOOL
42 bIsWordForDosFile(FILE *pFile, long lFilesize)
44 static UCHAR aucBytes[] =
45 { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */
47 DBG_MSG("bIsWordForDosFile");
49 if (pFile == NULL || lFilesize < 0) {
50 DBG_MSG("No proper file given");
51 return FALSE;
53 if (lFilesize < 128) {
54 DBG_MSG("File too small to be a Word document");
55 return FALSE;
57 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
58 } /* end of bIsWordForDosFile */
61 * This function checks whether the given file is or is not a file with an
62 * OLE envelope (That is a document made by Word 6 or later)
64 static BOOL
65 bIsWordFileWithOLE(FILE *pFile, long lFilesize)
67 static UCHAR aucBytes[] =
68 { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
69 int iTailLen;
71 if (pFile == NULL || lFilesize < 0) {
72 DBG_MSG("No proper file given");
73 return FALSE;
75 if (lFilesize < (long)BIG_BLOCK_SIZE * 3) {
76 DBG_MSG("This file is too small to be a Word document");
77 return FALSE;
80 iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE);
81 switch (iTailLen) {
82 case 0: /* No tail, as it should be */
83 break;
84 case 1:
85 case 2: /* Filesize mismatch or a buggy email program */
86 if ((int)(lFilesize % 3) == iTailLen) {
87 DBG_DEC(lFilesize);
88 return FALSE;
91 * Ignore extra bytes caused by buggy email programs.
92 * They have bugs in their base64 encoding or decoding.
93 * 3 bytes -> 4 ascii chars -> 3 bytes
95 DBG_MSG("Document with extra bytes");
96 break;
97 default: /* Wrong filesize for a Word document */
98 DBG_DEC(lFilesize);
99 DBG_DEC(iTailLen);
100 return FALSE;
102 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
103 } /* end of bIsWordFileWithOLE */
106 * This function checks whether the given file is or is not a RTF document
108 BOOL
109 bIsRtfFile(FILE *pFile)
111 static UCHAR aucBytes[] =
112 { '{', '\\', 'r', 't', 'f', '1' };
114 DBG_MSG("bIsRtfFile");
116 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
117 } /* end of bIsRtfFile */
120 * This function checks whether the given file is or is not a WP document
122 BOOL
123 bIsWordPerfectFile(FILE *pFile)
125 static UCHAR aucBytes[] =
126 { 0xff, 'W', 'P', 'C' };
128 DBG_MSG("bIsWordPerfectFile");
130 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
131 } /* end of bIsWordPerfectFile */
134 * This function checks whether the given file is or is not a "Win Word 1 or 2"
135 * document
137 BOOL
138 bIsWinWord12File(FILE *pFile, long lFilesize)
140 static UCHAR aucBytes[2][4] = {
141 { 0x9b, 0xa5, 0x21, 0x00 }, /* Win Word 1.x */
142 { 0xdb, 0xa5, 0x2d, 0x00 }, /* Win Word 2.0 */
144 int iIndex;
146 DBG_MSG("bIsWinWord12File");
148 if (pFile == NULL || lFilesize < 0) {
149 DBG_MSG("No proper file given");
150 return FALSE;
152 if (lFilesize < 384) {
153 DBG_MSG("This file is too small to be a Word document");
154 return FALSE;
157 for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
158 if (bCheckBytes(pFile,
159 aucBytes[iIndex],
160 elementsof(aucBytes[iIndex]))) {
161 return TRUE;
164 return FALSE;
165 } /* end of bIsWinWord12File */
168 * This function checks whether the given file is or is not a "Mac Word 4 or 5"
169 * document
171 BOOL
172 bIsMacWord45File(FILE *pFile)
174 static UCHAR aucBytes[2][6] = {
175 { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */
176 { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */
178 int iIndex;
180 DBG_MSG("bIsMacWord45File");
182 for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
183 if (bCheckBytes(pFile,
184 aucBytes[iIndex],
185 elementsof(aucBytes[iIndex]))) {
186 return TRUE;
189 return FALSE;
190 } /* end of bIsMacWord45File */
/*
 * iGuessVersionNumber - guess the Word version number from first few bytes
 *
 * The checks are tried in this order: Word for DOS, Win Word 1 or 2,
 * Mac Word 4 or 5, OLE envelope. The first one that succeeds decides:
 *	0 = Word for DOS, 2 = Win Word 1 or 2, 5 = Mac Word 4 or 5,
 *	6 = file with an OLE envelope
 *
 * Returns the guessed version number or -1 when no guess is possible
 *
 * NOTE(review): the return-type line was absent from the listing this was
 * restored from; int is inferred from the returned values - confirm against
 * the prototype in antiword.h.
 */
int
iGuessVersionNumber(FILE *pFile, long lFilesize)
{
	if (bIsWordForDosFile(pFile, lFilesize)) {
		return 0;
	}
	if (bIsWinWord12File(pFile, lFilesize)) {
		return 2;
	}
	if (bIsMacWord45File(pFile)) {
		return 5;
	}
	if (bIsWordFileWithOLE(pFile, lFilesize)) {
		return 6;
	}
	return -1;
} /* end of iGuessVersionNumber */
216 * iGetVersionNumber - get the Word version number from the header
218 * Returns the version number or -1 when unknown
221 iGetVersionNumber(const UCHAR *aucHeader)
223 USHORT usFib, usChse;
225 usFib = usGetWord(0x02, aucHeader);
226 if (usFib >= 0x1000) {
227 /* To big: must be MacWord using Big Endian */
228 DBG_HEX(usFib);
229 usFib = usGetWordBE(0x02, aucHeader);
231 DBG_DEC(usFib);
232 bOldMacFile = FALSE;
233 switch (usFib) {
234 case 0:
235 DBG_MSG("Word for DOS");
236 return 0;
237 case 28:
238 DBG_MSG("Word 4 for Macintosh");
239 bOldMacFile = TRUE;
240 return 4;
241 case 33:
242 DBG_MSG("Word 1.x for Windows");
243 return 1;
244 case 35:
245 DBG_MSG("Word 5 for Macintosh");
246 bOldMacFile = TRUE;
247 return 5;
248 case 45:
249 DBG_MSG("Word 2 for Windows");
250 return 2;
251 case 101:
252 case 102:
253 DBG_MSG("Word 6 for Windows");
254 return 6;
255 case 103:
256 case 104:
257 usChse = usGetWord(0x14, aucHeader);
258 DBG_DEC(usChse);
259 switch (usChse) {
260 case 0:
261 DBG_MSG("Word 7 for Win95");
262 return 7;
263 case 256:
264 DBG_MSG("Word 6 for Macintosh");
265 bOldMacFile = TRUE;
266 return 6;
267 default:
268 DBG_FIXME();
269 if ((int)ucGetByte(0x05, aucHeader) == 0xe0) {
270 DBG_MSG("Word 7 for Win95");
271 return 7;
273 DBG_MSG("Word 6 for Macintosh");
274 bOldMacFile = TRUE;
275 return 6;
277 default:
278 usChse = usGetWord(0x14, aucHeader);
279 DBG_DEC(usChse);
280 if (usFib < 192) {
281 /* Unknown or unsupported version of Word */
282 DBG_DEC(usFib);
283 return -1;
285 DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT");
286 DBG_MSG_C(usChse == 256, "Word98 for Macintosh");
287 return 8;
289 } /* end of iGetVersionNumber */
292 * TRUE if the current file was made by Word version 6 or older on an
293 * Apple Macintosh, otherwise FALSE.
294 * This function hides the methode of how to find out from the rest of the
295 * program.
297 BOOL
298 bIsOldMacFile(void)
300 return bOldMacFile;
301 } /* end of bIsOldMacFile */
/*
 * iInitDocument - initialize a document
 *
 * Guesses the kind of file with iGuessVersionNumber and hands the file to
 * the matching initialisation function (DOS, WIN, MAC or OLE). The value
 * returned by that function is passed on unchanged.
 *
 * Returns the version of Word that made the document or -1
 *
 * NOTE(review): the return-type line was absent from the listing this was
 * restored from; int is inferred from iWordVersion - confirm against the
 * prototype in antiword.h.
 */
int
iInitDocument(FILE *pFile, long lFilesize)
{
	int	iGuess, iWordVersion;

	iGuess = iGuessVersionNumber(pFile, lFilesize);
	switch (iGuess) {
	case 0:
		iWordVersion = iInitDocumentDOS(pFile, lFilesize);
		break;
	case 2:
		iWordVersion = iInitDocumentWIN(pFile, lFilesize);
		break;
	case 5:
		iWordVersion = iInitDocumentMAC(pFile, lFilesize);
		break;
	case 6:
		iWordVersion = iInitDocumentOLE(pFile, lFilesize);
		break;
	default:
		/* No usable guess: report the file as unknown */
		DBG_DEC(iGuess);
		iWordVersion = -1;
		break;
	}
	return iWordVersion;
} /* end of iInitDocument */
/*
 * vFreeDocument - free a document by free-ing its parts
 *
 * Calls the destroy function of each per-document list and table in turn.
 */
void
vFreeDocument(void)
{
	DBG_MSG("vFreeDocument");

	/* Free the memory */
	vDestroyTextBlockList();
	vDestroyDataBlockList();
	vDestroyListInfoList();
	vDestroyRowInfoList();
	vDestroyStyleInfoList();
	vDestroyFontInfoList();
	vDestroyStylesheetList();
	vDestroyPictInfoList();
	vDestroyDocumentInfoList();
	vDestroySectionInfoList();
	vDestroyHdrFtrInfoList();
	vDestroyPropModList();
	vDestroyNotesInfoLists();
	vDestroyFontTable();
	vDestroySummaryInfo();
} /* end of vFreeDocument */