3 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
6 * Deal with the internals of a MS Word file
11 static BOOL bOldMacFile
= FALSE
;
15 * Common part of the file checking functions
18 bCheckBytes(FILE *pFile
, const UCHAR
*aucBytes
, size_t tBytes
)
22 fail(pFile
== NULL
|| aucBytes
== NULL
|| tBytes
== 0);
26 for (iIndex
= 0; iIndex
< (int)tBytes
; iIndex
++) {
28 if (iChar
== EOF
|| iChar
!= (int)aucBytes
[iIndex
]) {
30 NO_DBG_HEX(aucBytes
[iIndex
]);
35 } /* end of bCheckBytes */
38 * This function checks whether the given file is or is not a "Word for DOS"
42 bIsWordForDosFile(FILE *pFile
, long lFilesize
)
44 static UCHAR aucBytes
[] =
45 { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */
47 DBG_MSG("bIsWordForDosFile");
49 if (pFile
== NULL
|| lFilesize
< 0) {
50 DBG_MSG("No proper file given");
53 if (lFilesize
< 128) {
54 DBG_MSG("File too small to be a Word document");
57 return bCheckBytes(pFile
, aucBytes
, elementsof(aucBytes
));
58 } /* end of bIsWordForDosFile */
61 * This function checks whether the given file is or is not a file with an
62 * OLE envelope (That is a document made by Word 6 or later)
65 bIsWordFileWithOLE(FILE *pFile
, long lFilesize
)
67 static UCHAR aucBytes
[] =
68 { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
71 if (pFile
== NULL
|| lFilesize
< 0) {
72 DBG_MSG("No proper file given");
75 if (lFilesize
< (long)BIG_BLOCK_SIZE
* 3) {
76 DBG_MSG("This file is too small to be a Word document");
80 iTailLen
= (int)(lFilesize
% BIG_BLOCK_SIZE
);
82 case 0: /* No tail, as it should be */
85 case 2: /* Filesize mismatch or a buggy email program */
86 if ((int)(lFilesize
% 3) == iTailLen
) {
91 * Ignore extra bytes caused by buggy email programs.
92 * They have bugs in their base64 encoding or decoding.
93 * 3 bytes -> 4 ascii chars -> 3 bytes
95 DBG_MSG("Document with extra bytes");
97 default: /* Wrong filesize for a Word document */
102 return bCheckBytes(pFile
, aucBytes
, elementsof(aucBytes
));
103 } /* end of bIsWordFileWithOLE */
106 * This function checks whether the given file is or is not a RTF document
109 bIsRtfFile(FILE *pFile
)
111 static UCHAR aucBytes
[] =
112 { '{', '\\', 'r', 't', 'f', '1' };
114 DBG_MSG("bIsRtfFile");
116 return bCheckBytes(pFile
, aucBytes
, elementsof(aucBytes
));
117 } /* end of bIsRtfFile */
120 * This function checks whether the given file is or is not a WP document
123 bIsWordPerfectFile(FILE *pFile
)
125 static UCHAR aucBytes
[] =
126 { 0xff, 'W', 'P', 'C' };
128 DBG_MSG("bIsWordPerfectFile");
130 return bCheckBytes(pFile
, aucBytes
, elementsof(aucBytes
));
131 } /* end of bIsWordPerfectFile */
134 * This function checks whether the given file is or is not a "Win Word 1 or 2"
138 bIsWinWord12File(FILE *pFile
, long lFilesize
)
140 static UCHAR aucBytes
[2][4] = {
141 { 0x9b, 0xa5, 0x21, 0x00 }, /* Win Word 1.x */
142 { 0xdb, 0xa5, 0x2d, 0x00 }, /* Win Word 2.0 */
146 DBG_MSG("bIsWinWord12File");
148 if (pFile
== NULL
|| lFilesize
< 0) {
149 DBG_MSG("No proper file given");
152 if (lFilesize
< 384) {
153 DBG_MSG("This file is too small to be a Word document");
157 for (iIndex
= 0; iIndex
< (int)elementsof(aucBytes
); iIndex
++) {
158 if (bCheckBytes(pFile
,
160 elementsof(aucBytes
[iIndex
]))) {
165 } /* end of bIsWinWord12File */
168 * This function checks whether the given file is or is not a "Mac Word 4 or 5"
172 bIsMacWord45File(FILE *pFile
)
174 static UCHAR aucBytes
[2][6] = {
175 { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */
176 { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */
180 DBG_MSG("bIsMacWord45File");
182 for (iIndex
= 0; iIndex
< (int)elementsof(aucBytes
); iIndex
++) {
183 if (bCheckBytes(pFile
,
185 elementsof(aucBytes
[iIndex
]))) {
190 } /* end of bIsMacWord45File */
193 * iGuessVersionNumber - guess the Word version number from first few bytes
195 * Returns the guessed version number or -1 when no guess is possible
198 iGuessVersionNumber(FILE *pFile
, long lFilesize
)
200 if(bIsWordForDosFile(pFile
, lFilesize
)) {
203 if (bIsWinWord12File(pFile
, lFilesize
)) {
206 if (bIsMacWord45File(pFile
)) {
209 if (bIsWordFileWithOLE(pFile
, lFilesize
)) {
213 } /* end of iGuessVersionNumber */
216 * iGetVersionNumber - get the Word version number from the header
218 * Returns the version number or -1 when unknown
221 iGetVersionNumber(const UCHAR
*aucHeader
)
223 USHORT usFib
, usChse
;
225 usFib
= usGetWord(0x02, aucHeader
);
226 if (usFib
>= 0x1000) {
227 /* Too big: must be MacWord using Big Endian */
229 usFib
= usGetWordBE(0x02, aucHeader
);
235 DBG_MSG("Word for DOS");
238 DBG_MSG("Word 4 for Macintosh");
242 DBG_MSG("Word 1.x for Windows");
245 DBG_MSG("Word 5 for Macintosh");
249 DBG_MSG("Word 2 for Windows");
253 DBG_MSG("Word 6 for Windows");
257 usChse
= usGetWord(0x14, aucHeader
);
261 DBG_MSG("Word 7 for Win95");
264 DBG_MSG("Word 6 for Macintosh");
269 if ((int)ucGetByte(0x05, aucHeader
) == 0xe0) {
270 DBG_MSG("Word 7 for Win95");
273 DBG_MSG("Word 6 for Macintosh");
278 usChse
= usGetWord(0x14, aucHeader
);
281 /* Unknown or unsupported version of Word */
285 DBG_MSG_C(usChse
!= 256, "Word97 for Win95/98/NT");
286 DBG_MSG_C(usChse
== 256, "Word98 for Macintosh");
289 } /* end of iGetVersionNumber */
292 * TRUE if the current file was made by Word version 6 or older on an
293 * Apple Macintosh, otherwise FALSE.
294 * This function hides the method of how to find out from the rest of the
301 } /* end of bIsOldMacFile */
304 * iInitDocument - initialize a document
306 * Returns the version of Word that made the document or -1
309 iInitDocument(FILE *pFile
, long lFilesize
)
311 int iGuess
, iWordVersion
;
313 iGuess
= iGuessVersionNumber(pFile
, lFilesize
);
316 iWordVersion
= iInitDocumentDOS(pFile
, lFilesize
);
319 iWordVersion
= iInitDocumentWIN(pFile
, lFilesize
);
322 iWordVersion
= iInitDocumentMAC(pFile
, lFilesize
);
325 iWordVersion
= iInitDocumentOLE(pFile
, lFilesize
);
333 } /* end of iInitDocument */
336 * vFreeDocument - free a document by free-ing its parts
341 DBG_MSG("vFreeDocument");
343 /* Free the memory */
344 vDestroyTextBlockList();
345 vDestroyDataBlockList();
346 vDestroyListInfoList();
347 vDestroyRowInfoList();
348 vDestroyStyleInfoList();
349 vDestroyFontInfoList();
350 vDestroyStylesheetList();
351 vDestroyPictInfoList();
352 vDestroyDocumentInfoList();
353 vDestroySectionInfoList();
354 vDestroyHdrFtrInfoList();
355 vDestroyPropModList();
356 vDestroyNotesInfoLists();
358 vDestroySummaryInfo();
359 } /* end of vFreeDocument */