Imported from antiword-0.37.tar.gz.
[antiword.git] / wordole.c
blob8a95fb9ed820088631538530152468cc3e5fece9
/*
 * wordole.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the OLE internals of a MS Word file
 */
9 #include <string.h>
10 #include "antiword.h"
12 /* Private type for Property Set Storage entries */
13 typedef struct pps_entry_tag {
14 ULONG ulNext;
15 ULONG ulPrevious;
16 ULONG ulDir;
17 ULONG ulSB;
18 ULONG ulSize;
19 int iLevel;
20 char szName[32];
21 UCHAR ucType;
22 } pps_entry_type;
/* Sentinel value: this PPS number or index must not be used */
#define PPS_NUMBER_INVALID	(0xffffffffUL)
/*
 * Macro to make sure all clean-up statements will be identical.
 * It destroys the small block list and releases the five work lists;
 * the list pointers must be in scope under exactly these names.
 */
#define FREE_ALL()					\
	do {						\
		vDestroySmallBlockList();		\
		aulRootList = xfree(aulRootList);	\
		aulSbdList = xfree(aulSbdList);		\
		aulBbdList = xfree(aulBbdList);		\
		aulSBD = xfree(aulSBD);			\
		aulBBD = xfree(aulBBD);			\
	} while (0)
41 * ulReadLong - read four bytes from the given file and offset
43 static ULONG
44 ulReadLong(FILE *pFile, ULONG ulOffset)
46 UCHAR aucBytes[4];
48 fail(pFile == NULL);
50 if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
51 werr(1, "Read long 0x%lx not possible", ulOffset);
53 return ulGetLong(0, aucBytes);
54 } /* end of ulReadLong */
57 * vName2String - turn the name into a proper string.
59 static void
60 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
62 char *pcChar;
63 size_t tIndex;
65 fail(aucBytes == NULL || szName == NULL);
67 if (tNameSize < 2) {
68 szName[0] = '\0';
69 return;
71 for (tIndex = 0, pcChar = szName;
72 tIndex < 2 * tNameSize;
73 tIndex += 2, pcChar++) {
74 *pcChar = (char)aucBytes[tIndex];
76 szName[tNameSize - 1] = '\0';
77 } /* end of vName2String */
80 * tReadBlockIndices - read the Big/Small Block Depot indices
82 * Returns the number of indices read
84 static size_t
85 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
86 size_t tMaxRec, ULONG ulOffset)
88 size_t tDone;
89 int iIndex;
90 UCHAR aucBytes[BIG_BLOCK_SIZE];
92 fail(pFile == NULL || aulBlockDepot == NULL);
93 fail(tMaxRec == 0);
95 /* Read a big block with BBD or SBD indices */
96 if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
97 werr(0, "Reading big block from 0x%lx is not possible",
98 ulOffset);
99 return 0;
101 /* Split the big block into indices, an index is four bytes */
102 tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
103 for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
104 aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
105 NO_DBG_DEC(aulBlockDepot[iIndex]);
107 return tDone;
108 } /* end of tReadBlockIndices */
111 * bGetBBD - get the Big Block Depot indices from the index-blocks
113 static BOOL
114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
115 ULONG *aulBBD, size_t tBBDLen)
117 ULONG ulBegin;
118 size_t tToGo, tDone;
119 int iIndex;
121 fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
123 DBG_MSG("bGetBBD");
125 tToGo = tBBDLen;
126 for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
127 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
128 NO_DBG_HEX(ulBegin);
129 tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
130 fail(tDone > tToGo);
131 if (tDone == 0) {
132 return FALSE;
134 aulBBD += tDone;
135 tToGo -= tDone;
137 return tToGo == 0;
138 } /* end of bGetBBD */
141 * bGetSBD - get the Small Block Depot indices from the index-blocks
143 static BOOL
144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
145 ULONG *aulSBD, size_t tSBDLen)
147 ULONG ulBegin;
148 size_t tToGo, tDone;
149 int iIndex;
151 fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
153 DBG_MSG("bGetSBD");
155 tToGo = tSBDLen;
156 for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
157 fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
158 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
159 NO_DBG_HEX(ulBegin);
160 tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
161 fail(tDone > tToGo);
162 if (tDone == 0) {
163 return FALSE;
165 aulSBD += tDone;
166 tToGo -= tDone;
168 return tToGo == 0;
169 } /* end of bGetSBD */
172 * vComputePPSlevels - compute the levels of the Property Set Storage entries
174 static void
175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
176 int iLevel, int iRecursionLevel)
178 fail(atPPSlist == NULL || pNode == NULL);
179 fail(iLevel < 0 || iRecursionLevel < 0);
181 if (iRecursionLevel > 25) {
182 /* This removes the possibility of an infinite recursion */
183 DBG_DEC(iRecursionLevel);
184 return;
186 if (pNode->iLevel <= iLevel) {
187 /* Avoid entering a loop */
188 DBG_DEC(iLevel);
189 DBG_DEC(pNode->iLevel);
190 return;
193 pNode->iLevel = iLevel;
195 if (pNode->ulDir != PPS_NUMBER_INVALID) {
196 vComputePPSlevels(atPPSlist,
197 &atPPSlist[pNode->ulDir],
198 iLevel + 1,
199 iRecursionLevel + 1);
201 if (pNode->ulNext != PPS_NUMBER_INVALID) {
202 vComputePPSlevels(atPPSlist,
203 &atPPSlist[pNode->ulNext],
204 iLevel,
205 iRecursionLevel + 1);
207 if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
208 vComputePPSlevels(atPPSlist,
209 &atPPSlist[pNode->ulPrevious],
210 iLevel,
211 iRecursionLevel + 1);
213 } /* end of vComputePPSlevels */
216 * bGetPPS - search the Property Set Storage for three sets
218 * Return TRUE if the WordDocument PPS is found
220 static BOOL
221 bGetPPS(FILE *pFile,
222 const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
224 pps_entry_type *atPPSlist;
225 ULONG ulBegin, ulOffset, ulTmp;
226 size_t tNbrOfPPS, tNameSize;
227 int iIndex, iStartBlock, iRootIndex;
228 BOOL bWord, bExcel;
229 UCHAR aucBytes[PROPERTY_SET_STORAGE_SIZE];
231 fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
233 DBG_MSG("bGetPPS");
235 NO_DBG_DEC(tRootListLen);
237 bWord = FALSE;
238 bExcel = FALSE;
239 (void)memset(pPPS, 0, sizeof(*pPPS));
241 /* Read and store all the Property Set Storage entries */
243 tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
244 atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
245 iRootIndex = 0;
247 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
248 ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
249 iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
250 ulOffset = ulTmp % BIG_BLOCK_SIZE;
251 ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
252 ulOffset;
253 NO_DBG_HEX(ulBegin);
254 if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
255 ulBegin, pFile)) {
256 werr(0, "Reading PPS %d is not possible", iIndex);
257 atPPSlist = xfree(atPPSlist);
258 return FALSE;
260 tNameSize = (size_t)usGetWord(0x40, aucBytes);
261 tNameSize = (tNameSize + 1) / 2;
262 vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
263 atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
264 if (atPPSlist[iIndex].ucType == 5) {
265 iRootIndex = iIndex;
267 atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
268 atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
269 atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
270 atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
271 atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
272 atPPSlist[iIndex].iLevel = INT_MAX;
273 if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
274 atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
275 (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
276 atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
277 (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
278 atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
279 DBG_DEC(iIndex);
280 DBG_DEC(atPPSlist[iIndex].ulPrevious);
281 DBG_DEC(atPPSlist[iIndex].ulNext);
282 DBG_DEC(atPPSlist[iIndex].ulDir);
283 DBG_DEC(tNbrOfPPS);
284 werr(0, "The Property Set Storage is damaged");
285 atPPSlist = xfree(atPPSlist);
286 return FALSE;
290 #if 0 /* defined(DEBUG) */
291 DBG_MSG("Before");
292 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
293 DBG_MSG(atPPSlist[iIndex].szName);
294 DBG_HEX(atPPSlist[iIndex].ulDir);
295 DBG_HEX(atPPSlist[iIndex].ulPrevious);
296 DBG_HEX(atPPSlist[iIndex].ulNext);
297 DBG_DEC(atPPSlist[iIndex].ulSB);
298 DBG_HEX(atPPSlist[iIndex].ulSize);
299 DBG_DEC(atPPSlist[iIndex].iLevel);
301 #endif /* DEBUG */
303 /* Add level information to each entry */
304 vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
306 /* Check the entries on level 1 for the required information */
307 NO_DBG_MSG("After");
308 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
309 #if 0 /* defined(DEBUG) */
310 DBG_MSG(atPPSlist[iIndex].szName);
311 DBG_HEX(atPPSlist[iIndex].ulDir);
312 DBG_HEX(atPPSlist[iIndex].ulPrevious);
313 DBG_HEX(atPPSlist[iIndex].ulNext);
314 DBG_DEC(atPPSlist[iIndex].ulSB);
315 DBG_HEX(atPPSlist[iIndex].ulSize);
316 DBG_DEC(atPPSlist[iIndex].iLevel);
317 #endif /* DEBUG */
318 if (atPPSlist[iIndex].iLevel != 1 ||
319 atPPSlist[iIndex].ucType != 2 ||
320 atPPSlist[iIndex].szName[0] == '\0' ||
321 atPPSlist[iIndex].ulSize == 0) {
322 /* This entry can be ignored */
323 continue;
325 if (pPPS->tWordDocument.ulSize == 0 &&
326 STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
327 pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
328 pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
329 bWord = TRUE;
330 } else if (pPPS->tData.ulSize == 0 &&
331 STREQ(atPPSlist[iIndex].szName, "Data")) {
332 pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
333 pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
334 } else if (pPPS->t0Table.ulSize == 0 &&
335 STREQ(atPPSlist[iIndex].szName, "0Table")) {
336 pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
337 pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
338 } else if (pPPS->t1Table.ulSize == 0 &&
339 STREQ(atPPSlist[iIndex].szName, "1Table")) {
340 pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
341 pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
342 } else if (pPPS->tSummaryInfo.ulSize == 0 &&
343 STREQ(atPPSlist[iIndex].szName,
344 "\005SummaryInformation")) {
345 pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
346 pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
347 } else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
348 STREQ(atPPSlist[iIndex].szName,
349 "\005DocumentSummaryInformation")) {
350 pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
351 pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
352 } else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
353 STREQ(atPPSlist[iIndex].szName, "Workbook")) {
354 bExcel = TRUE;
358 /* Free the space for the Property Set Storage entries */
359 atPPSlist = xfree(atPPSlist);
361 /* Draw your conclusions */
362 if (bWord) {
363 return TRUE;
366 if (bExcel) {
367 werr(0, "Sorry, but this is an Excel spreadsheet");
368 } else {
369 werr(0, "This OLE file does not contain a Word document");
371 return FALSE;
372 } /* end of bGetPPS */
375 * vGetBbdList - make a list of the places to find big blocks
377 static void
378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
380 int iIndex;
382 fail(pFile == NULL);
383 fail(iNbr > 127);
384 fail(aulBbdList == NULL);
386 NO_DBG_DEC(iNbr);
387 for (iIndex = 0; iIndex < iNbr; iIndex++) {
388 aulBbdList[iIndex] =
389 ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
390 NO_DBG_DEC(iIndex);
391 NO_DBG_HEX(aulBbdList[iIndex]);
393 } /* end of vGetBbdList */
396 * bGetDocumentText - make a list of the text blocks of a Word document
398 * Return TRUE when succesful, otherwise FALSE
400 static BOOL
401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
402 const ULONG *aulBBD, size_t tBBDLen,
403 const ULONG *aulSBD, size_t tSBDLen,
404 const UCHAR *aucHeader, int iWordVersion)
406 ULONG ulBeginOfText;
407 ULONG ulTextLen, ulFootnoteLen, ulEndnoteLen;
408 ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
409 ULONG ulTextBoxLen, ulHdrTextBoxLen;
410 UINT uiQuickSaves;
411 BOOL bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
412 USHORT usIdent, usDocStatus;
414 fail(pFile == NULL || pPPS == NULL);
415 fail(aulBBD == NULL);
416 fail(aulSBD == NULL);
418 DBG_MSG("bGetDocumentText");
420 /* Get the "magic number" from the header */
421 usIdent = usGetWord(0x00, aucHeader);
422 DBG_HEX(usIdent);
423 bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
424 usIdent == 0xa697 || usIdent == 0xa699;
425 /* Get the status flags from the header */
426 usDocStatus = usGetWord(0x0a, aucHeader);
427 DBG_HEX(usDocStatus);
428 bTemplate = (usDocStatus & BIT(0)) != 0;
429 DBG_MSG_C(bTemplate, "This document is a Template");
430 bFastSaved = (usDocStatus & BIT(2)) != 0;
431 uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
432 DBG_MSG_C(bFastSaved, "This document is Fast Saved");
433 DBG_DEC_C(bFastSaved, uiQuickSaves);
434 bEncrypted = (usDocStatus & BIT(8)) != 0;
435 if (bEncrypted) {
436 werr(0, "Encrypted documents are not supported");
437 return FALSE;
440 /* Get length information */
441 ulBeginOfText = ulGetLong(0x18, aucHeader);
442 DBG_HEX(ulBeginOfText);
443 switch (iWordVersion) {
444 case 6:
445 case 7:
446 ulTextLen = ulGetLong(0x34, aucHeader);
447 ulFootnoteLen = ulGetLong(0x38, aucHeader);
448 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
449 ulMacroLen = ulGetLong(0x40, aucHeader);
450 ulAnnotationLen = ulGetLong(0x44, aucHeader);
451 ulEndnoteLen = ulGetLong(0x48, aucHeader);
452 ulTextBoxLen = ulGetLong(0x4c, aucHeader);
453 ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
454 break;
455 case 8:
456 ulTextLen = ulGetLong(0x4c, aucHeader);
457 ulFootnoteLen = ulGetLong(0x50, aucHeader);
458 ulHdrFtrLen = ulGetLong(0x54, aucHeader);
459 ulMacroLen = ulGetLong(0x58, aucHeader);
460 ulAnnotationLen = ulGetLong(0x5c, aucHeader);
461 ulEndnoteLen = ulGetLong(0x60, aucHeader);
462 ulTextBoxLen = ulGetLong(0x64, aucHeader);
463 ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
464 break;
465 default:
466 werr(0, "This version of Word is not supported");
467 return FALSE;
469 DBG_DEC(ulTextLen);
470 DBG_DEC(ulFootnoteLen);
471 DBG_DEC(ulHdrFtrLen);
472 DBG_DEC(ulMacroLen);
473 DBG_DEC(ulAnnotationLen);
474 DBG_DEC(ulEndnoteLen);
475 DBG_DEC(ulTextBoxLen);
476 DBG_DEC(ulHdrTextBoxLen);
478 /* Make a list of the text blocks */
479 switch (iWordVersion) {
480 case 6:
481 case 7:
482 if (bFastSaved) {
483 bSuccess = bGet6DocumentText(pFile,
484 bFarEastWord,
485 pPPS->tWordDocument.ulSB,
486 aulBBD, tBBDLen,
487 aucHeader);
488 } else {
489 bSuccess = bAddTextBlocks(ulBeginOfText,
490 ulTextLen +
491 ulFootnoteLen +
492 ulHdrFtrLen +
493 ulMacroLen + ulAnnotationLen +
494 ulEndnoteLen +
495 ulTextBoxLen + ulHdrTextBoxLen,
496 bFarEastWord,
497 IGNORE_PROPMOD,
498 pPPS->tWordDocument.ulSB,
499 aulBBD, tBBDLen);
501 break;
502 case 8:
503 bSuccess = bGet8DocumentText(pFile,
504 pPPS,
505 aulBBD, tBBDLen, aulSBD, tSBDLen,
506 aucHeader);
507 break;
508 default:
509 werr(0, "This version of Word is not supported");
510 bSuccess = FALSE;
511 break;
514 if (bSuccess) {
515 vSplitBlockList(pFile,
516 ulTextLen,
517 ulFootnoteLen,
518 ulHdrFtrLen,
519 ulMacroLen,
520 ulAnnotationLen,
521 ulEndnoteLen,
522 ulTextBoxLen,
523 ulHdrTextBoxLen,
524 !bFastSaved && iWordVersion == 8);
525 } else {
526 vDestroyTextBlockList();
527 werr(0, "I can't find the text of this document");
529 return bSuccess;
530 } /* end of bGetDocumentText */
533 * vGetDocumentData - make a list of the data blocks of a Word document
535 static void
536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
537 const ULONG *aulBBD, size_t tBBDLen,
538 const UCHAR *aucHeader, int iWordVersion)
540 options_type tOptions;
541 ULONG ulBeginOfText;
542 BOOL bFastSaved, bHasImages, bSuccess;
543 USHORT usDocStatus;
545 fail(pFile == NULL);
546 fail(pPPS == NULL);
547 fail(aulBBD == NULL);
549 /* Get the options */
550 vGetOptions(&tOptions);
552 /* Get the status flags from the header */
553 usDocStatus = usGetWord(0x0a, aucHeader);
554 DBG_HEX(usDocStatus);
555 bFastSaved = (usDocStatus & BIT(2)) != 0;
556 bHasImages = (usDocStatus & BIT(3)) != 0;
558 if (!bHasImages ||
559 tOptions.eConversionType == conversion_text ||
560 tOptions.eConversionType == conversion_fmt_text ||
561 tOptions.eConversionType == conversion_xml ||
562 tOptions.eImageLevel == level_no_images) {
564 * No images in the document or text-only output or
565 * no images wanted, so no data blocks will be needed
567 vDestroyDataBlockList();
568 return;
571 /* Get length information */
572 ulBeginOfText = ulGetLong(0x18, aucHeader);
573 DBG_HEX(ulBeginOfText);
575 /* Make a list of the data blocks */
576 switch (iWordVersion) {
577 case 6:
578 case 7:
580 * The data blocks are in the text stream. The text stream
581 * is in "fast saved" format or "normal saved" format
583 if (bFastSaved) {
584 bSuccess = bGet6DocumentData(pFile,
585 pPPS->tWordDocument.ulSB,
586 aulBBD, tBBDLen,
587 aucHeader);
588 } else {
589 bSuccess = bAddDataBlocks(ulBeginOfText,
590 (ULONG)LONG_MAX,
591 pPPS->tWordDocument.ulSB,
592 aulBBD, tBBDLen);
594 break;
595 case 8:
597 * The data blocks are in the data stream. The data stream
598 * is always in "normal saved" format
600 bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
601 pPPS->tData.ulSB, aulBBD, tBBDLen);
602 break;
603 default:
604 werr(0, "This version of Word is not supported");
605 bSuccess = FALSE;
606 break;
609 if (!bSuccess) {
610 vDestroyDataBlockList();
611 werr(0, "I can't find the data of this document");
613 } /* end of vGetDocumentData */
616 * iInitDocumentOLE - initialize an OLE document
618 * Returns the version of Word that made the document or -1
621 iInitDocumentOLE(FILE *pFile, long lFilesize)
623 pps_info_type PPS_info;
624 ULONG *aulBBD, *aulSBD;
625 ULONG *aulRootList, *aulBbdList, *aulSbdList;
626 ULONG ulBdbListStart, ulAdditionalBBDlist;
627 ULONG ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
628 ULONG ulStart, ulTmp;
629 long lMaxBlock;
630 size_t tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
631 int iWordVersion, iIndex, iToGo;
632 BOOL bSuccess;
633 USHORT usIdent, usDocStatus;
634 UCHAR aucHeader[HEADER_SIZE];
636 fail(pFile == NULL);
638 lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
639 DBG_DEC(lMaxBlock);
640 if (lMaxBlock < 1) {
641 return -1;
643 tBBDLen = (size_t)(lMaxBlock + 1);
644 tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
645 DBG_DEC(tNumBbdBlocks);
646 ulRootStartblock = ulReadLong(pFile, 0x30);
647 DBG_DEC(ulRootStartblock);
648 ulSbdStartblock = ulReadLong(pFile, 0x3c);
649 DBG_DEC(ulSbdStartblock);
650 ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
651 DBG_HEX(ulAdditionalBBDlist);
652 ulSBLstartblock = ulReadLong(pFile,
653 (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
654 DBG_DEC(ulSBLstartblock);
655 tSBDLen = (size_t)(ulReadLong(pFile,
656 (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
657 SMALL_BLOCK_SIZE);
658 /* All to be xcalloc-ed pointers to NULL */
659 aulRootList = NULL;
660 aulSbdList = NULL;
661 aulBbdList = NULL;
662 aulSBD = NULL;
663 aulBBD = NULL;
664 /* Big Block Depot */
665 aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
666 aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
667 iToGo = (int)tNumBbdBlocks;
668 vGetBbdList(pFile, min(iToGo, 109), aulBbdList, 0x4c);
669 ulStart = 109;
670 iToGo -= 109;
671 while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
672 ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
673 vGetBbdList(pFile, min(iToGo, 127),
674 aulBbdList + ulStart, ulBdbListStart);
675 ulAdditionalBBDlist = ulReadLong(pFile,
676 ulBdbListStart + 4 * 127);
677 DBG_DEC(ulAdditionalBBDlist);
678 DBG_HEX(ulAdditionalBBDlist);
679 ulStart += 127;
680 iToGo -= 127;
682 if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
683 FREE_ALL();
684 return -1;
686 aulBbdList = xfree(aulBbdList);
687 /* Small Block Depot */
688 aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
689 aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
690 for (iIndex = 0, ulTmp = ulSbdStartblock;
691 iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
692 iIndex++, ulTmp = aulBBD[ulTmp]) {
693 if (ulTmp >= (ULONG)tBBDLen) {
694 DBG_DEC(ulTmp);
695 DBG_DEC(tBBDLen);
696 werr(1, "The Big Block Depot is damaged");
698 aulSbdList[iIndex] = ulTmp;
699 NO_DBG_HEX(aulSbdList[iIndex]);
701 if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
702 FREE_ALL();
703 return -1;
705 aulSbdList = xfree(aulSbdList);
706 /* Root list */
707 for (tRootListLen = 0, ulTmp = ulRootStartblock;
708 tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
709 tRootListLen++, ulTmp = aulBBD[ulTmp]) {
710 if (ulTmp >= (ULONG)tBBDLen) {
711 DBG_DEC(ulTmp);
712 DBG_DEC(tBBDLen);
713 werr(1, "The Big Block Depot is damaged");
716 if (tRootListLen == 0) {
717 werr(0, "No Rootlist found");
718 FREE_ALL();
719 return -1;
721 aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
722 for (iIndex = 0, ulTmp = ulRootStartblock;
723 iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
724 iIndex++, ulTmp = aulBBD[ulTmp]) {
725 if (ulTmp >= (ULONG)tBBDLen) {
726 DBG_DEC(ulTmp);
727 DBG_DEC(tBBDLen);
728 werr(1, "The Big Block Depot is damaged");
730 aulRootList[iIndex] = ulTmp;
731 NO_DBG_DEC(aulRootList[iIndex]);
733 fail(tRootListLen != (size_t)iIndex);
734 bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
735 aulRootList = xfree(aulRootList);
736 if (!bSuccess) {
737 FREE_ALL();
738 return -1;
740 /* Small block list */
741 if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
742 FREE_ALL();
743 return -1;
746 if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
747 DBG_DEC(PPS_info.tWordDocument.ulSize);
748 FREE_ALL();
749 werr(0, "I'm afraid the text stream of this file "
750 "is too small to handle.");
751 return -1;
753 /* Read the headerblock */
754 if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
755 aulBBD, tBBDLen, BIG_BLOCK_SIZE,
756 aucHeader, 0, HEADER_SIZE)) {
757 FREE_ALL();
758 return -1;
760 usIdent = usGetWord(0x00, aucHeader);
761 DBG_HEX(usIdent);
762 fail(usIdent != 0x8098 && /* Word 7 for oriental languages */
763 usIdent != 0x8099 && /* Word 7 for oriental languages */
764 usIdent != 0xa5dc && /* Word 6 & 7 */
765 usIdent != 0xa5ec && /* Word 7 & 97 & 98 */
766 usIdent != 0xa697 && /* Word 7 for oriental languages */
767 usIdent != 0xa699); /* Word 7 for oriental languages */
768 iWordVersion = iGetVersionNumber(aucHeader);
769 if (iWordVersion < 6) {
770 FREE_ALL();
771 werr(0, "This file is from a version of Word before Word 6.");
772 return -1;
775 /* Get the status flags from the header */
776 usDocStatus = usGetWord(0x0a, aucHeader);
777 if (usDocStatus & BIT(9)) {
778 PPS_info.tTable = PPS_info.t1Table;
779 } else {
780 PPS_info.tTable = PPS_info.t0Table;
782 /* Clean the entries that should not be used */
783 memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
784 memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
786 bSuccess = bGetDocumentText(pFile, &PPS_info,
787 aulBBD, tBBDLen, aulSBD, tSBDLen,
788 aucHeader, iWordVersion);
789 if (bSuccess) {
790 vGetDocumentData(pFile, &PPS_info,
791 aulBBD, tBBDLen, aucHeader, iWordVersion);
792 vGetPropertyInfo(pFile, &PPS_info,
793 aulBBD, tBBDLen, aulSBD, tSBDLen,
794 aucHeader, iWordVersion);
795 vSetDefaultTabWidth(pFile, &PPS_info,
796 aulBBD, tBBDLen, aulSBD, tSBDLen,
797 aucHeader, iWordVersion);
798 vGetNotesInfo(pFile, &PPS_info,
799 aulBBD, tBBDLen, aulSBD, tSBDLen,
800 aucHeader, iWordVersion);
802 FREE_ALL();
803 return bSuccess ? iWordVersion : -1;
804 } /* end of iInitDocumentOLE */