3 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
6 * Deal with the WIN internals of an MS Word file
13 * bGetDocumentText - make a list of the text blocks of a Word document
15 * Return TRUE when successful, otherwise FALSE
/*
 * bGetDocumentText reads the status word and the length fields from
 * aucHeader, fills in one text_block_type and passes it to
 * bAdd2TextBlockList().
 *
 * pFile     - the document file; passed on to vSplitBlockList()
 * aucHeader - the header bytes that usGetWord()/ulGetLong() read from
 *
 * NOTE(review): this view of the function is incomplete. The embedded
 * original line numbers skip, so the return type, the braces, the
 * conditions that guard the werr() calls, the return statements and the
 * declarations of usDocStatus, uiQuickSaves and ulBeginOfText are not
 * visible here. Confirm every note below against the complete file.
 */
18 bGetDocumentText(FILE *pFile
, const UCHAR
*aucHeader
)
20 text_block_type tTextBlock
;
22 ULONG ulTextLen
, ulFootnoteLen
;
23 ULONG ulHdrFtrLen
, ulMacroLen
, ulAnnotationLen
;
26 BOOL bTemplate
, bFastSaved
, bEncrypted
, bSuccess
;
/*
 * NOTE(review): presumably fail() is a debug check that trips when its
 * argument is true, i.e. the header pointer must not be NULL - confirm
 * against the definition of the macro.
 */
29 fail(aucHeader
== NULL
);
31 DBG_MSG("bGetDocumentText");
33 /* Get the status flags from the header */
34 usDocStatus
= usGetWord(0x0a, aucHeader
);
/* Bit 0 of the status word is stored as the template flag */
36 bTemplate
= (usDocStatus
& BIT(0)) != 0;
37 DBG_MSG_C(bTemplate
, "This document is a Template");
/* Bit 2 of the status word is stored as the fast-saved flag */
38 bFastSaved
= (usDocStatus
& BIT(2)) != 0;
/* The bits selected by mask 0x00f0, shifted down by four */
39 uiQuickSaves
= (UINT
)(usDocStatus
& 0x00f0) >> 4;
40 DBG_MSG_C(bFastSaved
, "This document is Fast Saved");
41 DBG_DEC_C(bFastSaved
, uiQuickSaves
);
/*
 * NOTE(review): the condition guarding this message is not visible in
 * this view; presumably it depends on bFastSaved - confirm.
 */
43 werr(0, "Word2: fast saved documents are not supported yet");
/* Bit 8 of the status word is stored as the encrypted flag */
46 bEncrypted
= (usDocStatus
& BIT(8)) != 0;
/*
 * NOTE(review): the condition guarding this message is not visible in
 * this view; presumably it depends on bEncrypted - confirm.
 */
48 werr(0, "Encrypted documents are not supported");
52 /* Get length information */
/* Each value is a long read from a fixed offset in the header */
53 ulBeginOfText
= ulGetLong(0x18, aucHeader
);
54 DBG_HEX(ulBeginOfText
);
55 ulTextLen
= ulGetLong(0x34, aucHeader
);
56 ulFootnoteLen
= ulGetLong(0x38, aucHeader
);
57 ulHdrFtrLen
= ulGetLong(0x3c, aucHeader
);
58 ulMacroLen
= ulGetLong(0x40, aucHeader
);
59 ulAnnotationLen
= ulGetLong(0x44, aucHeader
);
61 DBG_DEC(ulFootnoteLen
);
64 DBG_DEC(ulAnnotationLen
);
/*
 * Describe all the text as a single block: the file offset and the
 * character position are both set to ulBeginOfText.
 */
68 tTextBlock
.ulFileOffset
= ulBeginOfText
;
69 tTextBlock
.ulCharPos
= ulBeginOfText
;
/*
 * The block length is a sum of the lengths read above.
 * NOTE(review): the embedded numbering skips a line inside this sum;
 * ulFootnoteLen is read above but is not among the visible terms -
 * confirm that it is part of the sum in the complete file.
 */
70 tTextBlock
.ulLength
= ulTextLen
+
72 ulHdrFtrLen
+ ulMacroLen
+ ulAnnotationLen
;
73 tTextBlock
.bUsesUnicode
= FALSE
;
74 tTextBlock
.usPropMod
= IGNORE_PROPMOD
;
/* Add the block to the text block list and keep the result */
75 bSuccess
= bAdd2TextBlockList(&tTextBlock
);
/* The conditional debug macros below are passed !bSuccess as condition */
76 DBG_HEX_C(!bSuccess
, tTextBlock
.ulFileOffset
);
77 DBG_HEX_C(!bSuccess
, tTextBlock
.ulCharPos
);
78 DBG_DEC_C(!bSuccess
, tTextBlock
.ulLength
);
79 DBG_DEC_C(!bSuccess
, tTextBlock
.bUsesUnicode
);
80 DBG_DEC_C(!bSuccess
, tTextBlock
.usPropMod
);
/*
 * NOTE(review): the remaining arguments of this call and the condition
 * under which it runs are not visible in this view.
 */
84 vSplitBlockList(pFile
,
/*
 * NOTE(review): presumably the failure path (list destroyed, error
 * reported); the guarding condition and the return statements are not
 * visible in this view - confirm.
 */
95 vDestroyTextBlockList();
96 werr(0, "I can't find the text of this document");
99 } /* end of bGetDocumentText */
102 * vGetDocumentData - make a list of the data blocks of a Word document
/*
 * vGetDocumentData fetches the options, reads the status word from
 * aucHeader and, when ulBeginCharInfo lies beyond ulEndOfText, adds one
 * data_block_type covering that range via bAdd2DataBlockList().
 *
 * pFile     - the document file; not referenced in the visible lines
 * aucHeader - the header bytes that usGetWord()/ulGetLong() read from
 *
 * NOTE(review): this view of the function is incomplete. The embedded
 * original line numbers skip, so the return type, the braces, the start
 * of the first condition, the else line, the return statements and the
 * declaration of usDocStatus are not visible here. Confirm every note
 * below against the complete file.
 */
105 vGetDocumentData(FILE *pFile
, const UCHAR
*aucHeader
)
107 data_block_type tDataBlock
;
108 options_type tOptions
;
109 ULONG ulEndOfText
, ulBeginCharInfo
;
110 BOOL bFastSaved
, bHasImages
, bSuccess
;
113 /* Get the options */
114 vGetOptions(&tOptions
);
116 /* Get the status flags from the header */
117 usDocStatus
= usGetWord(0x0a, aucHeader
);
118 DBG_HEX(usDocStatus
);
/* Bit 2 is stored as the fast-saved flag, bit 3 as the has-images flag */
119 bFastSaved
= (usDocStatus
& BIT(2)) != 0;
120 bHasImages
= (usDocStatus
& BIT(3)) != 0;
/*
 * NOTE(review): the start of this condition is not visible in this
 * view. The visible terms test for a text, formatted text or XML
 * conversion, or for image level level_no_images; presumably the
 * hidden part also tests bHasImages - confirm.
 */
123 tOptions
.eConversionType
== conversion_text
||
124 tOptions
.eConversionType
== conversion_fmt_text
||
125 tOptions
.eConversionType
== conversion_xml
||
126 tOptions
.eImageLevel
== level_no_images
) {
128 * No images in the document or text-only output or
129 * no images wanted, so no data blocks will be needed
131 vDestroyDataBlockList();
138 /* This datablock is too big, but it contains all images */
/* Both limits are longs read from fixed offsets in the header */
139 ulEndOfText
= ulGetLong(0x1c, aucHeader
);
140 DBG_HEX(ulEndOfText
);
141 ulBeginCharInfo
= ulGetLong(0xa0, aucHeader
);
142 DBG_HEX(ulBeginCharInfo
);
/*
 * A non-empty range: the block starts at ulEndOfText and its length is
 * the distance from there to ulBeginCharInfo.
 */
143 if (ulBeginCharInfo
> ulEndOfText
) {
144 tDataBlock
.ulFileOffset
= ulEndOfText
;
145 tDataBlock
.ulDataPos
= ulEndOfText
;
146 tDataBlock
.ulLength
= ulBeginCharInfo
- ulEndOfText
;
147 bSuccess
= bAdd2DataBlockList(&tDataBlock
);
/* The conditional debug macros below are passed !bSuccess as condition */
148 DBG_HEX_C(!bSuccess
, tDataBlock
.ulFileOffset
);
149 DBG_HEX_C(!bSuccess
, tDataBlock
.ulDataPos
);
150 DBG_DEC_C(!bSuccess
, tDataBlock
.ulLength
);
/*
 * NOTE(review): presumably the else branch of the test above (the else
 * line is not visible): success only when the two limits are equal.
 */
152 bSuccess
= ulBeginCharInfo
== ulEndOfText
;
/*
 * NOTE(review): presumably the failure path (list destroyed, error
 * reported); the guarding condition is not visible in this view.
 */
157 vDestroyDataBlockList();
158 werr(0, "I can't find the data of this document");
160 } /* end of vGetDocumentData */
163 * iInitDocumentWIN - initialize a WIN document
165 * Returns the version of Word that made the document or -1
/*
 * iInitDocumentWIN reads a 384 byte header block into aucHeader, checks
 * the identification word and the Word version, and then calls
 * bGetDocumentText(), vGetDocumentData(), vGetPropertyInfo(),
 * vSetDefaultTabWidth() and vGetNotesInfo() with that header.
 *
 * pFile     - the document file
 * lFilesize - the size of the file; compared against 384
 *
 * Returns iWordVersion when bGetDocumentText() reported success,
 * otherwise -1.
 *
 * NOTE(review): this view of the function is incomplete. The embedded
 * original line numbers skip, so the return type, the braces, the
 * bodies of the early checks, one argument line in each of the last
 * three calls and the declarations of usIdent, iWordVersion and
 * bSuccess are not visible here. Confirm against the complete file.
 */
168 iInitDocumentWIN(FILE *pFile
, long lFilesize
)
173 UCHAR aucHeader
[384];
/*
 * A file smaller than the header buffer is handled separately.
 * NOTE(review): the body of this check is not visible in this view.
 */
177 if (lFilesize
< 384) {
181 /* Read the headerblock */
/*
 * NOTE(review): presumably reads 384 bytes from file offset 0x00 into
 * aucHeader - confirm the argument order of bReadBytes(). The body of
 * the failure branch is not visible in this view.
 */
182 if (!bReadBytes(aucHeader
, 384, 0x00, pFile
)) {
185 /* Get the "magic number" from the header */
186 usIdent
= usGetWord(0x00, aucHeader
);
/*
 * NOTE(review): presumably fail() is a debug check that trips when its
 * argument is true, i.e. when usIdent is neither of the two values
 * below - confirm against the definition of the macro.
 */
188 fail(usIdent
!= 0xa59b && /* WinWord 1.x */
189 usIdent
!= 0xa5db); /* WinWord 2.0 */
190 iWordVersion
= iGetVersionNumber(aucHeader
);
/* Only version numbers 1 and 2 are accepted here */
191 if (iWordVersion
!= 1 && iWordVersion
!= 2) {
/*
 * NOTE(review): the quotes inside this message are unbalanced (two
 * opening, one closing); left untouched because it is runtime text.
 */
192 werr(0, "This file is not from ''Win Word 1 or 2'.");
/* Keep the result of the text pass; it decides the return value */
195 bSuccess
= bGetDocumentText(pFile
, aucHeader
);
/*
 * NOTE(review): whether the calls below run unconditionally or only
 * when bSuccess is set cannot be seen in this view.
 */
197 vGetDocumentData(pFile
, aucHeader
);
198 vGetPropertyInfo(pFile
, NULL
,
200 aucHeader
, iWordVersion
);
201 vSetDefaultTabWidth(pFile
, NULL
,
203 aucHeader
, iWordVersion
);
204 vGetNotesInfo(pFile
, NULL
,
206 aucHeader
, iWordVersion
);
/* The version number on success, -1 otherwise */
208 return bSuccess
? iWordVersion
: -1;
209 } /* end of iInitDocumentWIN */