Imported from antiword-0.37.tar.gz.
[antiword.git] / wordwin.c
blobd31b7be5ee722ca8cb4b6bbefbcc9313ab5fe200
1 /*
2 * wordwin.c
3 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
5 * Description:
6 * Deal with the WIN internals of a MS Word file
7 */
9 #include "antiword.h"
13 * bGetDocumentText - make a list of the text blocks of a Word document
15 * Return TRUE when successful, otherwise FALSE
/*
 * Parameters:
 *   pFile     - the document file; in this function it is only checked
 *               with fail() and handed on to vSplitBlockList
 *   aucHeader - the header bytes of the document; every field used here
 *               is read from this buffer
 *
 * The status word is read from header offset 0x0a. When bit 2 (fast saved)
 * or bit 8 (encrypted) is set, a message is reported through werr and FALSE
 * is returned. Otherwise a single text block is added to the text block
 * list: it starts at the offset stored at 0x18 and its length is the sum
 * of the five lengths stored at 0x34, 0x38, 0x3c, 0x40 and 0x44.
 */
17 static BOOL
18 bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
20 text_block_type tTextBlock;
21 ULONG ulBeginOfText;
22 ULONG ulTextLen, ulFootnoteLen;
23 ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
24 UINT uiQuickSaves;
25 USHORT usDocStatus;
26 BOOL bTemplate, bFastSaved, bEncrypted, bSuccess;
28 fail(pFile == NULL);
29 fail(aucHeader == NULL);
31 DBG_MSG("bGetDocumentText");
33 /* Get the status flags from the header */
34 usDocStatus = usGetWord(0x0a, aucHeader);
35 DBG_HEX(usDocStatus);
/* bTemplate and uiQuickSaves are only handed to the debug macros below */
36 bTemplate = (usDocStatus & BIT(0)) != 0;
37 DBG_MSG_C(bTemplate, "This document is a Template");
38 bFastSaved = (usDocStatus & BIT(2)) != 0;
/* Bits 4 to 7 of the status word, shifted down to a value of 0..15 */
39 uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
40 DBG_MSG_C(bFastSaved, "This document is Fast Saved");
41 DBG_DEC_C(bFastSaved, uiQuickSaves);
/* Fast saved documents are rejected before any length field is read */
42 if (bFastSaved) {
43 werr(0, "Word2: fast saved documents are not supported yet");
44 return FALSE;
/* Encrypted documents are rejected in the same way */
46 bEncrypted = (usDocStatus & BIT(8)) != 0;
47 if (bEncrypted) {
48 werr(0, "Encrypted documents are not supported");
49 return FALSE;
52 /* Get length information */
53 ulBeginOfText = ulGetLong(0x18, aucHeader);
54 DBG_HEX(ulBeginOfText);
55 ulTextLen = ulGetLong(0x34, aucHeader);
56 ulFootnoteLen = ulGetLong(0x38, aucHeader);
57 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
58 ulMacroLen = ulGetLong(0x40, aucHeader);
59 ulAnnotationLen = ulGetLong(0x44, aucHeader);
60 DBG_DEC(ulTextLen);
61 DBG_DEC(ulFootnoteLen);
62 DBG_DEC(ulHdrFtrLen);
63 DBG_DEC(ulMacroLen);
64 DBG_DEC(ulAnnotationLen);
/*
 * NOTE(review): the function already returned above when bFastSaved was
 * set, so this test looks like it can never be true here - confirm and
 * consider removing the branch.
 */
65 if (bFastSaved) {
66 bSuccess = FALSE;
67 } else {
/* File offset and character position are both set to the start of the text */
68 tTextBlock.ulFileOffset = ulBeginOfText;
69 tTextBlock.ulCharPos = ulBeginOfText;
/*
 * One block covers all five parts.
 * NOTE(review): the sum is not checked against the file size or for
 * wrap-around - confirm the header values are validated elsewhere.
 */
70 tTextBlock.ulLength = ulTextLen +
71 ulFootnoteLen +
72 ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
73 tTextBlock.bUsesUnicode = FALSE;
74 tTextBlock.usPropMod = IGNORE_PROPMOD;
75 bSuccess = bAdd2TextBlockList(&tTextBlock);
/* The fields of the block go to the conditional debug macros with !bSuccess */
76 DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
77 DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
78 DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
79 DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
80 DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
83 if (bSuccess) {
/*
 * NOTE(review): original lines 90-92 are absent from this listing, between
 * ulAnnotationLen and FALSE - confirm the complete argument list against
 * the declaration of vSplitBlockList before editing this call.
 */
84 vSplitBlockList(pFile,
85 ulTextLen,
86 ulFootnoteLen,
87 ulHdrFtrLen,
88 ulMacroLen,
89 ulAnnotationLen,
93 FALSE);
94 } else {
/* The block could not be added: discard the text block list and report it */
95 vDestroyTextBlockList();
96 werr(0, "I can't find the text of this document");
98 return bSuccess;
99 } /* end of bGetDocumentText */
102 * vGetDocumentData - make a list of the data blocks of a Word document
104 static void
105 vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
107 data_block_type tDataBlock;
108 options_type tOptions;
109 ULONG ulEndOfText, ulBeginCharInfo;
110 BOOL bFastSaved, bHasImages, bSuccess;
111 USHORT usDocStatus;
113 /* Get the options */
114 vGetOptions(&tOptions);
116 /* Get the status flags from the header */
117 usDocStatus = usGetWord(0x0a, aucHeader);
118 DBG_HEX(usDocStatus);
119 bFastSaved = (usDocStatus & BIT(2)) != 0;
120 bHasImages = (usDocStatus & BIT(3)) != 0;
122 if (!bHasImages ||
123 tOptions.eConversionType == conversion_text ||
124 tOptions.eConversionType == conversion_fmt_text ||
125 tOptions.eConversionType == conversion_xml ||
126 tOptions.eImageLevel == level_no_images) {
128 * No images in the document or text-only output or
129 * no images wanted, so no data blocks will be needed
131 vDestroyDataBlockList();
132 return;
135 if (bFastSaved) {
136 bSuccess = FALSE;
137 } else {
138 /* This datablock is too big, but it contains all images */
139 ulEndOfText = ulGetLong(0x1c, aucHeader);
140 DBG_HEX(ulEndOfText);
141 ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
142 DBG_HEX(ulBeginCharInfo);
143 if (ulBeginCharInfo > ulEndOfText) {
144 tDataBlock.ulFileOffset = ulEndOfText;
145 tDataBlock.ulDataPos = ulEndOfText;
146 tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
147 bSuccess = bAdd2DataBlockList(&tDataBlock);
148 DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
149 DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
150 DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
151 } else {
152 bSuccess = ulBeginCharInfo == ulEndOfText;
156 if (!bSuccess) {
157 vDestroyDataBlockList();
158 werr(0, "I can't find the data of this document");
160 } /* end of vGetDocumentData */
163 * iInitDocumentWIN - initialize an WIN document
165 * Returns the version of Word that made the document or -1
168 iInitDocumentWIN(FILE *pFile, long lFilesize)
170 int iWordVersion;
171 BOOL bSuccess;
172 USHORT usIdent;
173 UCHAR aucHeader[384];
175 fail(pFile == NULL);
177 if (lFilesize < 384) {
178 return -1;
181 /* Read the headerblock */
182 if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
183 return -1;
185 /* Get the "magic number" from the header */
186 usIdent = usGetWord(0x00, aucHeader);
187 DBG_HEX(usIdent);
188 fail(usIdent != 0xa59b && /* WinWord 1.x */
189 usIdent != 0xa5db); /* WinWord 2.0 */
190 iWordVersion = iGetVersionNumber(aucHeader);
191 if (iWordVersion != 1 && iWordVersion != 2) {
192 werr(0, "This file is not from ''Win Word 1 or 2'.");
193 return -1;
195 bSuccess = bGetDocumentText(pFile, aucHeader);
196 if (bSuccess) {
197 vGetDocumentData(pFile, aucHeader);
198 vGetPropertyInfo(pFile, NULL,
199 NULL, 0, NULL, 0,
200 aucHeader, iWordVersion);
201 vSetDefaultTabWidth(pFile, NULL,
202 NULL, 0, NULL, 0,
203 aucHeader, iWordVersion);
204 vGetNotesInfo(pFile, NULL,
205 NULL, 0, NULL, 0,
206 aucHeader, iWordVersion);
208 return bSuccess ? iWordVersion : -1;
209 } /* end of iInitDocumentWIN */