3 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
6 * Find the blocks that contain the text of MS Word files
15 * bAddTextBlocks - Add the blocks to the text block list
17 * Returns TRUE when successful, FALSE if not
/*
 * Cut one run of text into pieces that each fit inside a single big
 * block and offer every piece to bAdd2TextBlockList().
 *
 * ulCharPosFirst - start of the run; seeds both the running character
 *                  position and the running offset into the block chain
 * ulTotalLength  - length of the run in characters
 * bUsesUnicode   - copied into every text block that is built here
 * usPropMod      - copied into every text block that is built here
 * ulStartBlock   - first block of the chain that is walked
 * aulBBD         - table giving, for a block index, the next index
 * tBBDLen        - an index at or above this value counts as damage
 *
 * NOTE(review): parts of this function are missing from the text as it
 * stands (lToGo is assigned without a visible declaration, the min()
 * call is cut off, no return statement is visible). Restore them from
 * the upstream source before relying on the comments below, which only
 * describe the statements that are present.
 */
20 bAddTextBlocks(ULONG ulCharPosFirst
, ULONG ulTotalLength
,
21 BOOL bUsesUnicode
, USHORT usPropMod
,
22 ULONG ulStartBlock
, const ULONG
*aulBBD
, size_t tBBDLen
)
24 text_block_type tTextBlock
;
25 ULONG ulCharPos
, ulOffset
, ulIndex
;
/*
 * Argument checks. The length limit of LONG_MAX / 2 leaves room for the
 * "times two" below without overflowing a long.
 * fail() is presumably an assertion-like macro - TODO confirm.
 */
28 fail(ulTotalLength
> (ULONG
)LONG_MAX
/ 2);
29 fail(ulStartBlock
> MAX_BLOCKNUMBER
&& ulStartBlock
!= END_OF_CHAIN
);
32 NO_DBG_HEX(ulCharPosFirst
);
33 NO_DBG_DEC(ulTotalLength
);
/* lToGo is the number of bytes that still have to be handed out */
36 /* One character equals two bytes */
37 NO_DBG_MSG("Uses Unicode");
38 lToGo
= (long)ulTotalLength
* 2;
40 /* One character equals one byte */
41 NO_DBG_MSG("Uses ASCII");
42 lToGo
= (long)ulTotalLength
;
45 ulCharPos
= ulCharPosFirst
;
46 ulOffset
= ulCharPosFirst
;
/* Follow the chain until it ends or every byte has been handed out */
47 for (ulIndex
= ulStartBlock
;
48 ulIndex
!= END_OF_CHAIN
&& lToGo
> 0;
49 ulIndex
= aulBBD
[ulIndex
]) {
/*
 * An index outside the table cannot be followed any further.
 * NOTE(review): the first argument 1 presumably makes werr() fatal -
 * confirm, because aulBBD[ulIndex] is read in the loop header afterwards.
 */
50 if (ulIndex
>= (ULONG
)tBBDLen
) {
53 werr(1, "The Big Block Depot is damaged");
/* The start lies beyond this block: use up one block of the offset */
55 if (ulOffset
>= BIG_BLOCK_SIZE
) {
56 ulOffset
-= BIG_BLOCK_SIZE
;
/*
 * Block ulIndex is addressed (ulIndex + 1) block sizes into the file;
 * the extra block presumably accounts for a header block - TODO confirm.
 */
59 tTextBlock
.ulFileOffset
=
60 (ulIndex
+ 1) * BIG_BLOCK_SIZE
+ ulOffset
;
61 tTextBlock
.ulCharPos
= ulCharPos
;
/*
 * A piece never extends past the end of the current block.
 * NOTE(review): the second argument of min() is not present here.
 */
62 tTextBlock
.ulLength
= min(BIG_BLOCK_SIZE
- ulOffset
,
64 tTextBlock
.bUsesUnicode
= bUsesUnicode
;
65 tTextBlock
.usPropMod
= usPropMod
;
/* When the list refuses the piece, dump all of its fields */
67 if (!bAdd2TextBlockList(&tTextBlock
)) {
68 DBG_HEX(tTextBlock
.ulFileOffset
);
69 DBG_HEX(tTextBlock
.ulCharPos
);
70 DBG_DEC(tTextBlock
.ulLength
);
71 DBG_DEC(tTextBlock
.bUsesUnicode
);
72 DBG_DEC(tTextBlock
.usPropMod
);
/*
 * Step past the piece: the position and the byte counter both move by
 * the piece length, so ulCharPos advances in the same unit as lToGo.
 */
75 ulCharPos
+= tTextBlock
.ulLength
;
76 lToGo
-= (long)tTextBlock
.ulLength
;
/* Presumably reports lToGo only when it did not end at zero - confirm */
78 DBG_DEC_C(lToGo
!= 0, lToGo
);
80 } /* end of bAddTextBlocks */
83 * bGet6DocumentText - make a list of the text blocks of Word 6/7 files
85 * Code for "fast saved" files.
87 * Returns TRUE when successful, FALSE if not
/*
 * Read the text information area named by the header into a temporary
 * buffer, walk its records and call bAddTextBlocks() once per piece.
 *
 * pFile        - file handed to bReadBuffer()
 * bUsesUnicode - passed unchanged to bAddTextBlocks()
 * ulStartBlock - start block handed to bReadBuffer()
 * aulBBD       - block table handed to bReadBuffer()
 * tBBDLen      - length that goes with aulBBD
 * aucHeader    - header bytes; the long at 0x160 is the position of
 *                the area and the long at 0x164 is its length
 *
 * The buffer obtained from xmalloc() is passed to xfree() on each exit
 * path that is visible here.
 *
 * NOTE(review): parts of this function are missing from the text as it
 * stands (aucBuffer, tTextInfoLen and usPropMod have no visible
 * declaration, iOff has no visible initialisation or advance, the tests
 * on iType and every return statement are absent). Restore them from
 * the upstream source; the comments below only describe what is present.
 */
90 bGet6DocumentText(FILE *pFile
, BOOL bUsesUnicode
, ULONG ulStartBlock
,
91 const ULONG
*aulBBD
, size_t tBBDLen
, const UCHAR
*aucHeader
)
94 ULONG ulBeginTextInfo
, ulTextOffset
, ulTotLength
;
96 int iIndex
, iType
, iOff
, iLen
, iPieces
;
99 DBG_MSG("bGet6DocumentText");
/* fail() is presumably an assertion-like macro - TODO confirm */
102 fail(aulBBD
== NULL
);
103 fail(aucHeader
== NULL
);
/* Position and length of the text information area, from the header */
105 ulBeginTextInfo
= ulGetLong(0x160, aucHeader
); /* fcClx */
106 DBG_HEX(ulBeginTextInfo
);
107 tTextInfoLen
= (size_t)ulGetLong(0x164, aucHeader
); /* lcbClx */
108 DBG_DEC(tTextInfoLen
);
/* Load the whole area; the read always uses BIG_BLOCK_SIZE blocks */
110 aucBuffer
= xmalloc(tTextInfoLen
);
111 if (!bReadBuffer(pFile
, ulStartBlock
,
112 aulBBD
, tBBDLen
, BIG_BLOCK_SIZE
,
113 aucBuffer
, ulBeginTextInfo
, tTextInfoLen
)) {
/* Read failed: release the buffer (xfree() result is stored back) */
114 aucBuffer
= xfree(aucBuffer
);
117 NO_DBG_PRINT_BLOCK(aucBuffer
, tTextInfoLen
);
/* Walk the area; each pass starts by reading a one-byte record type */
120 while ((size_t)iOff
< tTextInfoLen
) {
121 iType
= (int)ucGetByte(iOff
, aucBuffer
);
/*
 * Record with a 16-bit length in front: the record, starting at that
 * length word, is handed to the property modifier list.
 */
129 iLen
= (int)usGetWord(iOff
, aucBuffer
);
130 vAdd2PropModList(aucBuffer
+ iOff
);
/* Record type that is not handled: report it and release the buffer */
135 werr(0, "Unknown type of 'fastsaved' format");
136 aucBuffer
= xfree(aucBuffer
);
/*
 * Record that holds the pieces. With n pieces it is laid out as
 * (n + 1) four-byte boundary values followed by n eight-byte entries,
 * which is 12 * n + 4 bytes; hence the division below.
 * NOTE(review): no visible check that iLen is at least 4 or that the
 * offsets read below stay inside tTextInfoLen - confirm upstream.
 */
140 iLen
= (int)usGetWord(iOff
, aucBuffer
);
143 iPieces
= (iLen
- 4) / 12;
145 for (iIndex
= 0; iIndex
< iPieces
; iIndex
++) {
/* Text offset: the long at byte 2 of eight-byte entry iIndex */
146 ulTextOffset
= ulGetLong(
147 iOff
+ (iPieces
+ 1) * 4 + iIndex
* 8 + 2,
/* Property modifier: the word at byte 6 of the same entry */
149 usPropMod
= usGetWord(
150 iOff
+ (iPieces
+ 1) * 4 + iIndex
* 8 + 6,
/*
 * Piece length from boundary values iIndex + 1 and iIndex.
 * NOTE(review): the operator joining the two reads is not present
 * here; presumably a subtraction - confirm.
 */
152 ulTotLength
= ulGetLong(iOff
+ (iIndex
+ 1) * 4,
154 ulGetLong(iOff
+ iIndex
* 4,
156 NO_DBG_HEX_C(usPropMod
!= 0, usPropMod
);
/* Register the piece; on failure the buffer is released */
157 if (!bAddTextBlocks(ulTextOffset
, ulTotLength
,
158 bUsesUnicode
, usPropMod
,
161 aucBuffer
= xfree(aucBuffer
);
/* Normal end: release the buffer */
167 aucBuffer
= xfree(aucBuffer
);
169 } /* end of bGet6DocumentText */
172 * bGet8DocumentText - make a list of the text blocks of Word 8/97 files
174 * Returns TRUE when successful, FALSE if not
/*
 * Read the text information area of the table stream into a temporary
 * buffer, walk its records and call bAddTextBlocks() once per piece.
 *
 * pFile     - file handed to bReadBuffer()
 * pPPS      - stream information; tTable.ulSB and tTable.ulSize describe
 *             the stream that is read, tWordDocument.ulSB is the start
 *             block given to bAddTextBlocks()
 * aulBBD    - big block table, used when tTable.ulSize is not below
 *             MIN_SIZE_FOR_BBD_USE
 * tBBDLen   - length that goes with aulBBD
 * aulSBD    - small block table, used for smaller table streams
 * tSBDLen   - length that goes with aulSBD
 * aucHeader - header bytes; the long at 0x1a2 is the position of the
 *             area and the long at 0x1a6 is its length
 *
 * The buffer obtained from xmalloc() is passed to xfree() on each exit
 * path that is visible here.
 *
 * NOTE(review): parts of this function are missing from the text as it
 * stands (aucBuffer, iType, iLen, usPropMod and bUsesUnicode have no
 * visible declaration, lOff has no visible initialisation, the tests on
 * iType and every return statement are absent). Restore them from the
 * upstream source; the comments below only describe what is present.
 */
177 bGet8DocumentText(FILE *pFile
, const pps_info_type
*pPPS
,
178 const ULONG
*aulBBD
, size_t tBBDLen
,
179 const ULONG
*aulSBD
, size_t tSBDLen
,
180 const UCHAR
*aucHeader
)
182 const ULONG
*aulBlockDepot
;
184 ULONG ulTextOffset
, ulBeginTextInfo
;
185 ULONG ulTotLength
, ulLen
;
186 long lIndex
, lPieces
, lOff
;
187 size_t tTextInfoLen
, tBlockDepotLen
, tBlockSize
;
192 DBG_MSG("bGet8DocumentText");
/* fail() is presumably an assertion-like macro - TODO confirm */
194 fail(pFile
== NULL
|| pPPS
== NULL
);
195 fail(aulBBD
== NULL
|| aulSBD
== NULL
);
196 fail(aucHeader
== NULL
);
/* Position and length of the text information area, from the header */
198 ulBeginTextInfo
= ulGetLong(0x1a2, aucHeader
); /* fcClx */
199 DBG_HEX(ulBeginTextInfo
);
200 tTextInfoLen
= (size_t)ulGetLong(0x1a6, aucHeader
); /* lcbClx */
201 DBG_DEC(tTextInfoLen
);
203 DBG_DEC(pPPS
->tTable
.ulSB
);
204 DBG_HEX(pPPS
->tTable
.ulSize
);
/* Empty table stream. NOTE(review): the handling is not present here */
205 if (pPPS
->tTable
.ulSize
== 0) {
/* The size of the table stream decides which block table is used */
209 if (pPPS
->tTable
.ulSize
< MIN_SIZE_FOR_BBD_USE
) {
210 /* Use the Small Block Depot */
211 aulBlockDepot
= aulSBD
;
212 tBlockDepotLen
= tSBDLen
;
213 tBlockSize
= SMALL_BLOCK_SIZE
;
215 /* Use the Big Block Depot */
216 aulBlockDepot
= aulBBD
;
217 tBlockDepotLen
= tBBDLen
;
218 tBlockSize
= BIG_BLOCK_SIZE
;
/* Load the whole area from the table stream with the chosen table */
220 aucBuffer
= xmalloc(tTextInfoLen
);
221 if (!bReadBuffer(pFile
, pPPS
->tTable
.ulSB
,
222 aulBlockDepot
, tBlockDepotLen
, tBlockSize
,
223 aucBuffer
, ulBeginTextInfo
, tTextInfoLen
)) {
/* Read failed: release the buffer (xfree() result is stored back) */
224 aucBuffer
= xfree(aucBuffer
);
227 NO_DBG_PRINT_BLOCK(aucBuffer
, tTextInfoLen
);
/* Walk the area; each pass starts by reading a one-byte record type */
230 while (lOff
< (long)tTextInfoLen
) {
231 iType
= (int)ucGetByte(lOff
, aucBuffer
);
/*
 * Record with a 16-bit length in front: the record, starting at that
 * length word, is handed to the property modifier list and then
 * skipped (length word plus iLen bytes).
 */
239 iLen
= (int)usGetWord(lOff
, aucBuffer
);
240 vAdd2PropModList(aucBuffer
+ lOff
);
241 lOff
+= (long)iLen
+ 2;
/* Record type that is not handled: report it and release the buffer */
245 werr(0, "Unknown type of 'fastsaved' format");
246 aucBuffer
= xfree(aucBuffer
);
/*
 * Record that holds the pieces; its length is a 32-bit value. With n
 * pieces it is laid out as (n + 1) four-byte boundary values followed
 * by n eight-byte entries, which is 12 * n + 4 bytes.
 * NOTE(review): ulLen is unsigned, so ulLen - 4 wraps when ulLen is
 * below 4; no guard is visible here - confirm one exists upstream.
 */
250 ulLen
= ulGetLong(lOff
, aucBuffer
);
256 lPieces
= (long)((ulLen
- 4) / 12);
258 for (lIndex
= 0; lIndex
< lPieces
; lIndex
++) {
/* Text offset: the long at byte 2 of eight-byte entry lIndex */
259 ulTextOffset
= ulGetLong(
260 lOff
+ (lPieces
+ 1) * 4 + lIndex
* 8 + 2,
/* Property modifier: the word at byte 6 of the same entry */
262 usPropMod
= usGetWord(
263 lOff
+ (lPieces
+ 1) * 4 + lIndex
* 8 + 6,
/*
 * Piece length from boundary values lIndex + 1 and lIndex.
 * NOTE(review): the operator joining the two reads is not present
 * here; presumably a subtraction - confirm.
 */
265 ulTotLength
= ulGetLong(lOff
+ (lIndex
+ 1) * 4,
267 ulGetLong(lOff
+ lIndex
* 4,
/*
 * Bit 30 of the offset is a flag, not part of the position: it is
 * tested here and masked out below, next to the assignment that marks
 * the piece as not Unicode.
 * NOTE(review): the arm taken when the bit is clear is not present.
 */
269 if ((ulTextOffset
& BIT(30)) == 0) {
272 bUsesUnicode
= FALSE
;
273 ulTextOffset
&= ~BIT(30);
276 NO_DBG_HEX_C(usPropMod
!= 0, usPropMod
);
/*
 * Register the piece, starting from the first block of the
 * WordDocument stream; on failure the buffer is released.
 */
277 if (!bAddTextBlocks(ulTextOffset
, ulTotLength
,
278 bUsesUnicode
, usPropMod
,
279 pPPS
->tWordDocument
.ulSB
,
281 aucBuffer
= xfree(aucBuffer
);
/* Normal end: release the buffer */
287 aucBuffer
= xfree(aucBuffer
);
289 } /* end of bGet8DocumentText */