Imported from antiword-0.37.tar.gz.
[antiword.git] / prop0.c
blobbac2fbdb2f6eb96297c1747654643a040a1035e4
/*
 * prop0.c
 * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * Read the property information from a Word for DOS file
 */
9 #include <string.h>
10 #include <time.h>
11 #include "antiword.h"
/*
 * iGetDosDateNumber - read a decimal number of one or two digits
 *
 * On success *ppucPos is advanced past the digits that were read,
 * on failure it is left unchanged.
 *
 * returns the number (0 through 99) or -1 if *ppucPos is not at a digit
 */
static int
iGetDosDateNumber(const unsigned char **ppucPos)
{
	const unsigned char	*pucTmp;
	int	iNumber;

	pucTmp = *ppucPos;
	if (!isdigit(*pucTmp)) {
		return -1;
	}
	iNumber = (int)(*pucTmp - '0');
	pucTmp++;
	if (isdigit(*pucTmp)) {
		iNumber *= 10;
		iNumber += (int)(*pucTmp - '0');
		pucTmp++;
	}
	*ppucPos = pucTmp;
	return iNumber;
} /* end of iGetDosDateNumber */

/*
 * tConvertDosDate - convert DOS date format
 *
 * The date is expected as month, separator, day, separator, year.
 * Month, day and year are one or two decimal digits each; a separator is
 * any single character that is neither alphanumeric nor the end of the
 * string. Anything following the year is ignored.
 * Years below 80 are taken to be in the 2000s, all others in the 1900s.
 *
 * returns Unix time_t or -1
 */
static time_t
tConvertDosDate(const char *szDosDate)
{
	struct tm	tTime;
	const unsigned char	*pucTmp;
	time_t	tResult;

	memset(&tTime, 0, sizeof(tTime));
	/*
	 * The <ctype.h> functions need values in the unsigned char range,
	 * so the string is walked through an unsigned char pointer.
	 */
	pucTmp = (const unsigned char *)szDosDate;

	/* Get the month */
	tTime.tm_mon = iGetDosDateNumber(&pucTmp);
	if (tTime.tm_mon < 0) {
		return (time_t)-1;
	}
	/*
	 * Get the first separator
	 * The end of the string is not a separator; stepping over it
	 * would read beyond the string.
	 */
	if (*pucTmp == '\0' || isalnum(*pucTmp)) {
		return (time_t)-1;
	}
	pucTmp++;

	/* Get the day */
	tTime.tm_mday = iGetDosDateNumber(&pucTmp);
	if (tTime.tm_mday < 0) {
		return (time_t)-1;
	}
	/* Get the second separator */
	if (*pucTmp == '\0' || isalnum(*pucTmp)) {
		return (time_t)-1;
	}
	pucTmp++;

	/* Get the year */
	tTime.tm_year = iGetDosDateNumber(&pucTmp);
	if (tTime.tm_year < 0) {
		return (time_t)-1;
	}

	/*
	 * Check the values
	 * mktime would silently normalize a month above 12 into the
	 * next year, so it is rejected here.
	 */
	if (tTime.tm_mon == 0 || tTime.tm_mon > 12 ||
	    tTime.tm_mday == 0 || tTime.tm_mday > 31) {
		return (time_t)-1;
	}

	/* Correct the values */
	tTime.tm_mon--;			/* From 01-12 to 00-11 */
	if (tTime.tm_year < 80) {
		tTime.tm_year += 100;	/* 00 means 2000 is 100 */
	}
	tTime.tm_isdst = -1;		/* Let mktime decide about DST */
	tResult = mktime(&tTime);
	NO_DBG_MSG(ctime(&tResult));
	return tResult;
} /* end of tConvertDosDate */
87 * Build the lists with Document Property Information for Word for DOS files
89 void
90 vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
92 document_block_type tDocument;
93 UCHAR *aucBuffer;
94 ULONG ulBeginSumdInfo, ulBeginNextBlock;
95 size_t tLen;
96 USHORT usOffset;
98 tDocument.ucHdrFtrSpecification = 0;
99 tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
100 tDocument.tCreateDate = (time_t)-1;
101 tDocument.tRevisedDate = (time_t)-1;
103 ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
104 DBG_HEX(ulBeginSumdInfo);
105 ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
106 DBG_HEX(ulBeginNextBlock);
108 if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
109 /* There is a summary information block */
110 tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
111 aucBuffer = xmalloc(tLen);
112 /* Read the summary information block */
113 if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
114 usOffset = usGetWord(12, aucBuffer);
115 if (aucBuffer[usOffset] != 0) {
116 NO_DBG_STRN(aucBuffer + usOffset, 8);
117 tDocument.tRevisedDate =
118 tConvertDosDate((char *)aucBuffer + usOffset);
120 usOffset = usGetWord(14, aucBuffer);
121 if (aucBuffer[usOffset] != 0) {
122 NO_DBG_STRN(aucBuffer + usOffset, 8);
123 tDocument.tCreateDate =
124 tConvertDosDate((char *)aucBuffer + usOffset);
127 aucBuffer = xfree(aucBuffer);
129 vCreateDocumentInfoList(&tDocument);
130 } /* end of vGet0DopInfo */
133 * Fill the section information block with information
134 * from a Word for DOS file.
136 static void
137 vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
138 section_block_type *pSection)
140 USHORT usCcol;
141 UCHAR ucTmp;
143 fail(aucGrpprl == NULL || pSection == NULL);
145 if (tBytes < 2) {
146 return;
148 /* bkc */
149 ucTmp = ucGetByte(1, aucGrpprl);
150 DBG_HEX(ucTmp);
151 ucTmp &= 0x07;
152 DBG_HEX(ucTmp);
153 pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
154 if (tBytes < 18) {
155 return;
157 /* ccolM1 */
158 usCcol = (USHORT)ucGetByte(17, aucGrpprl);
159 DBG_DEC(usCcol);
160 } /* end of vGet0SectionInfo */
163 * Build the lists with Section Property Information for Word for DOS files
165 void
166 vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
168 section_block_type tSection;
169 UCHAR *aucBuffer;
170 ULONG ulBeginOfText, ulTextOffset, ulBeginSectInfo;
171 ULONG ulCharPos, ulSectPage, ulBeginNextBlock;
172 size_t tSectInfoLen, tIndex, tSections, tBytes;
173 UCHAR aucTmp[2], aucFpage[35];
175 fail(pFile == NULL || aucHeader == NULL);
177 ulBeginOfText = 128;
178 NO_DBG_HEX(ulBeginOfText);
179 ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
180 DBG_HEX(ulBeginSectInfo);
181 ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
182 DBG_HEX(ulBeginNextBlock);
183 if (ulBeginSectInfo == ulBeginNextBlock) {
184 /* There is no section information block */
185 return;
188 /* Get the the number of sections */
189 if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
190 return;
192 tSections = (size_t)usGetWord(0, aucTmp);
193 NO_DBG_DEC(tSections);
195 /* Read the Section Descriptors */
196 tSectInfoLen = 10 * tSections;
197 NO_DBG_DEC(tSectInfoLen);
198 aucBuffer = xmalloc(tSectInfoLen);
199 if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
200 aucBuffer = xfree(aucBuffer);
201 return;
203 NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
205 /* Read the Section Properties */
206 for (tIndex = 0; tIndex < tSections; tIndex++) {
207 ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
208 NO_DBG_HEX(ulTextOffset);
209 ulCharPos = ulBeginOfText + ulTextOffset;
210 NO_DBG_HEX(ulTextOffset);
211 ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
212 NO_DBG_HEX(ulSectPage);
213 if (ulSectPage == FC_INVALID || /* Must use defaults */
214 ulSectPage < 128 || /* Should not happen */
215 ulSectPage >= ulBeginSectInfo) { /* Should not happen */
216 DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
217 vDefault2SectionInfoList(ulCharPos);
218 continue;
220 /* Get the number of bytes to read */
221 if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
222 continue;
224 tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
225 NO_DBG_DEC(tBytes);
226 if (tBytes > sizeof(aucFpage)) {
227 DBG_DEC(tBytes);
228 tBytes = sizeof(aucFpage);
230 /* Read the bytes */
231 if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
232 continue;
234 NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
235 /* Process the bytes */
236 vGetDefaultSection(&tSection);
237 vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
238 vAdd2SectionInfoList(&tSection, ulCharPos);
240 /* Clean up before you leave */
241 aucBuffer = xfree(aucBuffer);
242 } /* end of vGet0SepInfo */
245 * Fill the style information block with information
246 * from a Word for DOS file.
248 static void
249 vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
251 int iBytes;
252 UCHAR ucTmp;
254 fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
256 pStyle->usIstdNext = ISTD_NORMAL;
258 iBytes = (int)ucGetByte(iFodo, aucGrpprl);
259 if (iBytes < 1) {
260 return;
262 /* stc if styled */
263 ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
264 if ((ucTmp & BIT(0)) != 0) {
265 ucTmp >>= 1;
266 if (ucTmp >= 88 && ucTmp <= 94) {
267 /* Header levels 1 through 7 */
268 pStyle->usIstd = ucTmp - 87;
269 pStyle->ucNumLevel = 1;
272 if (iBytes < 2) {
273 return;
275 /* jc */
276 ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
277 pStyle->ucAlignment = ucTmp & 0x02;
278 if (iBytes < 3) {
279 return;
281 /* stc */
282 ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
283 ucTmp &= 0x7f;
284 if (ucTmp >= 88 && ucTmp <= 94) {
285 /* Header levels 1 through 7 */
286 pStyle->usIstd = ucTmp - 87;
287 pStyle->ucNumLevel = 1;
289 if (iBytes < 6) {
290 return;
292 /* dxaRight */
293 pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
294 NO_DBG_DEC(pStyle->sRightIndent);
295 if (iBytes < 8) {
296 return;
298 /* dxaLeft */
299 pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
300 NO_DBG_DEC(pStyle->sLeftIndent);
301 if (iBytes < 10) {
302 return;
304 /* dxaLeft1 */
305 pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
306 NO_DBG_DEC(pStyle->sLeftIndent1);
307 if (iBytes < 14) {
308 return;
310 /* dyaBefore */
311 pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
312 NO_DBG_DEC(pStyle->usBeforeIndent);
313 if (iBytes < 16) {
314 return;
316 /* dyaAfter */
317 pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
318 NO_DBG_DEC(pStyle->usAfterIndent);
319 } /* end of vGet0StyleInfo */
322 * Build the lists with Paragraph Information for Word for DOS files
324 void
325 vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
327 style_block_type tStyle;
328 ULONG ulBeginParfInfo, ulCharPos, ulCharPosNext;
329 int iIndex, iRun, iFodo;
330 UCHAR aucFpage[128];
332 fail(pFile == NULL || aucHeader == NULL);
334 ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
335 NO_DBG_HEX(ulBeginParfInfo);
337 do {
338 if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
339 return;
341 NO_DBG_PRINT_BLOCK(aucFpage, 128);
342 ulCharPosNext = ulGetLong(0, aucFpage);
343 iRun = (int)ucGetByte(0x7f, aucFpage);
344 NO_DBG_DEC(iRun);
345 for (iIndex = 0; iIndex < iRun; iIndex++) {
346 iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
347 if (iFodo <= 0 || iFodo > 0x79) {
348 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
349 continue;
351 vFillStyleFromStylesheet(0, &tStyle);
352 vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
353 ulCharPos = ulCharPosNext;
354 ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
355 tStyle.ulFileOffset = ulCharPos;
356 vAdd2StyleInfoList(&tStyle);
358 ulBeginParfInfo += 128;
359 } while (ulCharPosNext == ulBeginParfInfo);
360 } /* end of vGet0PapInfo */
363 * Fill the font information block with information
364 * from a Word for DOS file.
366 static void
367 vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
369 int iBytes;
370 UCHAR ucTmp;
372 fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
374 iBytes = (int)ucGetByte(iFodo, aucGrpprl);
375 if (iBytes < 2) {
376 return;
378 /* fBold, fItalic, cFtc */
379 ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
380 if ((ucTmp & BIT(0)) != 0) {
381 pFont->usFontStyle |= FONT_BOLD;
383 if ((ucTmp & BIT(1)) != 0) {
384 pFont->usFontStyle |= FONT_ITALIC;
386 pFont->ucFontNumber = ucTmp >> 2;
387 NO_DBG_DEC(pFont->ucFontNumber);
388 if (iBytes < 3) {
389 return;
391 /* cHps */
392 pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
393 NO_DBG_DEC(pFont->usFontSize);
394 if (iBytes < 4) {
395 return;
397 /* cKul, fStrike, fCaps, fSmallCaps, fVanish */
398 ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
399 if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
400 pFont->usFontStyle |= FONT_UNDERLINE;
402 if ((ucTmp & BIT(1)) != 0) {
403 pFont->usFontStyle |= FONT_STRIKE;
405 if ((ucTmp & BIT(4)) != 0) {
406 pFont->usFontStyle |= FONT_CAPITALS;
408 if ((ucTmp & BIT(5)) != 0) {
409 pFont->usFontStyle |= FONT_SMALL_CAPITALS;
411 if ((ucTmp & BIT(7)) != 0) {
412 pFont->usFontStyle |= FONT_HIDDEN;
414 DBG_HEX(pFont->usFontStyle);
415 if (iBytes < 6) {
416 return;
418 /* cIss */
419 ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
420 if (ucTmp != 0) {
421 if (ucTmp < 128) {
422 pFont->usFontStyle |= FONT_SUPERSCRIPT;
423 DBG_MSG("Superscript");
424 } else {
425 pFont->usFontStyle |= FONT_SUBSCRIPT;
426 DBG_MSG("Subscript");
429 if (iBytes < 7) {
430 return;
432 /* cIco */
433 ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
434 switch (ucTmp & 0x07) {
435 case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
436 case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
437 case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
438 case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
439 case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
440 case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
441 case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
442 case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
443 default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
445 NO_DBG_DEC(pFont->ucFontColor);
446 } /* end of vGet0FontInfo */
449 * Build the lists with Character Information for Word for DOS files
451 void
452 vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
454 font_block_type tFont;
455 ULONG ulBeginCharInfo, ulCharPos, ulCharPosNext;
456 int iIndex, iRun, iFodo;
457 UCHAR aucFpage[128];
459 fail(pFile == NULL || aucHeader == NULL);
461 ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
462 NO_DBG_HEX(ulBeginCharInfo);
463 ulBeginCharInfo = ROUND128(ulBeginCharInfo);
464 NO_DBG_HEX(ulBeginCharInfo);
466 do {
467 if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
468 return;
470 NO_DBG_PRINT_BLOCK(aucFpage, 128);
471 ulCharPosNext = ulGetLong(0, aucFpage);
472 iRun = (int)ucGetByte(0x7f, aucFpage);
473 NO_DBG_DEC(iRun);
474 for (iIndex = 0; iIndex < iRun; iIndex++) {
475 iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
476 if (iFodo <= 0 || iFodo > 0x79) {
477 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
478 continue;
480 vFillFontFromStylesheet(0, &tFont);
481 vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
482 ulCharPos = ulCharPosNext;
483 ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
484 tFont.ulFileOffset = ulCharPos;
485 vAdd2FontInfoList(&tFont);
487 ulBeginCharInfo += 128;
488 } while (ulCharPosNext == ulBeginCharInfo);
489 } /* end of vGet0ChrInfo */