Imported from antiword-0.34.tar.gz.
[antiword.git] / word2text.c
blobdfd8a7f4c00d2e7a5d1fc757bb767cde9fee7dcc
/*
 * word2text.c
 * Copyright (C) 1998-2003 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * MS Word to text functions
 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #if defined(__riscos)
14 #include "visdelay.h"
15 #endif /* __riscos */
16 #include "antiword.h"
/* Size in bytes of the text buffer of a freshly created output record */
#define INITIAL_SIZE		40
/* Number of bytes by which a text buffer grows when it is full */
#define EXTENTION_SIZE		20


/*
 * Macros to make sure all such statements will be identical
 *
 * Both macros expand to statements that use local variables of the
 * calling function by name: pAnchor and pOutput (both macros) and
 * pDiag, lWidthMax and ucAlignment (OUTPUT_LINE only).
 */

/* Pass the collected line to vAlign2Window, then start an empty line */
#define OUTPUT_LINE()		\
	do {\
		vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)

/* Throw the collected line away and start an empty line */
#define RESET_LINE()		\
	do {\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)
37 #if defined(__riscos)
38 /* Length of the document in characters */
39 static ULONG ulDocumentLength;
40 /* Number of characters processed so far */
41 static ULONG ulCharCounter;
42 static int iCurrPct, iPrevPct;
43 #endif /* __riscos */
44 /* The document is in the format belonging to this version of Word */
45 static int iWordVersion = -1;
46 /* Special treatment for files from Word 6 on an Apple Macintosh */
47 static BOOL bOldMacFile = FALSE;
48 /* Section Information */
49 static const section_block_type *pSection = NULL;
50 static const section_block_type *pSectionNext = NULL;
51 /* All the (command line) options */
52 static options_type tOptions;
53 /* Needed for reading a complete table row */
54 static const row_block_type *pRowInfo = NULL;
55 static BOOL bStartRow = FALSE;
56 static BOOL bEndRowNorm = FALSE;
57 static BOOL bEndRowFast = FALSE;
58 static BOOL bIsTableRow = FALSE;
59 /* Index of the next style and font information */
60 static USHORT usIstdNext = ISTD_NORMAL;
61 /* Needed for finding the start of a style */
62 static const style_block_type *pStyleInfo = NULL;
63 static style_block_type tStyleNext;
64 static BOOL bStartStyle = FALSE;
65 static BOOL bStartStyleNext = FALSE;
66 /* Needed for finding the start of a font */
67 static const font_block_type *pFontInfo = NULL;
68 static font_block_type tFontNext;
69 static BOOL bStartFont = FALSE;
70 static BOOL bStartFontNext = FALSE;
71 /* Needed for finding an image */
72 static ULONG ulFileOffsetImage = FC_INVALID;
/*
 * vUpdateCounters - Update the counters for the hourglass
 *
 * Counts one more processed character and reports the new percentage
 * whenever it differs from the previously reported one.
 * Does nothing unless compiled for RISC OS.
 */
static void
vUpdateCounters(void)
{
#if defined(__riscos)
	ulCharCounter++;
	if (ulDocumentLength == 0) {
		/* No length to relate to; do not divide by zero */
		return;
	}
	iCurrPct = (int)((ulCharCounter * 100) / ulDocumentLength);
	if (iCurrPct != iPrevPct) {
		visdelay_percent(iCurrPct);
		iPrevPct = iCurrPct;
	}
#endif /* __riscos */
} /* end of vUpdateCounters */
92 * bOutputContainsText - see if the output contains more than white space
94 static BOOL
95 bOutputContainsText(output_type *pAnchor)
97 output_type *pCurr;
98 size_t tIndex;
100 fail(pAnchor == NULL);
102 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
103 fail(pCurr->lStringWidth < 0);
104 for (tIndex = 0; tIndex < pCurr->tNextFree; tIndex++) {
105 if (isspace((int)(UCHAR)pCurr->szStorage[tIndex])) {
106 continue;
108 #if defined(DEBUG)
109 if (pCurr->szStorage[tIndex] == FILLER_CHAR) {
110 continue;
112 #endif /* DEBUG */
113 return TRUE;
116 return FALSE;
117 } /* end of bOutputContainsText */
120 * lTotalStringWidth - compute the total width of the output string
122 static long
123 lTotalStringWidth(output_type *pAnchor)
125 output_type *pCurr;
126 long lTotal;
128 lTotal = 0;
129 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
130 DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth);
131 fail(pCurr->lStringWidth < 0);
132 lTotal += pCurr->lStringWidth;
134 return lTotal;
135 } /* end of lTotalStringWidth */
138 * vStoreByte - store one byte
140 static void
141 vStoreByte(UCHAR ucChar, output_type *pOutput)
143 fail(pOutput == NULL);
145 if (ucChar == 0) {
146 pOutput->szStorage[pOutput->tNextFree] = '\0';
147 return;
150 while (pOutput->tNextFree + 2 > pOutput->tStorageSize) {
151 pOutput->tStorageSize += EXTENTION_SIZE;
152 pOutput->szStorage = xrealloc(pOutput->szStorage,
153 pOutput->tStorageSize);
155 pOutput->szStorage[pOutput->tNextFree] = (char)ucChar;
156 pOutput->szStorage[pOutput->tNextFree + 1] = '\0';
157 pOutput->tNextFree++;
158 } /* end of vStoreByte */
161 * vStoreChar - store a character as one or more bytes
163 static void
164 vStoreChar(ULONG ulChar, BOOL bChangeAllowed, output_type *pOutput)
166 char szResult[4];
167 size_t tIndex, tLen;
169 fail(pOutput == NULL);
171 if (tOptions.eEncoding == encoding_utf8 && bChangeAllowed) {
172 DBG_HEX_C(ulChar > 0xffff, ulChar);
173 fail(ulChar > 0xffff);
174 tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
175 for (tIndex = 0; tIndex < tLen; tIndex++) {
176 vStoreByte((UCHAR)szResult[tIndex], pOutput);
178 } else {
179 DBG_HEX_C(ulChar > 0xff, ulChar);
180 fail(ulChar > 0xff);
181 vStoreByte((UCHAR)ulChar, pOutput);
182 tLen = 1;
184 pOutput->lStringWidth += lComputeStringWidth(
185 pOutput->szStorage + pOutput->tNextFree - tLen,
186 tLen,
187 pOutput->tFontRef,
188 pOutput->usFontSize);
189 } /* end of vStoreChar */
192 * vStoreCharacter - store one character
194 static void
195 vStoreCharacter(ULONG ulChar, output_type *pOutput)
197 vStoreChar(ulChar, TRUE, pOutput);
198 } /* end of vStoreCharacter */
201 * vStoreString - store a string
203 static void
204 vStoreString(const char *szString, size_t tStringLength, output_type *pOutput)
206 size_t tIndex;
208 fail(szString == NULL || pOutput == NULL);
210 for (tIndex = 0; tIndex < tStringLength; tIndex++) {
211 vStoreCharacter((UCHAR)szString[tIndex], pOutput);
213 } /* end of vStoreString */
216 * vStoreNumberAsDecimal - store a number as a decimal number
218 static void
219 vStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput)
221 size_t tLen;
222 char szString[3 * sizeof(UINT) + 1];
224 fail(uiNumber == 0);
225 fail(pOutput == NULL);
227 tLen = (size_t)sprintf(szString, "%u", uiNumber);
228 vStoreString(szString, tLen, pOutput);
229 } /* end of vStoreNumberAsDecimal */
232 * vStoreNumberAsRoman - store a number as a roman numerical
234 static void
235 vStoreNumberAsRoman(UINT uiNumber, output_type *pOutput)
237 size_t tLen;
238 char szString[15];
240 fail(uiNumber == 0);
241 fail(pOutput == NULL);
243 tLen = tNumber2Roman(uiNumber, FALSE, szString);
244 vStoreString(szString, tLen, pOutput);
245 } /* end of vStoreNumberAsRoman */
248 * vStoreStyle - store a style
250 static void
251 vStoreStyle(diagram_type *pDiag, output_type *pOutput,
252 const style_block_type *pStyle)
254 size_t tLen;
255 char szString[120];
257 fail(pDiag == NULL);
258 fail(pOutput == NULL);
259 fail(pStyle == NULL);
261 if (tOptions.eConversionType == conversion_xml) {
262 vSetHeaders(pDiag, pStyle->usIstd);
263 } else {
264 tLen = tStyle2Window(szString, pStyle, pSection);
265 vStoreString(szString, tLen, pOutput);
267 } /* end of vStoreStyle */
270 * vPutIndentation - output the specified amount of indentation
272 static void
273 vPutIndentation(diagram_type *pDiag, output_type *pOutput,
274 BOOL bNoMarks, BOOL bFirstLine,
275 UINT uiListNumber, UCHAR ucNFC, const char *szListChar,
276 long lLeftIndentation, long lLeftIndentation1)
278 long lWidth;
279 size_t tIndex, tNextFree;
280 char szLine[30];
282 fail(pDiag == NULL);
283 fail(pOutput == NULL);
284 fail(szListChar == NULL);
285 fail(lLeftIndentation < 0);
287 if (tOptions.eConversionType == conversion_xml) {
288 /* XML does its own indentation at rendering time */
289 return;
292 if (bNoMarks) {
293 if (bFirstLine) {
294 lLeftIndentation += lLeftIndentation1;
296 if (lLeftIndentation < 0) {
297 lLeftIndentation = 0;
299 vSetLeftIndentation(pDiag, lLeftIndentation);
300 return;
302 if (lLeftIndentation <= 0) {
303 DBG_HEX_C(ucNFC != 0x00, ucNFC);
304 vSetLeftIndentation(pDiag, 0);
305 return;
308 #if defined(DEBUG)
309 if (tOptions.eEncoding == encoding_utf8) {
310 fail(strlen(szListChar) > 3);
311 } else {
312 DBG_HEX_C(iscntrl((int)szListChar[0]), szListChar[0]);
313 fail(iscntrl((int)szListChar[0]));
314 fail(szListChar[1] != '\0');
316 #endif /* DEBUG */
318 switch (ucNFC) {
319 case LIST_ARABIC_NUM:
320 tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
321 break;
322 case LIST_UPPER_ROMAN:
323 case LIST_LOWER_ROMAN:
324 tNextFree = tNumber2Roman(uiListNumber,
325 ucNFC == LIST_UPPER_ROMAN, szLine);
326 break;
327 case LIST_UPPER_ALPHA:
328 case LIST_LOWER_ALPHA:
329 tNextFree = tNumber2Alpha(uiListNumber,
330 ucNFC == LIST_UPPER_ALPHA, szLine);
331 break;
332 case LIST_ORDINAL_NUM:
333 if (uiListNumber % 10 == 1 && uiListNumber != 11) {
334 tNextFree =
335 (size_t)sprintf(szLine, "%ust", uiListNumber);
336 } else if (uiListNumber % 10 == 2 && uiListNumber != 12) {
337 tNextFree =
338 (size_t)sprintf(szLine, "%und", uiListNumber);
339 } else if (uiListNumber % 10 == 3 && uiListNumber != 13) {
340 tNextFree =
341 (size_t)sprintf(szLine, "%urd", uiListNumber);
342 } else {
343 tNextFree =
344 (size_t)sprintf(szLine, "%uth", uiListNumber);
346 break;
347 case LIST_SPECIAL:
348 case LIST_BULLETS:
349 tNextFree = 0;
350 break;
351 default:
352 DBG_DEC(ucNFC);
353 DBG_FIXME();
354 tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
355 break;
357 tNextFree += (size_t)sprintf(szLine + tNextFree, "%.3s", szListChar);
358 szLine[tNextFree++] = ' ';
359 szLine[tNextFree] = '\0';
360 lWidth = lComputeStringWidth(szLine, tNextFree,
361 pOutput->tFontRef, pOutput->usFontSize);
362 lLeftIndentation -= lWidth;
363 if (lLeftIndentation < 0) {
364 lLeftIndentation = 0;
366 vSetLeftIndentation(pDiag, lLeftIndentation);
367 for (tIndex = 0; tIndex < tNextFree; tIndex++) {
368 vStoreChar((UCHAR)szLine[tIndex], FALSE, pOutput);
370 } /* end of vPutIndentation */
373 * vPutSeparatorLine - output a separator line
375 * A separator line is a horizontal line two inches long.
376 * Two inches equals 144000 millipoints.
378 static void
379 vPutSeparatorLine(output_type *pOutput)
381 long lCharWidth;
382 int iCounter, iChars;
383 char szOne[2];
385 fail(pOutput == NULL);
387 szOne[0] = OUR_EM_DASH;
388 szOne[1] = '\0';
389 lCharWidth = lComputeStringWidth(szOne, 1,
390 pOutput->tFontRef, pOutput->usFontSize);
391 NO_DBG_DEC(lCharWidth);
392 iChars = (int)((144000 + lCharWidth / 2) / lCharWidth);
393 NO_DBG_DEC(iChars);
394 for (iCounter = 0; iCounter < iChars; iCounter++) {
395 vStoreCharacter((ULONG)(UCHAR)OUR_EM_DASH, pOutput);
397 } /* end of vPutSeparatorLine */
400 * pStartNextOutput - start the next output record
402 * returns a pointer to the next record
404 static output_type *
405 pStartNextOutput(output_type *pCurrent)
407 output_type *pNew;
409 if (pCurrent->tNextFree == 0) {
410 /* The current record is empty, re-use */
411 fail(pCurrent->szStorage[0] != '\0');
412 fail(pCurrent->lStringWidth != 0);
413 return pCurrent;
415 /* The current record is in use, make a new one */
416 pNew = xmalloc(sizeof(*pNew));
417 pCurrent->pNext = pNew;
418 pNew->tStorageSize = INITIAL_SIZE;
419 pNew->szStorage = xmalloc(pNew->tStorageSize);
420 pNew->szStorage[0] = '\0';
421 pNew->tNextFree = 0;
422 pNew->lStringWidth = 0;
423 pNew->ucFontColor = FONT_COLOR_DEFAULT;
424 pNew->usFontStyle = FONT_REGULAR;
425 pNew->tFontRef = (draw_fontref)0;
426 pNew->usFontSize = DEFAULT_FONT_SIZE;
427 pNew->pPrev = pCurrent;
428 pNew->pNext = NULL;
429 return pNew;
430 } /* end of pStartNextOutput */
433 * pStartNewOutput
435 static output_type *
436 pStartNewOutput(output_type *pAnchor, output_type *pLeftOver)
438 output_type *pCurr, *pNext;
439 USHORT usFontStyle, usFontSize;
440 draw_fontref tFontRef;
441 UCHAR ucFontColor;
443 ucFontColor = FONT_COLOR_DEFAULT;
444 usFontStyle = FONT_REGULAR;
445 tFontRef = (draw_fontref)0;
446 usFontSize = DEFAULT_FONT_SIZE;
447 /* Free the old output space */
448 pCurr = pAnchor;
449 while (pCurr != NULL) {
450 pNext = pCurr->pNext;
451 pCurr->szStorage = xfree(pCurr->szStorage);
452 if (pCurr->pNext == NULL) {
453 ucFontColor = pCurr->ucFontColor;
454 usFontStyle = pCurr->usFontStyle;
455 tFontRef = pCurr->tFontRef;
456 usFontSize = pCurr->usFontSize;
458 pCurr = xfree(pCurr);
459 pCurr = pNext;
461 if (pLeftOver == NULL) {
462 /* Create new output space */
463 pLeftOver = xmalloc(sizeof(*pLeftOver));
464 pLeftOver->tStorageSize = INITIAL_SIZE;
465 pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
466 pLeftOver->szStorage[0] = '\0';
467 pLeftOver->tNextFree = 0;
468 pLeftOver->lStringWidth = 0;
469 pLeftOver->ucFontColor = ucFontColor;
470 pLeftOver->usFontStyle = usFontStyle;
471 pLeftOver->tFontRef = tFontRef;
472 pLeftOver->usFontSize = usFontSize;
473 pLeftOver->pPrev = NULL;
474 pLeftOver->pNext = NULL;
476 fail(!bCheckDoubleLinkedList(pLeftOver));
477 return pLeftOver;
478 } /* end of pStartNewOutput */
481 * ulGetChar - get the next character from the specified list
483 * returns the next character of EOF
485 static ULONG
486 ulGetChar(FILE *pFile, list_id_enum eListID)
488 const font_block_type *pCurr;
489 ULONG ulChar, ulFileOffset, ulTextOffset;
490 row_info_enum eRowInfo;
491 USHORT usChar, usPropMod;
492 BOOL bSkip;
494 fail(pFile == NULL);
496 pCurr = pFontInfo;
497 bSkip = FALSE;
498 for (;;) {
499 usChar = usNextChar(pFile, eListID,
500 &ulFileOffset, &ulTextOffset, &usPropMod);
501 if (usChar == (USHORT)EOF) {
502 return (ULONG)EOF;
505 vUpdateCounters();
507 eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion);
508 if (!bStartRow) {
509 #if 0
510 bStartRow = eRowInfo == found_a_cell ||
511 (pRowInfo != NULL &&
512 ulFileOffset == pRowInfo->ulFileOffsetStart &&
513 eRowInfo != found_not_a_cell);
514 #else
515 bStartRow = pRowInfo != NULL &&
516 ulFileOffset == pRowInfo->ulFileOffsetStart;
517 #endif
518 NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart);
520 if (!bEndRowNorm) {
521 #if 0
522 bEndRow = eRowInfo == found_end_of_row ||
523 (pRowInfo != NULL &&
524 ulFileOffset == pRowInfo->ulFileOffsetEnd &&
525 eRowInfo != found_not_end_of_row);
526 #else
527 bEndRowNorm = pRowInfo != NULL &&
528 ulFileOffset == pRowInfo->ulFileOffsetEnd;
529 #endif
530 NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd);
532 if (!bEndRowFast) {
533 bEndRowFast = eRowInfo == found_end_of_row;
534 NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd);
537 if (!bStartStyle) {
538 bStartStyle = pStyleInfo != NULL &&
539 ulFileOffset == pStyleInfo->ulFileOffset;
540 NO_DBG_HEX_C(bStartStyle, ulFileOffset);
542 if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) {
543 bStartFont = TRUE;
544 NO_DBG_HEX(ulFileOffset);
545 pFontInfo = pCurr;
546 pCurr = pGetNextFontInfoListItem(pCurr);
549 /* Skip embedded characters */
550 if (usChar == START_EMBEDDED) {
551 bSkip = TRUE;
552 continue;
554 if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
555 bSkip = FALSE;
556 continue;
558 if (bSkip) {
559 continue;
561 ulChar = ulTranslateCharacters(usChar,
562 ulFileOffset,
563 iWordVersion,
564 tOptions.eConversionType,
565 tOptions.eEncoding,
566 bOldMacFile);
567 if (ulChar == IGNORE_CHARACTER) {
568 continue;
570 if (ulChar == PICTURE) {
571 ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset);
572 } else {
573 ulFileOffsetImage = FC_INVALID;
575 if (ulChar == PAR_END) {
576 /* End of paragraph seen, prepare for the next */
577 vFillStyleFromStylesheet(usIstdNext, &tStyleNext);
578 vCorrectStyleValues(&tStyleNext);
579 bStartStyleNext = TRUE;
580 vFillFontFromStylesheet(usIstdNext, &tFontNext);
581 vCorrectFontValues(&tFontNext);
582 bStartFontNext = TRUE;
584 if (ulChar == PAGE_BREAK) {
585 /* Might be the start of a new section */
586 pSectionNext = pGetSectionInfo(pSection, ulTextOffset);
588 return ulChar;
590 } /* end of ulGetChar */
593 * bWordDecryptor - translate Word to text or PostScript
595 * returns TRUE when succesful, otherwise FALSE
597 BOOL
598 bWordDecryptor(FILE *pFile, long lFilesize, diagram_type *pDiag)
600 imagedata_type tImage;
601 const style_block_type *pStyleTmp;
602 const font_block_type *pFontTmp;
603 const char *szListChar;
604 output_type *pAnchor, *pOutput, *pLeftOver;
605 ULONG ulChar;
606 long lBeforeIndentation, lAfterIndentation;
607 long lLeftIndentation, lLeftIndentation1, lRightIndentation;
608 long lWidthCurr, lWidthMax, lDefaultTabWidth, lTmp;
609 list_id_enum eListID;
610 image_info_enum eRes;
611 UINT uiFootnoteNumber, uiEndnoteNumber, uiTmp;
612 int iListSeqNumber;
613 BOOL bWasTableRow, bTableFontClosed, bWasEndOfParagraph;
614 BOOL bInList, bWasInList, bNoMarks, bFirstLine;
615 BOOL bAllCapitals, bHiddenText, bMarkDelText, bSuccess;
616 USHORT usListNumber;
617 USHORT usFontStyle, usFontStyleMinimal, usFontSize, usTmp;
618 UCHAR ucFontNumber, ucFontColor;
619 UCHAR ucNFC, ucAlignment;
621 fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL);
623 DBG_MSG("bWordDecryptor");
625 iWordVersion = iInitDocument(pFile, lFilesize);
626 if (iWordVersion < 0) {
627 DBG_DEC(iWordVersion);
628 return FALSE;
630 vPrologue2(pDiag, iWordVersion);
632 /* Initialisation */
633 #if defined(__riscos)
634 ulCharCounter = 0;
635 iCurrPct = 0;
636 iPrevPct = -1;
637 ulDocumentLength = ulGetDocumentLength();
638 #endif /* __riscos */
639 bOldMacFile = bIsOldMacFile();
640 pSection = pGetSectionInfo(NULL, 0);
641 pSectionNext = pSection;
642 lDefaultTabWidth = lGetDefaultTabWidth();
643 DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth);
644 pRowInfo = pGetNextRowInfoListItem();
645 DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart);
646 DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd);
647 DBG_MSG_C(pRowInfo == NULL, "No rows at all");
648 bStartRow = FALSE;
649 bEndRowNorm = FALSE;
650 bEndRowFast = FALSE;
651 bIsTableRow = FALSE;
652 bWasTableRow = FALSE;
653 vResetStyles();
654 pStyleInfo = pGetNextStyleInfoListItem(NULL);
655 bStartStyle = FALSE;
656 bInList = FALSE;
657 bWasInList = FALSE;
658 iListSeqNumber = 0;
659 usIstdNext = ISTD_NORMAL;
660 pAnchor = NULL;
661 pFontInfo = pGetNextFontInfoListItem(NULL);
662 DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset);
663 DBG_MSG_C(pFontInfo == NULL, "No fonts at all");
664 bStartFont = FALSE;
665 ucFontNumber = 0;
666 usFontStyleMinimal = FONT_REGULAR;
667 usFontStyle = FONT_REGULAR;
668 usFontSize = DEFAULT_FONT_SIZE;
669 ucFontColor = FONT_COLOR_DEFAULT;
670 pAnchor = pStartNewOutput(pAnchor, NULL);
671 pOutput = pAnchor;
672 pOutput->ucFontColor = ucFontColor;
673 pOutput->usFontStyle = usFontStyle;
674 pOutput->tFontRef = tOpenFont(ucFontNumber, usFontStyle, usFontSize);
675 pOutput->usFontSize = usFontSize;
676 bTableFontClosed = TRUE;
677 lBeforeIndentation = 0;
678 lAfterIndentation = 0;
679 lLeftIndentation = 0;
680 lLeftIndentation1 = 0;
681 lRightIndentation = 0;
682 bWasEndOfParagraph = TRUE;
683 bNoMarks = TRUE;
684 bFirstLine = TRUE;
685 ucNFC = LIST_BULLETS;
686 vGetOptions(&tOptions);
687 if (pStyleInfo != NULL) {
688 szListChar = pStyleInfo->szListChar;
689 pStyleTmp = pStyleInfo;
690 } else {
691 if (tStyleNext.szListChar[0] == '\0') {
692 vGetBulletValue(tOptions.eConversionType,
693 tOptions.eEncoding, tStyleNext.szListChar, 4);
695 szListChar = tStyleNext.szListChar;
696 pStyleTmp = &tStyleNext;
698 usListNumber = 0;
699 ucAlignment = ALIGNMENT_LEFT;
700 bAllCapitals = FALSE;
701 bHiddenText = FALSE;
702 bMarkDelText = FALSE;
703 fail(tOptions.iParagraphBreak < 0);
704 if (tOptions.iParagraphBreak == 0) {
705 lWidthMax = LONG_MAX;
706 } else if (tOptions.iParagraphBreak < MIN_SCREEN_WIDTH) {
707 lWidthMax = lChar2MilliPoints(MIN_SCREEN_WIDTH);
708 } else if (tOptions.iParagraphBreak > MAX_SCREEN_WIDTH) {
709 lWidthMax = lChar2MilliPoints(MAX_SCREEN_WIDTH);
710 } else {
711 lWidthMax = lChar2MilliPoints(tOptions.iParagraphBreak);
713 NO_DBG_DEC(lWidthMax);
715 visdelay_begin();
717 uiFootnoteNumber = 0;
718 uiEndnoteNumber = 0;
719 eListID = text_list;
720 for(;;) {
721 ulChar = ulGetChar(pFile, eListID);
722 if (ulChar == (ULONG)EOF) {
723 if (bOutputContainsText(pAnchor)) {
724 OUTPUT_LINE();
725 } else {
726 RESET_LINE();
728 switch (eListID) {
729 case text_list:
730 eListID = footnote_list;
731 if (uiFootnoteNumber != 0) {
732 vPutSeparatorLine(pAnchor);
733 OUTPUT_LINE();
734 uiFootnoteNumber = 0;
736 break;
737 case footnote_list:
738 eListID = endnote_list;
739 if (uiEndnoteNumber != 0) {
740 vPutSeparatorLine(pAnchor);
741 OUTPUT_LINE();
742 uiEndnoteNumber = 0;
744 break;
745 case endnote_list:
746 eListID = textbox_list;
747 if (bExistsTextBox()) {
748 vPutSeparatorLine(pAnchor);
749 OUTPUT_LINE();
751 break;
752 case textbox_list:
753 eListID = hdrtextbox_list;
754 if (bExistsHdrTextBox()) {
755 vPutSeparatorLine(pAnchor);
756 OUTPUT_LINE();
758 break;
759 case hdrtextbox_list:
760 default:
761 eListID = end_of_lists;
762 break;
764 if (eListID == end_of_lists) {
765 break;
767 continue;
770 if (ulChar == UNKNOWN_NOTE_CHAR) {
771 switch (eListID) {
772 case footnote_list:
773 ulChar = FOOTNOTE_CHAR;
774 break;
775 case endnote_list:
776 ulChar = ENDNOTE_CHAR;
777 break;
778 default:
779 break;
783 if (bStartRow) {
784 /* Begin of a tablerow found */
785 if (bOutputContainsText(pAnchor)) {
786 OUTPUT_LINE();
787 } else {
788 RESET_LINE();
790 fail(pAnchor != pOutput);
791 if (bTableFontClosed) {
792 /* Start special table font */
793 vCloseFont();
795 * Compensate for the fact that Word uses
796 * proportional fonts for its tables and we
797 * only one fixed-width font
799 uiTmp = ((UINT)usFontSize * 5 + 3) / 6;
800 if (uiTmp < MIN_TABLEFONT_SIZE) {
801 uiTmp = MIN_TABLEFONT_SIZE;
802 } else if (uiTmp > MAX_TABLEFONT_SIZE) {
803 uiTmp = MAX_TABLEFONT_SIZE;
805 pOutput->usFontSize = (USHORT)uiTmp;
806 pOutput->tFontRef =
807 tOpenTableFont(pOutput->usFontSize);
808 pOutput->usFontStyle = FONT_REGULAR;
809 pOutput->ucFontColor = FONT_COLOR_BLACK;
810 bTableFontClosed = FALSE;
812 bIsTableRow = TRUE;
813 bStartRow = FALSE;
816 if (bWasTableRow &&
817 !bIsTableRow &&
818 ulChar != PAR_END &&
819 ulChar != HARD_RETURN &&
820 ulChar != PAGE_BREAK &&
821 ulChar != COLUMN_FEED) {
823 * The end of a table should be followed by an
824 * empty line, like the end of a paragraph
826 OUTPUT_LINE();
827 vEndOfParagraph(pDiag,
828 pOutput->tFontRef,
829 pOutput->usFontSize,
830 (long)pOutput->usFontSize * 600);
833 switch (ulChar) {
834 case PAGE_BREAK:
835 case COLUMN_FEED:
836 if (bIsTableRow) {
837 vStoreCharacter((ULONG)'\n', pOutput);
838 break;
840 if (bOutputContainsText(pAnchor)) {
841 OUTPUT_LINE();
842 } else {
843 RESET_LINE();
845 if (ulChar == PAGE_BREAK) {
846 vEndOfPage(pDiag,
847 lAfterIndentation);
848 } else {
849 vEndOfParagraph(pDiag,
850 pOutput->tFontRef,
851 pOutput->usFontSize,
852 lAfterIndentation);
854 break;
855 default:
856 break;
859 if (bStartFont || (bStartFontNext && ulChar != PAR_END)) {
860 /* Begin of a font found */
861 if (bStartFont) {
862 /* bStartFont takes priority */
863 fail(pFontInfo == NULL);
864 pFontTmp = pFontInfo;
865 } else {
866 pFontTmp = &tFontNext;
868 bAllCapitals = bIsCapitals(pFontTmp->usFontStyle);
869 bHiddenText = bIsHidden(pFontTmp->usFontStyle);
870 bMarkDelText = bIsMarkDel(pFontTmp->usFontStyle);
871 usTmp = pFontTmp->usFontStyle &
872 (FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE|
873 FONT_STRIKE|FONT_MARKDEL|
874 FONT_SUPERSCRIPT|FONT_SUBSCRIPT);
875 if (!bIsTableRow &&
876 (usFontSize != pFontTmp->usFontSize ||
877 ucFontNumber != pFontTmp->ucFontNumber ||
878 usFontStyleMinimal != usTmp ||
879 ucFontColor != pFontTmp->ucFontColor)) {
880 pOutput = pStartNextOutput(pOutput);
881 vCloseFont();
882 pOutput->ucFontColor = pFontTmp->ucFontColor;
883 pOutput->usFontStyle = pFontTmp->usFontStyle;
884 pOutput->usFontSize = pFontTmp->usFontSize;
885 pOutput->tFontRef = tOpenFont(
886 pFontTmp->ucFontNumber,
887 pFontTmp->usFontStyle,
888 pFontTmp->usFontSize);
889 fail(!bCheckDoubleLinkedList(pAnchor));
891 ucFontNumber = pFontTmp->ucFontNumber;
892 usFontSize = pFontTmp->usFontSize;
893 ucFontColor = pFontTmp->ucFontColor;
894 usFontStyle = pFontTmp->usFontStyle;
895 usFontStyleMinimal = usTmp;
896 if (bStartFont) {
897 /* Get the next font info */
898 pFontInfo = pGetNextFontInfoListItem(pFontInfo);
899 NO_DBG_HEX_C(pFontInfo != NULL,
900 pFontInfo->ulFileOffset);
901 DBG_MSG_C(pFontInfo == NULL, "No more fonts");
903 bStartFont = FALSE;
904 bStartFontNext = FALSE;
907 if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) {
908 bFirstLine = TRUE;
909 /* Begin of a style found */
910 if (bStartStyle) {
911 /* bStartStyle takes priority */
912 fail(pStyleInfo == NULL);
913 pStyleTmp = pStyleInfo;
914 } else {
915 pStyleTmp = &tStyleNext;
917 if (!bIsTableRow) {
918 vStoreStyle(pDiag, pOutput, pStyleTmp);
920 usIstdNext = pStyleTmp->usIstdNext;
921 lBeforeIndentation =
922 lTwips2MilliPoints(pStyleTmp->usBeforeIndent);
923 lAfterIndentation =
924 lTwips2MilliPoints(pStyleTmp->usAfterIndent);
925 lLeftIndentation =
926 lTwips2MilliPoints(pStyleTmp->sLeftIndent);
927 lLeftIndentation1 =
928 lTwips2MilliPoints(pStyleTmp->sLeftIndent1);
929 lRightIndentation =
930 lTwips2MilliPoints(pStyleTmp->sRightIndent);
931 bInList = bStyleImpliesList(pStyleTmp, iWordVersion);
932 bNoMarks = !bInList || pStyleTmp->bNumPause;
933 ucNFC = pStyleTmp->ucNFC;
934 szListChar = pStyleTmp->szListChar;
935 ucAlignment = pStyleTmp->ucAlignment;
936 if (bInList && !bWasInList) {
937 /* Start of a list */
938 iListSeqNumber++;
939 vStartOfList(pDiag, ucNFC,
940 bWasTableRow && !bIsTableRow);
942 if (!bInList && bWasInList) {
943 /* End of a list */
944 vEndOfList(pDiag);
946 bWasInList = bInList;
947 if (bStartStyle) {
948 pStyleInfo =
949 pGetNextStyleInfoListItem(pStyleInfo);
950 NO_DBG_HEX_C(pStyleInfo != NULL,
951 pStyleInfo->ulFileOffset);
952 DBG_MSG_C(pStyleInfo == NULL,
953 "No more styles");
955 bStartStyle = FALSE;
956 bStartStyleNext = FALSE;
959 if (bWasEndOfParagraph) {
960 vStartOfParagraph1(pDiag, lBeforeIndentation);
963 if (!bIsTableRow &&
964 lTotalStringWidth(pAnchor) == 0) {
965 if (!bNoMarks) {
966 usListNumber = usGetListValue(iListSeqNumber,
967 iWordVersion,
968 pStyleTmp);
970 if (bInList && bFirstLine) {
971 vStartOfListItem(pDiag, bNoMarks);
973 vPutIndentation(pDiag, pAnchor, bNoMarks, bFirstLine,
974 usListNumber, ucNFC, szListChar,
975 lLeftIndentation, lLeftIndentation1);
976 bFirstLine = FALSE;
977 /* One number or mark per paragraph will do */
978 bNoMarks = TRUE;
981 if (bWasEndOfParagraph) {
982 vStartOfParagraph2(pDiag);
983 bWasEndOfParagraph = FALSE;
986 switch (ulChar) {
987 case PICTURE:
988 (void)memset(&tImage, 0, sizeof(tImage));
989 eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage);
990 switch (eRes) {
991 case image_no_information:
992 bSuccess = FALSE;
993 break;
994 case image_minimal_information:
995 case image_full_information:
996 #if 0
997 if (bOutputContainsText(pAnchor)) {
998 OUTPUT_LINE();
999 } else {
1000 RESET_LINE();
1002 #endif
1003 bSuccess = bTranslateImage(pDiag, pFile,
1004 eRes == image_minimal_information,
1005 ulFileOffsetImage, &tImage);
1006 break;
1007 default:
1008 DBG_DEC(eRes);
1009 bSuccess = FALSE;
1010 break;
1012 if (!bSuccess) {
1013 vStoreString("[pic]", 5, pOutput);
1015 break;
1016 case FOOTNOTE_CHAR:
1017 uiFootnoteNumber++;
1018 vStoreCharacter((ULONG)'[', pOutput);
1019 vStoreNumberAsDecimal(uiFootnoteNumber, pOutput);
1020 vStoreCharacter((ULONG)']', pOutput);
1021 break;
1022 case ENDNOTE_CHAR:
1023 uiEndnoteNumber++;
1024 vStoreCharacter((ULONG)'[', pOutput);
1025 vStoreNumberAsRoman(uiEndnoteNumber, pOutput);
1026 vStoreCharacter((ULONG)']', pOutput);
1027 break;
1028 case UNKNOWN_NOTE_CHAR:
1029 vStoreString("[?]", 3, pOutput);
1030 break;
1031 case PAR_END:
1032 if (bIsTableRow) {
1033 vStoreCharacter((ULONG)'\n', pOutput);
1034 break;
1036 OUTPUT_LINE();
1037 vEndOfParagraph(pDiag,
1038 pOutput->tFontRef,
1039 pOutput->usFontSize,
1040 lAfterIndentation);
1041 bWasEndOfParagraph = TRUE;
1042 break;
1043 case HARD_RETURN:
1044 if (bIsTableRow) {
1045 vStoreCharacter((ULONG)'\n', pOutput);
1046 break;
1048 if (bOutputContainsText(pAnchor)) {
1049 OUTPUT_LINE();
1051 vMove2NextLine(pDiag,
1052 pOutput->tFontRef, pOutput->usFontSize);
1053 break;
1054 case PAGE_BREAK:
1055 case COLUMN_FEED:
1056 pSection = pSectionNext;
1057 break;
1058 case TABLE_SEPARATOR:
1059 if (bIsTableRow) {
1060 vStoreCharacter(ulChar, pOutput);
1061 break;
1063 vStoreCharacter((ULONG)' ', pOutput);
1064 vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR, pOutput);
1065 break;
1066 case TAB:
1067 if (bIsTableRow ||
1068 tOptions.eConversionType == conversion_xml) {
1069 vStoreCharacter((ULONG)' ', pOutput);
1070 break;
1072 if (tOptions.iParagraphBreak == 0 &&
1073 tOptions.eConversionType == conversion_text) {
1074 /* No logical lines, so no tab expansion */
1075 vStoreCharacter(TAB, pOutput);
1076 break;
1078 lTmp = lTotalStringWidth(pAnchor);
1079 lTmp += lDrawUnits2MilliPoints(pDiag->lXleft);
1080 lTmp /= lDefaultTabWidth;
1081 do {
1082 vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
1083 lWidthCurr = lTotalStringWidth(pAnchor);
1084 lWidthCurr +=
1085 lDrawUnits2MilliPoints(pDiag->lXleft);
1086 } while (lTmp == lWidthCurr / lDefaultTabWidth &&
1087 lWidthCurr < lWidthMax + lRightIndentation);
1088 break;
1089 default:
1090 if (bHiddenText && tOptions.bHideHiddenText) {
1091 continue;
1093 if (bMarkDelText &&
1094 tOptions.eConversionType != conversion_ps) {
1095 continue;
1097 if (bAllCapitals) {
1098 ulChar = ulToUpper(ulChar);
1100 vStoreCharacter(ulChar, pOutput);
1101 break;
1104 if (bWasTableRow && !bIsTableRow) {
1105 /* End of a table */
1106 vEndOfTable(pDiag);
1107 /* Resume normal font */
1108 NO_DBG_MSG("End of table font");
1109 vCloseFont();
1110 bTableFontClosed = TRUE;
1111 pOutput->ucFontColor = ucFontColor;
1112 pOutput->usFontStyle = usFontStyle;
1113 pOutput->usFontSize = usFontSize;
1114 pOutput->tFontRef = tOpenFont(
1115 ucFontNumber, usFontStyle, usFontSize);
1117 bWasTableRow = bIsTableRow;
1119 if (bIsTableRow) {
1120 fail(pAnchor != pOutput);
1121 if (!bEndRowNorm && !bEndRowFast) {
1122 continue;
1124 /* End of a table row */
1125 if (bEndRowNorm) {
1126 fail(pRowInfo == NULL);
1127 vTableRow2Window(pDiag, pAnchor, pRowInfo);
1128 } else {
1129 fail(!bEndRowFast);
1131 /* Reset */
1132 pAnchor = pStartNewOutput(pAnchor, NULL);
1133 pOutput = pAnchor;
1134 if (bEndRowNorm) {
1135 pRowInfo = pGetNextRowInfoListItem();
1137 bIsTableRow = FALSE;
1138 bEndRowNorm = FALSE;
1139 bEndRowFast = FALSE;
1140 NO_DBG_HEX_C(pRowInfo != NULL,
1141 pRowInfo->ulFileOffsetStart);
1142 NO_DBG_HEX_C(pRowInfo != NULL,
1143 pRowInfo->ulFileOffsetEnd);
1144 continue;
1146 lWidthCurr = lTotalStringWidth(pAnchor);
1147 lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft);
1148 if (lWidthCurr < lWidthMax + lRightIndentation) {
1149 continue;
1151 pLeftOver = pSplitList(pAnchor);
1152 vJustify2Window(pDiag, pAnchor,
1153 lWidthMax, lRightIndentation, ucAlignment);
1154 pAnchor = pStartNewOutput(pAnchor, pLeftOver);
1155 for (pOutput = pAnchor;
1156 pOutput->pNext != NULL;
1157 pOutput = pOutput->pNext)
1158 ; /* EMPTY */
1159 fail(pOutput == NULL);
1160 if (lTotalStringWidth(pAnchor) > 0) {
1161 vSetLeftIndentation(pDiag, lLeftIndentation);
1165 pAnchor = pStartNewOutput(pAnchor, NULL);
1166 pAnchor->szStorage = xfree(pAnchor->szStorage);
1167 pAnchor = xfree(pAnchor);
1168 vCloseFont();
1169 vFreeDocument();
1170 visdelay_end();
1171 return TRUE;
1172 } /* end of bWordDecryptor */