Imported from antiword-0.33.tar.gz.
[antiword.git] / word2text.c
blob106eb165ccc39013065f2093c2bb6cb291d521af
1 /*
2 * word2text.c
3 * Copyright (C) 1998-2002 A.J. van Os; Released under GPL
5 * Description:
6 * MS Word to text functions
7 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #if defined(__riscos)
14 #include "visdelay.h"
15 #endif /* __riscos */
16 #include "antiword.h"
/* Number of bytes allocated for the text buffer of a fresh output record */
#define INITIAL_SIZE		40
/* Number of bytes a text buffer grows by each time it runs out of room */
#define EXTENTION_SIZE		20

/*
 * Macros to make sure all such statements will be identical
 *
 * Both macros expand in the body of bWord2Text and use its local
 * variables directly: pAnchor and pOutput (and, for OUTPUT_LINE, also
 * pDiag, lWidthMax and ucAlignment).
 */

/*
 * OUTPUT_LINE - hand the collected line to vAlign2Window, then replace
 * the list by a fresh record and make pOutput point at it.
 */
#define OUTPUT_LINE()		\
	do {\
		vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)

/*
 * RESET_LINE - like OUTPUT_LINE, but the collected line is dropped
 * instead of being passed to vAlign2Window.
 */
#define RESET_LINE()		\
	do {\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)
#if defined(__riscos)
/* Length of the document in characters */
static long		lDocumentLength;
/* Number of characters processed so far */
static long		lCharCounter;
/* Current and previously reported percentage for the hourglass */
static int		iCurrPct, iPrevPct;
#endif /* __riscos */
/* The document is in the format belonging to this version of Word */
static int		iWordVersion = -1;
/* Special treatment for files from Word 6 on an Apple Macintosh */
static BOOL		bWord6MacFile = FALSE;
/* Section Information */
static const section_block_type	*pSection = NULL;
static const section_block_type	*pSectionNext = NULL;
/* All the (command line) options */
static options_type	tOptions;
/*
 * Needed for reading a complete table row
 * The flags are raised by ulGetChar and lowered again by bWord2Text
 */
static const row_block_type	*pRowInfo = NULL;
static BOOL	bStartRow = FALSE;
static BOOL	bEndRowNorm = FALSE;
static BOOL	bEndRowFast = FALSE;
static BOOL	bIsTableRow = FALSE;
/* Index of the next style and font information */
static USHORT	usIstdNext = ISTD_NORMAL;
/*
 * Needed for finding the start of a style
 * tStyleNext and bStartStyleNext are filled in by ulGetChar when it sees
 * the end of a paragraph
 */
static const style_block_type	*pStyleInfo = NULL;
static style_block_type		tStyleNext;
static BOOL	bStartStyle = FALSE;
static BOOL	bStartStyleNext = FALSE;
/*
 * Needed for finding the start of a font
 * tFontNext and bStartFontNext are filled in by ulGetChar when it sees
 * the end of a paragraph
 */
static const font_block_type	*pFontInfo = NULL;
static font_block_type		tFontNext;
static BOOL	bStartFont = FALSE;
static BOOL	bStartFontNext = FALSE;
/*
 * Needed for finding an image
 * Set by ulGetChar for every character it returns; FC_INVALID unless
 * that character is PICTURE
 */
static ULONG	ulFileOffsetImage = FC_INVALID;
/*
 * vUpdateCounters - Update the counters for the hourglass
 *
 * Counts one more processed character and, on RISC OS only, reports the
 * new percentage whenever it differs from the one reported last.
 * On all other platforms this function does nothing.
 */
static void
vUpdateCounters(void)
{
#if defined(__riscos)
	lCharCounter += 1;
	iCurrPct = (int)(lCharCounter * 100 / lDocumentLength);
	if (iCurrPct == iPrevPct) {
		/* Nothing new to show */
		return;
	}
	visdelay_percent(iCurrPct);
	iPrevPct = iCurrPct;
#endif /* __riscos */
} /* end of vUpdateCounters */
92 * bOutputContainsText - see if the output contains more than white space
94 static BOOL
95 bOutputContainsText(output_type *pAnchor)
97 output_type *pCurr;
98 int iIndex;
100 fail(pAnchor == NULL);
102 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
103 fail(pCurr->lStringWidth < 0);
104 for (iIndex = 0; iIndex < pCurr->iNextFree; iIndex++) {
105 if (isspace((int)pCurr->szStorage[iIndex])) {
106 continue;
108 #if defined(DEBUG)
109 if (pCurr->szStorage[iIndex] == FILLER_CHAR) {
110 continue;
112 #endif /* DEBUG */
113 return TRUE;
116 return FALSE;
117 } /* end of bOutputContainsText */
120 * lTotalStringWidth - compute the total width of the output string
122 static long
123 lTotalStringWidth(output_type *pAnchor)
125 output_type *pCurr;
126 long lTotal;
128 lTotal = 0;
129 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
130 DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth);
131 fail(pCurr->lStringWidth < 0);
132 lTotal += pCurr->lStringWidth;
134 return lTotal;
135 } /* end of lTotalStringWidth */
138 * vStoreByte - store one byte
140 static void
141 vStoreByte(UCHAR ucChar, output_type *pOutput)
143 fail(pOutput == NULL);
145 if (ucChar == 0) {
146 pOutput->szStorage[pOutput->iNextFree] = '\0';
147 return;
150 while (pOutput->iNextFree + 2 > (int)pOutput->tStorageSize) {
151 pOutput->tStorageSize += EXTENTION_SIZE;
152 pOutput->szStorage = xrealloc(pOutput->szStorage,
153 pOutput->tStorageSize);
155 pOutput->szStorage[pOutput->iNextFree] = (char)ucChar;
156 pOutput->szStorage[pOutput->iNextFree + 1] = '\0';
157 pOutput->iNextFree++;
158 } /* end of vStoreByte */
161 * vStoreCharacter - store one character
163 static void
164 vStoreCharacter(ULONG ulChar, output_type *pOutput)
166 int iLen;
168 fail(pOutput == NULL);
170 if (tOptions.eEncoding == encoding_utf8) {
171 DBG_HEX_C(ulChar > 0xffff, ulChar);
172 fail(ulChar > 0xffff);
173 if (ulChar < 0x80) {
174 vStoreByte((UCHAR)ulChar, pOutput);
175 iLen = 1;
176 } else if (ulChar < 0x800) {
177 vStoreByte((UCHAR)(0xc0 | ulChar >> 6),
178 pOutput);
179 vStoreByte((UCHAR)(0x80 | (ulChar & 0x3f)),
180 pOutput);
181 iLen = 2;
182 } else {
183 vStoreByte((UCHAR)(0xe0 | ulChar >> 12),
184 pOutput);
185 vStoreByte((UCHAR)(0x80 | (ulChar >> 6 & 0x3f)),
186 pOutput);
187 vStoreByte((UCHAR)(0x80 | (ulChar & 0x3f)),
188 pOutput);
189 iLen = 3;
191 } else {
192 DBG_HEX_C(ulChar > 0xff, ulChar);
193 fail(ulChar > 0xff);
194 vStoreByte((UCHAR)ulChar, pOutput);
195 iLen = 1;
197 pOutput->lStringWidth += lComputeStringWidth(
198 pOutput->szStorage + pOutput->iNextFree - iLen,
199 iLen,
200 pOutput->tFontRef,
201 pOutput->sFontsize);
202 } /* end of vStoreCharacter */
205 * vStoreString - store a string
207 static void
208 vStoreString(const char *szString, int iStringLength, output_type *pOutput)
210 int iIndex;
212 fail(szString == NULL || iStringLength < 0 || pOutput == NULL);
214 for (iIndex = 0; iIndex < iStringLength; iIndex++) {
215 vStoreCharacter(szString[iIndex], pOutput);
217 } /* end of vStoreString */
220 * vStoreNumberAsDecimal - store a number as a decimal number
222 static void
223 vStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput)
225 int iLen;
226 char szString[15];
228 fail(uiNumber == 0);
229 fail(pOutput == NULL);
231 iLen = sprintf(szString, "%u", uiNumber);
232 vStoreString(szString, iLen, pOutput);
233 } /* end of vStoreNumberAsDecimal */
236 * vStoreNumberAsRoman - store a number as a roman numerical
238 static void
239 vStoreNumberAsRoman(UINT uiNumber, output_type *pOutput)
241 int iLen;
242 char szString[15];
244 fail(uiNumber == 0);
245 fail(pOutput == NULL);
247 iLen = iNumber2Roman(uiNumber, FALSE, szString);
248 vStoreString(szString, iLen, pOutput);
249 } /* end of vStoreNumberAsRoman */
252 * vStoreStyle - store a style
254 static void
255 vStoreStyle(output_type *pOutput, const style_block_type *pStyle)
257 int iLen;
258 char szString[120];
260 fail(pOutput == NULL);
261 fail(pStyle == NULL);
263 iLen = iStyle2Window(szString, pStyle, pSection);
264 vStoreString(szString, iLen, pOutput);
265 } /* end of vStoreStyle */
268 * Create an empty line by adding a extra "newline"
270 static void
271 vEmptyLine2Diagram(diagram_type *pDiag, draw_fontref tFontRef, short sFontsize)
273 fail(pDiag == NULL);
274 fail(sFontsize < MIN_FONT_SIZE || sFontsize > MAX_FONT_SIZE);
276 if (pDiag->lXleft > 0) {
277 /* To the start of the line */
278 vMove2NextLine(pDiag, tFontRef, sFontsize);
280 /* Empty line */
281 vMove2NextLine(pDiag, tFontRef, sFontsize);
282 } /* end of vEmptyLine2Diagram */
285 * vPutIndentation - output the given amount of indentation
287 static void
288 vPutIndentation(diagram_type *pDiag, output_type *pOutput, BOOL bNumPause,
289 UINT uiListNumber, UCHAR ucNFC, char cListCharacter,
290 long lLeftIndentation)
292 long lWidth;
293 int iNextFree;
294 char szLine[30];
296 fail(pDiag == NULL || pOutput == NULL);
297 fail(lLeftIndentation < 0);
299 if (lLeftIndentation <= 0) {
300 return;
302 if (bNumPause) {
303 vSetLeftIndentation(pDiag, lLeftIndentation);
304 return;
306 fail(iscntrl((int)cListCharacter));
308 switch (ucNFC) {
309 case LIST_ARABIC_NUM:
310 iNextFree = sprintf(szLine, "%u", uiListNumber);
311 break;
312 case LIST_ROMAN_NUM_UPPER:
313 case LIST_ROMAN_NUM_LOWER:
314 iNextFree = iNumber2Roman(uiListNumber,
315 ucNFC == LIST_ROMAN_NUM_UPPER, szLine);
316 break;
317 case LIST_UPPER_ALPHA:
318 case LIST_LOWER_ALPHA:
319 iNextFree = iNumber2Alpha(uiListNumber,
320 ucNFC == LIST_UPPER_ALPHA, szLine);
321 break;
322 case LIST_ORDINAL_NUM:
323 if (uiListNumber % 10 == 1 && uiListNumber != 11) {
324 iNextFree = sprintf(szLine, "%ust", uiListNumber);
325 } else if (uiListNumber % 10 == 2 && uiListNumber != 12) {
326 iNextFree = sprintf(szLine, "%und", uiListNumber);
327 } else if (uiListNumber % 10 == 3 && uiListNumber != 13) {
328 iNextFree = sprintf(szLine, "%urd", uiListNumber);
329 } else {
330 iNextFree = sprintf(szLine, "%uth", uiListNumber);
332 break;
333 case LIST_BULLETS:
334 iNextFree = 0;
335 break;
336 default:
337 DBG_DEC(ucNFC);
338 DBG_FIXME();
339 iNextFree = sprintf(szLine, "%u", uiListNumber);
340 break;
342 szLine[iNextFree++] = cListCharacter;
343 szLine[iNextFree++] = ' ';
344 szLine[iNextFree] = '\0';
345 lWidth = lComputeStringWidth(szLine, iNextFree,
346 pOutput->tFontRef, pOutput->sFontsize);
347 lLeftIndentation -= lWidth;
348 if (lLeftIndentation > 0) {
349 vSetLeftIndentation(pDiag, lLeftIndentation);
351 vStoreString(szLine, iNextFree, pOutput);
352 } /* end of vPutIndentation */
355 * vPutNoteSeparator - output a note separator
357 * A note separator is a horizontal line two inches long.
358 * Two inches equals 144000 millipoints.
360 static void
361 vPutNoteSeparator(output_type *pOutput)
363 long lCharWidth;
364 int iCounter, iChars;
365 char szOne[2];
367 fail(pOutput == NULL);
369 szOne[0] = OUR_EM_DASH;
370 szOne[1] = '\0';
371 lCharWidth = lComputeStringWidth(szOne, 1,
372 pOutput->tFontRef, pOutput->sFontsize);
373 DBG_DEC(lCharWidth);
374 iChars = (int)((144000 + lCharWidth / 2) / lCharWidth);
375 DBG_DEC(iChars);
376 for (iCounter = 0; iCounter < iChars; iCounter++) {
377 vStoreCharacter(OUR_EM_DASH, pOutput);
379 } /* end of vPutNoteSeparator */
384 static output_type *
385 pStartNextOutput(output_type *pCurrent)
387 output_type *pNew;
389 if (pCurrent->iNextFree == 0) {
390 /* The current record is empty, re-use */
391 fail(pCurrent->szStorage[0] != '\0');
392 fail(pCurrent->lStringWidth != 0);
393 return pCurrent;
395 /* The current record is in use, make a new one */
396 pNew = xmalloc(sizeof(*pNew));
397 pCurrent->pNext = pNew;
398 pNew->tStorageSize = INITIAL_SIZE;
399 pNew->szStorage = xmalloc(pNew->tStorageSize);
400 pNew->szStorage[0] = '\0';
401 pNew->iNextFree = 0;
402 pNew->lStringWidth = 0;
403 pNew->iColor = FONT_COLOR_DEFAULT;
404 pNew->ucFontstyle = FONT_REGULAR;
405 pNew->tFontRef = (draw_fontref)0;
406 pNew->sFontsize = DEFAULT_FONT_SIZE;
407 pNew->pPrev = pCurrent;
408 pNew->pNext = NULL;
409 return pNew;
410 } /* end of pStartNextOutput */
413 * pStartNewOutput
415 static output_type *
416 pStartNewOutput(output_type *pAnchor, output_type *pLeftOver)
418 output_type *pCurr, *pNext;
419 int iColor;
420 short sFontsize;
421 draw_fontref tFontRef;
422 UCHAR ucFontstyle;
424 iColor = FONT_COLOR_DEFAULT;
425 ucFontstyle = FONT_REGULAR;
426 tFontRef = (draw_fontref)0;
427 sFontsize = DEFAULT_FONT_SIZE;
428 /* Free the old output space */
429 pCurr = pAnchor;
430 while (pCurr != NULL) {
431 pNext = pCurr->pNext;
432 pCurr->szStorage = xfree(pCurr->szStorage);
433 if (pCurr->pNext == NULL) {
434 iColor = pCurr->iColor;
435 ucFontstyle = pCurr->ucFontstyle;
436 tFontRef = pCurr->tFontRef;
437 sFontsize = pCurr->sFontsize;
439 pCurr = xfree(pCurr);
440 pCurr = pNext;
442 if (pLeftOver == NULL) {
443 /* Create new output space */
444 pLeftOver = xmalloc(sizeof(*pLeftOver));
445 pLeftOver->tStorageSize = INITIAL_SIZE;
446 pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
447 pLeftOver->szStorage[0] = '\0';
448 pLeftOver->iNextFree = 0;
449 pLeftOver->lStringWidth = 0;
450 pLeftOver->iColor = iColor;
451 pLeftOver->ucFontstyle = ucFontstyle;
452 pLeftOver->tFontRef = tFontRef;
453 pLeftOver->sFontsize = sFontsize;
454 pLeftOver->pPrev = NULL;
455 pLeftOver->pNext = NULL;
457 fail(!bCheckDoubleLinkedList(pLeftOver));
458 return pLeftOver;
459 } /* end of pStartNewOutput */
462 * ulGetChar - get the next character from the given list
464 * returns the next character of EOF
466 static ULONG
467 ulGetChar(FILE *pFile, list_id_enum eListID)
469 const font_block_type *pCurr;
470 ULONG ulChar, ulFileOffset, ulTextOffset;
471 row_info_enum eRowInfo;
472 USHORT usChar, usPropMod;
473 BOOL bSkip;
475 fail(pFile == NULL);
477 pCurr = pFontInfo;
478 bSkip = FALSE;
479 for (;;) {
480 usChar = usNextChar(pFile, eListID,
481 &ulFileOffset, &ulTextOffset, &usPropMod);
482 if (usChar == (USHORT)EOF) {
483 return (ULONG)EOF;
486 vUpdateCounters();
488 eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion);
489 if (!bStartRow) {
490 #if 0
491 bStartRow = eRowInfo == found_a_cell ||
492 (pRowInfo != NULL &&
493 ulFileOffset == pRowInfo->ulFileOffsetStart &&
494 eRowInfo != found_not_a_cell);
495 #else
496 bStartRow = pRowInfo != NULL &&
497 ulFileOffset == pRowInfo->ulFileOffsetStart;
498 #endif
499 NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart);
501 if (!bEndRowNorm) {
502 #if 0
503 bEndRow = eRowInfo == found_end_of_row ||
504 (pRowInfo != NULL &&
505 ulFileOffset == pRowInfo->ulFileOffsetEnd &&
506 eRowInfo != found_not_end_of_row);
507 #else
508 bEndRowNorm = pRowInfo != NULL &&
509 ulFileOffset == pRowInfo->ulFileOffsetEnd;
510 #endif
511 NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd);
513 if (!bEndRowFast) {
514 bEndRowFast = eRowInfo == found_end_of_row;
515 NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd);
518 if (!bStartStyle) {
519 bStartStyle = pStyleInfo != NULL &&
520 ulFileOffset == pStyleInfo->ulFileOffset;
521 NO_DBG_HEX_C(bStartStyle, ulFileOffset);
523 if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) {
524 bStartFont = TRUE;
525 NO_DBG_HEX(ulFileOffset);
526 pFontInfo = pCurr;
527 pCurr = pGetNextFontInfoListItem(pCurr);
530 /* Skip embedded characters */
531 if (usChar == START_EMBEDDED) {
532 bSkip = TRUE;
533 continue;
535 if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
536 bSkip = FALSE;
537 continue;
539 if (bSkip) {
540 continue;
542 ulChar = ulTranslateCharacters(usChar,
543 ulFileOffset,
544 iWordVersion,
545 tOptions.eEncoding,
546 bWord6MacFile);
547 if (ulChar == IGNORE_CHARACTER) {
548 continue;
550 if (ulChar == PICTURE) {
551 ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset);
552 } else {
553 ulFileOffsetImage = FC_INVALID;
555 if (ulChar == PAR_END) {
556 /* End of paragraph seen, prepare for the next */
557 vFillStyleFromStylesheet(usIstdNext, &tStyleNext);
558 vCorrectStyleValues(&tStyleNext);
559 bStartStyleNext = TRUE;
560 vFillFontFromStylesheet(usIstdNext, &tFontNext);
561 vCorrectFontValues(&tFontNext);
562 bStartFontNext = TRUE;
564 if (ulChar == PAGE_BREAK) {
565 /* Might be the start of a new section */
566 pSectionNext = pGetSectionInfo(pSection, ulTextOffset);
568 return ulChar;
570 } /* end of ulGetChar */
573 * bWord2Text - translate Word to text or Postcript
575 * returns TRUE when succesful, otherwise FALSE
577 BOOL
578 bWord2Text(FILE *pFile, long lFilesize, diagram_type *pDiag)
580 imagedata_type tImage;
581 const style_block_type *pStyleTmp;
582 const font_block_type *pFontTmp;
583 output_type *pAnchor, *pOutput, *pLeftOver;
584 ULONG ulChar;
585 long lBeforeIndentation, lAfterIndentation;
586 long lLeftIndentation, lRightIndentation;
587 long lWidthCurr, lWidthMax, lDefaultTabWidth, lTmp;
588 list_id_enum eListID;
589 image_info_enum eRes;
590 UINT uiListNumber, uiFootnoteNumber, uiEndnoteNumber;
591 BOOL bWasTableRow, bTableFontClosed, bWasInList, bWasEndOfParagraph;
592 BOOL bNumPause, bAllCapitals, bHiddenText, bSuccess;
593 short sFontsize;
594 UCHAR ucFontnumber, ucFontcolor, ucTmp;
595 UCHAR ucFontstyle, ucFontstyleMinimal;
596 UCHAR ucNFC, ucAlignment;
597 char cListChar;
599 fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL);
601 DBG_MSG("bWord2Text");
603 iWordVersion = iInitDocument(pFile, lFilesize);
604 if (iWordVersion < 0) {
605 DBG_DEC(iWordVersion);
606 return FALSE;
608 vAddFonts2Diagram(pDiag);
610 /* Initialisation */
611 #if defined(__riscos)
612 lCharCounter = 0;
613 iCurrPct = 0;
614 iPrevPct = -1;
615 lDocumentLength = (long)ulGetDocumentLength();
616 #endif /* __riscos */
617 bWord6MacFile = bIsWord6MacFile();
618 pSection = pGetSectionInfo(NULL, 0);
619 pSectionNext = pSection;
620 lDefaultTabWidth = lGetDefaultTabWidth();
621 DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth);
622 pRowInfo = pGetNextRowInfoListItem();
623 DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart);
624 DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd);
625 DBG_MSG_C(pRowInfo == NULL, "No rows at all");
626 bStartRow = FALSE;
627 bEndRowNorm = FALSE;
628 bEndRowFast = FALSE;
629 bIsTableRow = FALSE;
630 bWasTableRow = FALSE;
631 vResetStyles();
632 pStyleInfo = pGetNextStyleInfoListItem(NULL);
633 bStartStyle = FALSE;
634 bWasInList = FALSE;
635 usIstdNext = ISTD_NORMAL;
636 pAnchor = NULL;
637 pFontInfo = pGetNextFontInfoListItem(NULL);
638 DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset);
639 DBG_MSG_C(pFontInfo == NULL, "No fonts at all");
640 bStartFont = FALSE;
641 ucFontnumber = 0;
642 ucFontstyleMinimal = FONT_REGULAR;
643 ucFontstyle = FONT_REGULAR;
644 sFontsize = DEFAULT_FONT_SIZE;
645 ucFontcolor = FONT_COLOR_DEFAULT;
646 pAnchor = pStartNewOutput(pAnchor, NULL);
647 pOutput = pAnchor;
648 pOutput->iColor = (int)ucFontcolor;
649 pOutput->ucFontstyle = ucFontstyle;
650 pOutput->tFontRef = tOpenFont(ucFontnumber, ucFontstyle, sFontsize);
651 pOutput->sFontsize = sFontsize;
652 bTableFontClosed = TRUE;
653 lBeforeIndentation = 0;
654 lAfterIndentation = 0;
655 lLeftIndentation = 0;
656 lRightIndentation = 0;
657 bWasEndOfParagraph = TRUE;
658 bNumPause = TRUE;
659 ucNFC = LIST_BULLETS;
660 cListChar = OUR_BULLET;
661 uiListNumber = 0;
662 ucAlignment = ALIGNMENT_LEFT;
663 bAllCapitals = FALSE;
664 bHiddenText = FALSE;
665 vGetOptions(&tOptions);
666 fail(tOptions.iParagraphBreak < 0);
667 if (tOptions.iParagraphBreak == 0) {
668 lWidthMax = LONG_MAX;
669 } else if (tOptions.iParagraphBreak < MIN_SCREEN_WIDTH) {
670 lWidthMax = lChar2MilliPoints(MIN_SCREEN_WIDTH);
671 } else if (tOptions.iParagraphBreak > MAX_SCREEN_WIDTH) {
672 lWidthMax = lChar2MilliPoints(MAX_SCREEN_WIDTH);
673 } else {
674 lWidthMax = lChar2MilliPoints(tOptions.iParagraphBreak);
676 NO_DBG_DEC(lWidthMax);
678 visdelay_begin();
680 uiFootnoteNumber = 0;
681 uiEndnoteNumber = 0;
682 eListID = text_list;
683 for(;;) {
684 ulChar = ulGetChar(pFile, eListID);
685 if (ulChar == (ULONG)EOF) {
686 if (bOutputContainsText(pAnchor)) {
687 OUTPUT_LINE();
688 } else {
689 RESET_LINE();
691 switch (eListID) {
692 case text_list:
693 eListID = footnote_list;
694 if (uiFootnoteNumber != 0) {
695 vPutNoteSeparator(pAnchor);
696 OUTPUT_LINE();
697 uiFootnoteNumber = 0;
699 break;
700 case footnote_list:
701 eListID = endnote_list;
702 if (uiEndnoteNumber != 0) {
703 vPutNoteSeparator(pAnchor);
704 OUTPUT_LINE();
705 uiEndnoteNumber = 0;
707 break;
708 case endnote_list:
709 default:
710 eListID = end_of_lists;
711 break;
713 if (eListID == end_of_lists) {
714 break;
716 continue;
719 if (ulChar == UNKNOWN_NOTE_CHAR) {
720 switch (eListID) {
721 case footnote_list:
722 ulChar = FOOTNOTE_CHAR;
723 break;
724 case endnote_list:
725 ulChar = ENDNOTE_CHAR;
726 break;
727 default:
728 break;
732 if (bStartRow) {
733 /* Begin of a tablerow found */
734 if (bOutputContainsText(pAnchor)) {
735 OUTPUT_LINE();
736 } else {
737 RESET_LINE();
739 fail(pAnchor != pOutput);
740 if (bTableFontClosed) {
741 /* Start special table font */
742 vCloseFont();
744 * Compensate for the fact that Word uses
745 * proportional fonts for its tables and we
746 * only one fixed-width font
748 pOutput->sFontsize =
749 (sFontsize <= DEFAULT_FONT_SIZE ?
750 (DEFAULT_FONT_SIZE * 5 + 3) / 6 :
751 (sFontsize * 5 + 3) / 6);
752 pOutput->tFontRef =
753 tOpenTableFont(pOutput->sFontsize);
754 pOutput->ucFontstyle = FONT_REGULAR;
755 pOutput->iColor = FONT_COLOR_BLACK;
756 bTableFontClosed = FALSE;
758 bIsTableRow = TRUE;
759 bStartRow = FALSE;
762 if (bWasTableRow &&
763 !bIsTableRow &&
764 ulChar != PAR_END &&
765 ulChar != HARD_RETURN &&
766 ulChar != PAGE_BREAK &&
767 ulChar != COLUMN_FEED) {
769 * The end of a table should be followed by an
770 * empty line, like the end of a paragraph
772 OUTPUT_LINE();
773 vEndOfParagraph2Diagram(pDiag,
774 pOutput->tFontRef,
775 pOutput->sFontsize,
776 (long)pOutput->sFontsize * 600);
779 switch (ulChar) {
780 case PAGE_BREAK:
781 case COLUMN_FEED:
782 if (bIsTableRow) {
783 vStoreCharacter((ULONG)'\n', pOutput);
784 break;
786 if (bOutputContainsText(pAnchor)) {
787 OUTPUT_LINE();
788 } else {
789 RESET_LINE();
791 if (ulChar == PAGE_BREAK) {
792 vEndOfPage2Diagram(pDiag,
793 pOutput->tFontRef,
794 pOutput->sFontsize,
795 lAfterIndentation);
796 } else {
797 vEndOfParagraph2Diagram(pDiag,
798 pOutput->tFontRef,
799 pOutput->sFontsize,
800 lAfterIndentation);
802 break;
803 default:
804 break;
807 if (bStartFont || (bStartFontNext && ulChar != PAR_END)) {
808 /* Begin of a font found */
809 if (bStartFont) {
810 /* bStartFont takes priority */
811 fail(pFontInfo == NULL);
812 pFontTmp = pFontInfo;
813 } else {
814 pFontTmp = &tFontNext;
816 bAllCapitals = bIsCapitals(pFontTmp->ucFontstyle);
817 bHiddenText = bIsHidden(pFontTmp->ucFontstyle);
818 ucTmp = pFontTmp->ucFontstyle &
819 (FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE|
820 FONT_STRIKE|FONT_MARKDEL);
821 if (!bIsTableRow &&
822 (sFontsize != pFontTmp->sFontsize ||
823 ucFontnumber != pFontTmp->ucFontnumber ||
824 ucFontstyleMinimal != ucTmp ||
825 ucFontcolor != pFontTmp->ucFontcolor)) {
826 pOutput = pStartNextOutput(pOutput);
827 vCloseFont();
828 pOutput->iColor = (int)pFontTmp->ucFontcolor;
829 pOutput->ucFontstyle = pFontTmp->ucFontstyle;
830 pOutput->sFontsize = pFontTmp->sFontsize;
831 pOutput->tFontRef = tOpenFont(
832 pFontTmp->ucFontnumber,
833 pFontTmp->ucFontstyle,
834 pFontTmp->sFontsize);
835 fail(!bCheckDoubleLinkedList(pAnchor));
837 ucFontnumber = pFontTmp->ucFontnumber;
838 sFontsize = pFontTmp->sFontsize;
839 ucFontcolor = pFontTmp->ucFontcolor;
840 ucFontstyle = pFontTmp->ucFontstyle;
841 ucFontstyleMinimal = ucTmp;
842 if (bStartFont) {
843 /* Get the next font info */
844 pFontInfo = pGetNextFontInfoListItem(pFontInfo);
845 NO_DBG_HEX_C(pFontInfo != NULL,
846 pFontInfo->ulFileOffset);
847 DBG_MSG_C(pFontInfo == NULL, "No more fonts");
849 bStartFont = FALSE;
850 bStartFontNext = FALSE;
853 if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) {
854 /* Begin of a style found */
855 if (bStartStyle) {
856 /* bStartStyle takes priority */
857 fail(pStyleInfo == NULL);
858 pStyleTmp = pStyleInfo;
859 } else {
860 pStyleTmp = &tStyleNext;
862 if (!bIsTableRow) {
863 vStoreStyle(pOutput, pStyleTmp);
865 usIstdNext = pStyleTmp->usIstdNext;
866 lBeforeIndentation =
867 lTwips2MilliPoints(pStyleTmp->usBeforeIndent);
868 lAfterIndentation =
869 lTwips2MilliPoints(pStyleTmp->usAfterIndent);
870 lLeftIndentation =
871 lTwips2MilliPoints(pStyleTmp->sLeftIndent);
872 lRightIndentation =
873 lTwips2MilliPoints(pStyleTmp->sRightIndent);
874 bNumPause = !pStyleTmp->bInList ||
875 pStyleTmp->bNumPause;
876 ucNFC = pStyleTmp->ucNFC;
877 cListChar = (char)pStyleTmp->ucListCharacter;
878 ucAlignment = pStyleTmp->ucAlignment;
879 if (pStyleTmp->bInList) {
880 if (bWasInList) {
881 if (!pStyleTmp->bNumPause) {
882 uiListNumber++;
884 } else {
885 uiListNumber =
886 (UINT)pStyleTmp->usStartAt;
888 } else {
889 uiListNumber = 0;
891 bWasInList = pStyleTmp->bInList;
892 if (bStartStyle) {
893 pStyleInfo =
894 pGetNextStyleInfoListItem(pStyleInfo);
895 NO_DBG_HEX_C(pStyleInfo != NULL,
896 pStyleInfo->ulFileOffset);
897 NO_DBG_MSG_C(pStyleInfo == NULL,
898 "No more styles");
900 bStartStyle = FALSE;
901 bStartStyleNext = FALSE;
904 if (bWasEndOfParagraph) {
905 vStartOfParagraph2Diagram(pDiag,
906 pOutput->tFontRef,
907 pOutput->sFontsize,
908 lBeforeIndentation);
909 bWasEndOfParagraph = FALSE;
912 if (!bIsTableRow &&
913 lTotalStringWidth(pAnchor) == 0) {
914 vPutIndentation(pDiag, pAnchor, bNumPause,
915 uiListNumber, ucNFC, cListChar,
916 lLeftIndentation);
917 /* One number or mark per paragraph will do */
918 bNumPause = TRUE;
921 switch (ulChar) {
922 case PICTURE:
923 (void)memset(&tImage, 0, sizeof(tImage));
924 eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage);
925 switch (eRes) {
926 case image_no_information:
927 bSuccess = FALSE;
928 break;
929 case image_minimal_information:
930 case image_full_information:
931 #if 0
932 if (bOutputContainsText(pAnchor)) {
933 OUTPUT_LINE();
934 } else {
935 RESET_LINE();
937 #endif
938 bSuccess = bTranslateImage(pDiag, pFile,
939 eRes == image_minimal_information,
940 ulFileOffsetImage, &tImage);
941 break;
942 default:
943 DBG_DEC(eRes);
944 bSuccess = FALSE;
945 break;
947 if (!bSuccess) {
948 vStoreString("[pic]", 5, pOutput);
950 break;
951 case FOOTNOTE_CHAR:
952 uiFootnoteNumber++;
953 vStoreCharacter((ULONG)'[', pOutput);
954 vStoreNumberAsDecimal(uiFootnoteNumber, pOutput);
955 vStoreCharacter((ULONG)']', pOutput);
956 break;
957 case ENDNOTE_CHAR:
958 uiEndnoteNumber++;
959 vStoreCharacter((ULONG)'[', pOutput);
960 vStoreNumberAsRoman(uiEndnoteNumber, pOutput);
961 vStoreCharacter((ULONG)']', pOutput);
962 break;
963 case UNKNOWN_NOTE_CHAR:
964 vStoreString("[?]", 3, pOutput);
965 break;
966 case PAR_END:
967 if (bIsTableRow) {
968 vStoreCharacter((ULONG)'\n', pOutput);
969 break;
971 OUTPUT_LINE();
972 vEndOfParagraph2Diagram(pDiag,
973 pOutput->tFontRef,
974 pOutput->sFontsize,
975 lAfterIndentation);
976 bWasEndOfParagraph = TRUE;
977 break;
978 case HARD_RETURN:
979 if (bIsTableRow) {
980 vStoreCharacter((ULONG)'\n', pOutput);
981 break;
983 if (bOutputContainsText(pAnchor)) {
984 OUTPUT_LINE();
986 vEmptyLine2Diagram(pDiag,
987 pOutput->tFontRef,
988 pOutput->sFontsize);
989 break;
990 case PAGE_BREAK:
991 case COLUMN_FEED:
992 pSection = pSectionNext;
993 break;
994 case TABLE_SEPARATOR:
995 if (bIsTableRow) {
996 vStoreCharacter(ulChar, pOutput);
997 break;
999 vStoreCharacter((ULONG)' ', pOutput);
1000 vStoreCharacter(TABLE_SEPARATOR_CHAR, pOutput);
1001 break;
1002 case TAB:
1003 if (bIsTableRow) {
1004 vStoreCharacter((ULONG)' ', pOutput);
1005 break;
1007 if (tOptions.iParagraphBreak == 0 &&
1008 !tOptions.bUseOutlineFonts) {
1009 /* No logical lines, so no tab expansion */
1010 vStoreCharacter(TAB, pOutput);
1011 break;
1013 lTmp = lTotalStringWidth(pAnchor);
1014 lTmp += lDrawUnits2MilliPoints(pDiag->lXleft);
1015 lTmp /= lDefaultTabWidth;
1016 do {
1017 vStoreCharacter(FILLER_CHAR, pOutput);
1018 lWidthCurr = lTotalStringWidth(pAnchor);
1019 lWidthCurr +=
1020 lDrawUnits2MilliPoints(pDiag->lXleft);
1021 } while (lTmp == lWidthCurr / lDefaultTabWidth &&
1022 lWidthCurr < lWidthMax + lRightIndentation);
1023 break;
1024 default:
1025 if (bHiddenText && tOptions.bHideHiddenText) {
1026 continue;
1028 if (bAllCapitals) {
1029 ulChar = ulToUpper(ulChar);
1031 vStoreCharacter(ulChar, pOutput);
1032 break;
1035 if (bWasTableRow && !bIsTableRow) {
1036 /* End of a table, resume normal font */
1037 NO_DBG_MSG("End of table font");
1038 vCloseFont();
1039 bTableFontClosed = TRUE;
1040 pOutput->iColor = ucFontcolor;
1041 pOutput->ucFontstyle = ucFontstyle;
1042 pOutput->sFontsize = sFontsize;
1043 pOutput->tFontRef =
1044 tOpenFont(ucFontnumber, ucFontstyle, sFontsize);
1046 bWasTableRow = bIsTableRow;
1048 if (bIsTableRow) {
1049 fail(pAnchor != pOutput);
1050 if (!bEndRowNorm && !bEndRowFast) {
1051 continue;
1053 /* End of a table row */
1054 if (bEndRowNorm) {
1055 fail(pRowInfo == NULL);
1056 vTableRow2Window(pDiag, pAnchor, pRowInfo);
1057 } else {
1058 fail(!bEndRowFast);
1060 /* Reset */
1061 pAnchor = pStartNewOutput(pAnchor, NULL);
1062 pOutput = pAnchor;
1063 if (bEndRowNorm) {
1064 pRowInfo = pGetNextRowInfoListItem();
1066 bIsTableRow = FALSE;
1067 bEndRowNorm = FALSE;
1068 bEndRowFast = FALSE;
1069 NO_DBG_HEX_C(pRowInfo != NULL,
1070 pRowInfo->ulFileOffsetStart);
1071 NO_DBG_HEX_C(pRowInfo != NULL,
1072 pRowInfo->ulFileOffsetEnd);
1073 continue;
1075 lWidthCurr = lTotalStringWidth(pAnchor);
1076 lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft);
1077 if (lWidthCurr < lWidthMax + lRightIndentation) {
1078 continue;
1080 pLeftOver = pSplitList(pAnchor);
1081 vJustify2Window(pDiag, pAnchor,
1082 lWidthMax, lRightIndentation, ucAlignment);
1083 pAnchor = pStartNewOutput(pAnchor, pLeftOver);
1084 for (pOutput = pAnchor;
1085 pOutput->pNext != NULL;
1086 pOutput = pOutput->pNext)
1087 ; /* EMPTY */
1088 fail(pOutput == NULL);
1089 if (lTotalStringWidth(pAnchor) > 0) {
1090 vSetLeftIndentation(pDiag, lLeftIndentation);
1094 pAnchor = pStartNewOutput(pAnchor, NULL);
1095 pAnchor->szStorage = xfree(pAnchor->szStorage);
1096 pAnchor = xfree(pAnchor);
1097 vCloseFont();
1098 vFreeDocument();
1099 visdelay_end();
1100 return TRUE;
1101 } /* end of bWord2Text */