/*
 * word2text.c
 * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * MS Word to "text" functions
 *
 * Provenance: imported from antiword-0.37.tar.gz
 * (antiword.git / word2text.c, blob 62b8964c8d5f5a9f7c19ebd5227f3104c720e181)
 */

9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #if defined(__riscos)
14 #include "DeskLib:Hourglass.h"
15 #include "drawfile.h"
16 #endif /* __riscos */
17 #include "antiword.h"
/* Storage size (in bytes) given to every newly created output record */
#define INITIAL_SIZE		40
/* Number of bytes added each time an output record has to grow */
#define EXTENTION_SIZE		20


/*
 * Macros to make sure all such statements will be identical
 *
 * Both macros expand in the scope of the caller and use its local
 * variables pAnchor and pOutput. OUTPUT_LINE additionally uses pDiag,
 * lWidthMax and ucAlignment.
 */

/* Send the current line to the diagram, then start with empty output */
#define OUTPUT_LINE()		\
	do {\
		vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\
		TRACE_MSG("after vAlign2Window");\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)

/* Throw the current line away and start with empty output */
#define RESET_LINE()		\
	do {\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)

39 #if defined(__riscos)
40 /* Length of the document in characters */
41 static ULONG ulDocumentLength;
42 /* Number of characters processed so far */
43 static ULONG ulCharCounter;
44 static int iCurrPct, iPrevPct;
45 #endif /* __riscos */
46 /* The document is in the format belonging to this version of Word */
47 static int iWordVersion = -1;
48 /* Special treatment for files from Word 4/5/6 on an Apple Macintosh */
49 static BOOL bOldMacFile = FALSE;
50 /* Section Information */
51 static const section_block_type *pSection = NULL;
52 static const section_block_type *pSectionNext = NULL;
53 /* All the (command line) options */
54 static options_type tOptions;
55 /* Needed for reading a complete table row */
56 static const row_block_type *pRowInfo = NULL;
57 static BOOL bStartRow = FALSE;
58 static BOOL bEndRowNorm = FALSE;
59 static BOOL bEndRowFast = FALSE;
60 static BOOL bIsTableRow = FALSE;
61 /* Index of the next style and font information */
62 static USHORT usIstdNext = ISTD_NORMAL;
63 /* Needed for finding the start of a style */
64 static const style_block_type *pStyleInfo = NULL;
65 static style_block_type tStyleNext;
66 static BOOL bStartStyle = FALSE;
67 static BOOL bStartStyleNext = FALSE;
68 /* Needed for finding the start of a font */
69 static const font_block_type *pFontInfo = NULL;
70 static font_block_type tFontNext;
71 static BOOL bStartFont = FALSE;
72 static BOOL bStartFontNext = FALSE;
73 /* Needed for finding an image */
74 static ULONG ulFileOffsetImage = FC_INVALID;
/*
 * vUpdateCounters - Update the counters for the hourglass
 *
 * On RISC OS one more processed character is counted and the hourglass
 * is told about the new percentage whenever that percentage changes.
 * On every other platform this function does nothing.
 */
static void
vUpdateCounters(void)
{
#if defined(__riscos)
	ulCharCounter++;
	if (ulDocumentLength == 0) {
		/* Without a document length there is no percentage to show */
		return;
	}
	iCurrPct = (int)((ulCharCounter * 100) / ulDocumentLength);
	if (iCurrPct != iPrevPct) {
		Hourglass_Percentage(iCurrPct);
		iPrevPct = iCurrPct;
	}
#endif /* __riscos */
} /* end of vUpdateCounters */

94 * bOutputContainsText - see if the output contains more than white space
96 BOOL
97 bOutputContainsText(const output_type *pAnchor)
99 const output_type *pCurr;
100 size_t tIndex;
102 fail(pAnchor == NULL);
104 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
105 fail(pCurr->lStringWidth < 0);
106 for (tIndex = 0; tIndex < pCurr->tNextFree; tIndex++) {
107 if (isspace((int)(UCHAR)pCurr->szStorage[tIndex])) {
108 continue;
110 #if defined(DEBUG)
111 if (pCurr->szStorage[tIndex] == FILLER_CHAR) {
112 continue;
114 #endif /* DEBUG */
115 return TRUE;
118 return FALSE;
119 } /* end of bOutputContainsText */
122 * lTotalStringWidth - compute the total width of the output string
124 static long
125 lTotalStringWidth(const output_type *pAnchor)
127 const output_type *pCurr;
128 long lTotal;
130 lTotal = 0;
131 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
132 DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth);
133 fail(pCurr->lStringWidth < 0);
134 lTotal += pCurr->lStringWidth;
136 return lTotal;
137 } /* end of lTotalStringWidth */
140 * vStoreByte - store one byte
142 static void
143 vStoreByte(UCHAR ucChar, output_type *pOutput)
145 fail(pOutput == NULL);
147 if (ucChar == 0) {
148 pOutput->szStorage[pOutput->tNextFree] = '\0';
149 return;
152 while (pOutput->tNextFree + 2 > pOutput->tStorageSize) {
153 pOutput->tStorageSize += EXTENTION_SIZE;
154 pOutput->szStorage = xrealloc(pOutput->szStorage,
155 pOutput->tStorageSize);
157 pOutput->szStorage[pOutput->tNextFree] = (char)ucChar;
158 pOutput->szStorage[pOutput->tNextFree + 1] = '\0';
159 pOutput->tNextFree++;
160 } /* end of vStoreByte */
163 * vStoreChar - store a character as one or more bytes
165 static void
166 vStoreChar(ULONG ulChar, BOOL bChangeAllowed, output_type *pOutput)
168 char szResult[4];
169 size_t tIndex, tLen;
171 fail(pOutput == NULL);
173 if (tOptions.eEncoding == encoding_utf_8 && bChangeAllowed) {
174 DBG_HEX_C(ulChar > 0xffff, ulChar);
175 fail(ulChar > 0xffff);
176 tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
177 for (tIndex = 0; tIndex < tLen; tIndex++) {
178 vStoreByte((UCHAR)szResult[tIndex], pOutput);
180 } else {
181 DBG_HEX_C(ulChar > 0xff, ulChar);
182 fail(ulChar > 0xff);
183 vStoreByte((UCHAR)ulChar, pOutput);
184 tLen = 1;
186 pOutput->lStringWidth += lComputeStringWidth(
187 pOutput->szStorage + pOutput->tNextFree - tLen,
188 tLen,
189 pOutput->tFontRef,
190 pOutput->usFontSize);
191 } /* end of vStoreChar */
194 * vStoreCharacter - store one character
196 static void
197 vStoreCharacter(ULONG ulChar, output_type *pOutput)
199 vStoreChar(ulChar, TRUE, pOutput);
200 } /* end of vStoreCharacter */
203 * vStoreString - store a string
205 static void
206 vStoreString(const char *szString, size_t tStringLength, output_type *pOutput)
208 size_t tIndex;
210 fail(szString == NULL || pOutput == NULL);
212 for (tIndex = 0; tIndex < tStringLength; tIndex++) {
213 vStoreCharacter((ULONG)(UCHAR)szString[tIndex], pOutput);
215 } /* end of vStoreString */
218 * vStoreNumberAsDecimal - store a number as a decimal number
220 static void
221 vStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput)
223 size_t tLen;
224 char szString[3 * sizeof(UINT) + 1];
226 fail(uiNumber == 0);
227 fail(pOutput == NULL);
229 tLen = (size_t)sprintf(szString, "%u", uiNumber);
230 vStoreString(szString, tLen, pOutput);
231 } /* end of vStoreNumberAsDecimal */
234 * vStoreNumberAsRoman - store a number as a roman numerical
236 static void
237 vStoreNumberAsRoman(UINT uiNumber, output_type *pOutput)
239 size_t tLen;
240 char szString[15];
242 fail(uiNumber == 0);
243 fail(pOutput == NULL);
245 tLen = tNumber2Roman(uiNumber, FALSE, szString);
246 vStoreString(szString, tLen, pOutput);
247 } /* end of vStoreNumberAsRoman */
250 * vStoreStyle - store a style
252 static void
253 vStoreStyle(diagram_type *pDiag, output_type *pOutput,
254 const style_block_type *pStyle)
256 size_t tLen;
257 char szString[120];
259 fail(pDiag == NULL);
260 fail(pOutput == NULL);
261 fail(pStyle == NULL);
263 if (tOptions.eConversionType == conversion_xml) {
264 vSetHeaders(pDiag, pStyle->usIstd);
265 } else {
266 tLen = tStyle2Window(szString, sizeof(szString),
267 pStyle, pSection);
268 vStoreString(szString, tLen, pOutput);
270 } /* end of vStoreStyle */
273 * vPutIndentation - output the specified amount of indentation
275 static void
276 vPutIndentation(diagram_type *pDiag, output_type *pOutput,
277 BOOL bNoMarks, BOOL bFirstLine,
278 UINT uiListNumber, UCHAR ucNFC, const char *szListChar,
279 long lLeftIndentation, long lLeftIndentation1)
281 long lWidth;
282 size_t tIndex, tNextFree;
283 char szLine[30];
285 fail(pDiag == NULL);
286 fail(pOutput == NULL);
287 fail(szListChar == NULL);
288 fail(lLeftIndentation < 0);
290 if (tOptions.eConversionType == conversion_xml) {
291 /* XML does its own indentation at rendering time */
292 return;
295 if (bNoMarks) {
296 if (bFirstLine) {
297 lLeftIndentation += lLeftIndentation1;
299 if (lLeftIndentation < 0) {
300 lLeftIndentation = 0;
302 vSetLeftIndentation(pDiag, lLeftIndentation);
303 return;
305 if (lLeftIndentation <= 0) {
306 DBG_HEX_C(ucNFC != 0x00, ucNFC);
307 vSetLeftIndentation(pDiag, 0);
308 return;
311 #if defined(DEBUG)
312 if (tOptions.eEncoding == encoding_utf_8) {
313 fail(strlen(szListChar) > 3);
314 } else {
315 DBG_HEX_C(iscntrl((int)szListChar[0]), szListChar[0]);
316 fail(iscntrl((int)szListChar[0]));
317 fail(szListChar[1] != '\0');
319 #endif /* DEBUG */
321 switch (ucNFC) {
322 case LIST_ARABIC_NUM:
323 case LIST_NUMBER_TXT:
324 tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
325 break;
326 case LIST_UPPER_ROMAN:
327 case LIST_LOWER_ROMAN:
328 tNextFree = tNumber2Roman(uiListNumber,
329 ucNFC == LIST_UPPER_ROMAN, szLine);
330 break;
331 case LIST_UPPER_ALPHA:
332 case LIST_LOWER_ALPHA:
333 tNextFree = tNumber2Alpha(uiListNumber,
334 ucNFC == LIST_UPPER_ALPHA, szLine);
335 break;
336 case LIST_ORDINAL_NUM:
337 case LIST_ORDINAL_TXT:
338 if (uiListNumber % 10 == 1 && uiListNumber != 11) {
339 tNextFree =
340 (size_t)sprintf(szLine, "%ust", uiListNumber);
341 } else if (uiListNumber % 10 == 2 && uiListNumber != 12) {
342 tNextFree =
343 (size_t)sprintf(szLine, "%und", uiListNumber);
344 } else if (uiListNumber % 10 == 3 && uiListNumber != 13) {
345 tNextFree =
346 (size_t)sprintf(szLine, "%urd", uiListNumber);
347 } else {
348 tNextFree =
349 (size_t)sprintf(szLine, "%uth", uiListNumber);
351 break;
352 case LIST_OUTLINE_NUM:
353 tNextFree = (size_t)sprintf(szLine, "%02u", uiListNumber);
354 break;
355 case LIST_SPECIAL:
356 case LIST_SPECIAL2:
357 case LIST_BULLETS:
358 tNextFree = 0;
359 break;
360 default:
361 DBG_HEX(ucNFC);
362 DBG_FIXME();
363 tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
364 break;
366 tNextFree += (size_t)sprintf(szLine + tNextFree, "%.3s", szListChar);
367 szLine[tNextFree++] = ' ';
368 szLine[tNextFree] = '\0';
369 lWidth = lComputeStringWidth(szLine, tNextFree,
370 pOutput->tFontRef, pOutput->usFontSize);
371 lLeftIndentation -= lWidth;
372 if (lLeftIndentation < 0) {
373 lLeftIndentation = 0;
375 vSetLeftIndentation(pDiag, lLeftIndentation);
376 for (tIndex = 0; tIndex < tNextFree; tIndex++) {
377 vStoreChar((ULONG)(UCHAR)szLine[tIndex], FALSE, pOutput);
379 } /* end of vPutIndentation */
382 * vPutSeparatorLine - output a separator line
384 * A separator line is a horizontal line two inches long.
385 * Two inches equals 144000 millipoints.
387 static void
388 vPutSeparatorLine(output_type *pOutput)
390 long lCharWidth;
391 int iCounter, iChars;
392 char szOne[2];
394 fail(pOutput == NULL);
396 szOne[0] = OUR_EM_DASH;
397 szOne[1] = '\0';
398 lCharWidth = lComputeStringWidth(szOne, 1,
399 pOutput->tFontRef, pOutput->usFontSize);
400 NO_DBG_DEC(lCharWidth);
401 iChars = (int)((144000 + lCharWidth / 2) / lCharWidth);
402 NO_DBG_DEC(iChars);
403 for (iCounter = 0; iCounter < iChars; iCounter++) {
404 vStoreCharacter((ULONG)(UCHAR)OUR_EM_DASH, pOutput);
406 } /* end of vPutSeparatorLine */
409 * pStartNextOutput - start the next output record
411 * returns a pointer to the next record
413 static output_type *
414 pStartNextOutput(output_type *pCurrent)
416 output_type *pNew;
418 TRACE_MSG("pStartNextOutput");
420 if (pCurrent->tNextFree == 0) {
421 /* The current record is empty, re-use */
422 fail(pCurrent->szStorage[0] != '\0');
423 fail(pCurrent->lStringWidth != 0);
424 return pCurrent;
426 /* The current record is in use, make a new one */
427 pNew = xmalloc(sizeof(*pNew));
428 pCurrent->pNext = pNew;
429 pNew->tStorageSize = INITIAL_SIZE;
430 pNew->szStorage = xmalloc(pNew->tStorageSize);
431 pNew->szStorage[0] = '\0';
432 pNew->tNextFree = 0;
433 pNew->lStringWidth = 0;
434 pNew->ucFontColor = FONT_COLOR_DEFAULT;
435 pNew->usFontStyle = FONT_REGULAR;
436 pNew->tFontRef = (drawfile_fontref)0;
437 pNew->usFontSize = DEFAULT_FONT_SIZE;
438 pNew->pPrev = pCurrent;
439 pNew->pNext = NULL;
440 return pNew;
441 } /* end of pStartNextOutput */
444 * pStartNewOutput
446 static output_type *
447 pStartNewOutput(output_type *pAnchor, output_type *pLeftOver)
449 output_type *pCurr, *pNext;
450 USHORT usFontStyle, usFontSize;
451 drawfile_fontref tFontRef;
452 UCHAR ucFontColor;
454 TRACE_MSG("pStartNewOutput");
456 ucFontColor = FONT_COLOR_DEFAULT;
457 usFontStyle = FONT_REGULAR;
458 tFontRef = (drawfile_fontref)0;
459 usFontSize = DEFAULT_FONT_SIZE;
460 /* Free the old output space */
461 pCurr = pAnchor;
462 while (pCurr != NULL) {
463 TRACE_MSG("Free the old output space");
464 pNext = pCurr->pNext;
465 pCurr->szStorage = xfree(pCurr->szStorage);
466 if (pCurr->pNext == NULL) {
467 ucFontColor = pCurr->ucFontColor;
468 usFontStyle = pCurr->usFontStyle;
469 tFontRef = pCurr->tFontRef;
470 usFontSize = pCurr->usFontSize;
472 pCurr = xfree(pCurr);
473 pCurr = pNext;
475 if (pLeftOver == NULL) {
476 /* Create new output space */
477 TRACE_MSG("Create new output space");
478 pLeftOver = xmalloc(sizeof(*pLeftOver));
479 pLeftOver->tStorageSize = INITIAL_SIZE;
480 NO_DBG_DEC(pLeftOver->tStorageSize);
481 TRACE_MSG("before 2nd xmalloc");
482 pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
483 TRACE_MSG("after 2nd xmalloc");
484 pLeftOver->szStorage[0] = '\0';
485 pLeftOver->tNextFree = 0;
486 pLeftOver->lStringWidth = 0;
487 pLeftOver->ucFontColor = ucFontColor;
488 pLeftOver->usFontStyle = usFontStyle;
489 pLeftOver->tFontRef = tFontRef;
490 pLeftOver->usFontSize = usFontSize;
491 pLeftOver->pPrev = NULL;
492 pLeftOver->pNext = NULL;
494 fail(!bCheckDoubleLinkedList(pLeftOver));
495 return pLeftOver;
496 } /* end of pStartNewOutput */
499 * ulGetChar - get the next character from the specified list
501 * returns the next character of EOF
503 static ULONG
504 ulGetChar(FILE *pFile, list_id_enum eListID)
506 const font_block_type *pCurr;
507 ULONG ulChar, ulFileOffset, ulCharPos;
508 row_info_enum eRowInfo;
509 USHORT usChar, usPropMod;
510 BOOL bSkip;
512 fail(pFile == NULL);
514 pCurr = pFontInfo;
515 bSkip = FALSE;
516 for (;;) {
517 usChar = usNextChar(pFile, eListID,
518 &ulFileOffset, &ulCharPos, &usPropMod);
519 if (usChar == (USHORT)EOF) {
520 return (ULONG)EOF;
523 vUpdateCounters();
525 eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion);
526 if (!bStartRow) {
527 #if 0
528 bStartRow = eRowInfo == found_a_cell ||
529 (pRowInfo != NULL &&
530 ulFileOffset == pRowInfo->ulFileOffsetStart &&
531 eRowInfo != found_not_a_cell);
532 #else
533 bStartRow = pRowInfo != NULL &&
534 ulFileOffset == pRowInfo->ulFileOffsetStart;
535 #endif
536 NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart);
538 if (!bEndRowNorm) {
539 #if 0
540 bEndRow = eRowInfo == found_end_of_row ||
541 (pRowInfo != NULL &&
542 ulFileOffset == pRowInfo->ulFileOffsetEnd &&
543 eRowInfo != found_not_end_of_row);
544 #else
545 bEndRowNorm = pRowInfo != NULL &&
546 ulFileOffset == pRowInfo->ulFileOffsetEnd;
547 #endif
548 NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd);
550 if (!bEndRowFast) {
551 bEndRowFast = eRowInfo == found_end_of_row;
552 NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd);
555 if (!bStartStyle) {
556 bStartStyle = pStyleInfo != NULL &&
557 ulFileOffset == pStyleInfo->ulFileOffset;
558 NO_DBG_HEX_C(bStartStyle, ulFileOffset);
560 if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) {
561 bStartFont = TRUE;
562 NO_DBG_HEX(ulFileOffset);
563 pFontInfo = pCurr;
564 pCurr = pGetNextFontInfoListItem(pCurr);
567 /* Skip embedded characters */
568 if (usChar == START_EMBEDDED) {
569 bSkip = TRUE;
570 continue;
572 if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
573 bSkip = FALSE;
574 continue;
576 if (bSkip) {
577 continue;
579 ulChar = ulTranslateCharacters(usChar,
580 ulFileOffset,
581 iWordVersion,
582 tOptions.eConversionType,
583 tOptions.eEncoding,
584 bOldMacFile);
585 if (ulChar == IGNORE_CHARACTER) {
586 continue;
588 if (ulChar == PICTURE) {
589 ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset);
590 } else {
591 ulFileOffsetImage = FC_INVALID;
593 if (ulChar == PAR_END) {
594 /* End of paragraph seen, prepare for the next */
595 vFillStyleFromStylesheet(usIstdNext, &tStyleNext);
596 vCorrectStyleValues(&tStyleNext);
597 bStartStyleNext = TRUE;
598 vFillFontFromStylesheet(usIstdNext, &tFontNext);
599 vCorrectFontValues(&tFontNext);
600 bStartFontNext = TRUE;
602 if (ulChar == PAGE_BREAK) {
603 /* Might be the start of a new section */
604 pSectionNext = pGetSectionInfo(pSection, ulCharPos);
606 return ulChar;
608 } /* end of ulGetChar */
611 * lGetWidthMax - get the maximum line width from the paragraph break value
613 * Returns the maximum line width in millipoints
615 static long
616 lGetWidthMax(int iParagraphBreak)
618 fail(iParagraphBreak < 0);
620 if (iParagraphBreak == 0) {
621 return LONG_MAX;
623 if (iParagraphBreak < MIN_SCREEN_WIDTH) {
624 return lChar2MilliPoints(MIN_SCREEN_WIDTH);
626 if (iParagraphBreak > MAX_SCREEN_WIDTH) {
627 return lChar2MilliPoints(MAX_SCREEN_WIDTH);
629 return lChar2MilliPoints(iParagraphBreak);
630 } /* end of lGetWidthMax */
633 * bWordDecryptor - turn Word to something more useful
635 * returns TRUE when succesful, otherwise FALSE
637 BOOL
638 bWordDecryptor(FILE *pFile, long lFilesize, diagram_type *pDiag)
640 imagedata_type tImage;
641 const style_block_type *pStyleTmp;
642 const font_block_type *pFontTmp;
643 const char *szListChar;
644 output_type *pAnchor, *pOutput, *pLeftOver;
645 ULONG ulChar;
646 long lBeforeIndentation, lAfterIndentation;
647 long lLeftIndentation, lLeftIndentation1, lRightIndentation;
648 long lWidthCurr, lWidthMax, lDefaultTabWidth, lHalfSpaceWidth, lTmp;
649 list_id_enum eListID;
650 image_info_enum eRes;
651 UINT uiFootnoteNumber, uiEndnoteNumber, uiTmp;
652 int iListSeqNumber;
653 BOOL bWasTableRow, bTableFontClosed, bWasEndOfParagraph;
654 BOOL bInList, bWasInList, bNoMarks, bFirstLine;
655 BOOL bAllCapitals, bHiddenText, bMarkDelText, bSuccess;
656 USHORT usListNumber;
657 USHORT usFontStyle, usFontStyleMinimal, usFontSize, usTmp;
658 UCHAR ucFontNumber, ucFontColor;
659 UCHAR ucNFC, ucAlignment;
661 fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL);
663 TRACE_MSG("bWordDecryptor");
665 iWordVersion = iInitDocument(pFile, lFilesize);
666 if (iWordVersion < 0) {
667 DBG_DEC(iWordVersion);
668 return FALSE;
671 vGetOptions(&tOptions);
672 bOldMacFile = bIsOldMacFile();
673 vPrepareHdrFtrText(pFile);
674 vPrepareFootnoteText(pFile);
676 vPrologue2(pDiag, iWordVersion);
678 /* Initialisation */
679 #if defined(__riscos)
680 ulCharCounter = 0;
681 iCurrPct = 0;
682 iPrevPct = -1;
683 ulDocumentLength = ulGetDocumentLength();
684 #endif /* __riscos */
685 pSection = pGetSectionInfo(NULL, 0);
686 pSectionNext = pSection;
687 lDefaultTabWidth = lGetDefaultTabWidth();
688 DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth);
689 pRowInfo = pGetNextRowInfoListItem();
690 DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart);
691 DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd);
692 DBG_MSG_C(pRowInfo == NULL, "No rows at all");
693 bStartRow = FALSE;
694 bEndRowNorm = FALSE;
695 bEndRowFast = FALSE;
696 bIsTableRow = FALSE;
697 bWasTableRow = FALSE;
698 vResetStyles();
699 pStyleInfo = pGetNextTextStyle(NULL);
700 bStartStyle = FALSE;
701 bInList = FALSE;
702 bWasInList = FALSE;
703 iListSeqNumber = 0;
704 usIstdNext = ISTD_NORMAL;
705 pAnchor = NULL;
706 pFontInfo = pGetNextFontInfoListItem(NULL);
707 DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset);
708 DBG_MSG_C(pFontInfo == NULL, "No fonts at all");
709 bStartFont = FALSE;
710 ucFontNumber = 0;
711 usFontStyleMinimal = FONT_REGULAR;
712 usFontStyle = FONT_REGULAR;
713 usFontSize = DEFAULT_FONT_SIZE;
714 ucFontColor = FONT_COLOR_DEFAULT;
715 pAnchor = pStartNewOutput(pAnchor, NULL);
716 pOutput = pAnchor;
717 pOutput->ucFontColor = ucFontColor;
718 pOutput->usFontStyle = usFontStyle;
719 pOutput->tFontRef = tOpenFont(ucFontNumber, usFontStyle, usFontSize);
720 pOutput->usFontSize = usFontSize;
721 bTableFontClosed = TRUE;
722 lBeforeIndentation = 0;
723 lAfterIndentation = 0;
724 lLeftIndentation = 0;
725 lLeftIndentation1 = 0;
726 lRightIndentation = 0;
727 bWasEndOfParagraph = TRUE;
728 bNoMarks = TRUE;
729 bFirstLine = TRUE;
730 ucNFC = LIST_BULLETS;
731 if (pStyleInfo != NULL) {
732 szListChar = pStyleInfo->szListChar;
733 pStyleTmp = pStyleInfo;
734 } else {
735 if (tStyleNext.szListChar[0] == '\0') {
736 vGetBulletValue(tOptions.eConversionType,
737 tOptions.eEncoding, tStyleNext.szListChar, 4);
739 szListChar = tStyleNext.szListChar;
740 pStyleTmp = &tStyleNext;
742 usListNumber = 0;
743 ucAlignment = ALIGNMENT_LEFT;
744 bAllCapitals = FALSE;
745 bHiddenText = FALSE;
746 bMarkDelText = FALSE;
747 lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
748 NO_DBG_DEC(lWidthMax);
750 Hourglass_On();
752 uiFootnoteNumber = 0;
753 uiEndnoteNumber = 0;
754 eListID = text_list;
755 for(;;) {
756 ulChar = ulGetChar(pFile, eListID);
757 if (ulChar == (ULONG)EOF) {
758 if (bOutputContainsText(pAnchor)) {
759 OUTPUT_LINE();
760 } else {
761 RESET_LINE();
763 switch (eListID) {
764 case text_list:
765 if (tOptions.eConversionType !=
766 conversion_xml) {
767 eListID = footnote_list;
768 if (uiFootnoteNumber != 0) {
769 vPutSeparatorLine(pAnchor);
770 OUTPUT_LINE();
771 uiFootnoteNumber = 0;
773 break;
775 /* No break or return */
776 case footnote_list:
777 eListID = endnote_list;
778 if (uiEndnoteNumber != 0) {
779 vPutSeparatorLine(pAnchor);
780 OUTPUT_LINE();
781 uiEndnoteNumber = 0;
783 break;
784 case endnote_list:
785 eListID = textbox_list;
786 if (bExistsTextBox()) {
787 vPutSeparatorLine(pAnchor);
788 OUTPUT_LINE();
790 break;
791 case textbox_list:
792 eListID = hdrtextbox_list;
793 if (bExistsHdrTextBox()) {
794 vPutSeparatorLine(pAnchor);
795 OUTPUT_LINE();
797 break;
798 case hdrtextbox_list:
799 default:
800 eListID = end_of_lists;
801 break;
803 if (eListID == end_of_lists) {
804 break;
806 continue;
809 if (ulChar == UNKNOWN_NOTE_CHAR) {
810 switch (eListID) {
811 case footnote_list:
812 ulChar = FOOTNOTE_CHAR;
813 break;
814 case endnote_list:
815 ulChar = ENDNOTE_CHAR;
816 break;
817 default:
818 break;
822 if (bStartRow) {
823 /* Begin of a tablerow found */
824 if (bOutputContainsText(pAnchor)) {
825 OUTPUT_LINE();
826 } else {
827 RESET_LINE();
829 fail(pAnchor != pOutput);
830 if (bTableFontClosed) {
831 /* Start special table font */
832 vCloseFont();
834 * Compensate for the fact that Word uses
835 * proportional fonts for its tables and we
836 * only one fixed-width font
838 uiTmp = ((UINT)usFontSize * 5 + 3) / 6;
839 if (uiTmp < MIN_TABLEFONT_SIZE) {
840 uiTmp = MIN_TABLEFONT_SIZE;
841 } else if (uiTmp > MAX_TABLEFONT_SIZE) {
842 uiTmp = MAX_TABLEFONT_SIZE;
844 pOutput->usFontSize = (USHORT)uiTmp;
845 pOutput->tFontRef =
846 tOpenTableFont(pOutput->usFontSize);
847 pOutput->usFontStyle = FONT_REGULAR;
848 pOutput->ucFontColor = FONT_COLOR_BLACK;
849 bTableFontClosed = FALSE;
851 bIsTableRow = TRUE;
852 bStartRow = FALSE;
855 if (bWasTableRow &&
856 !bIsTableRow &&
857 ulChar != PAR_END &&
858 ulChar != HARD_RETURN &&
859 ulChar != PAGE_BREAK &&
860 ulChar != COLUMN_FEED) {
862 * The end of a table should be followed by an
863 * empty line, like the end of a paragraph
865 OUTPUT_LINE();
866 vEndOfParagraph(pDiag,
867 pOutput->tFontRef,
868 pOutput->usFontSize,
869 (long)pOutput->usFontSize * 600);
872 switch (ulChar) {
873 case PAGE_BREAK:
874 case COLUMN_FEED:
875 if (bIsTableRow) {
876 /* Ignore when in a table */
877 break;
879 if (bOutputContainsText(pAnchor)) {
880 OUTPUT_LINE();
881 } else {
882 RESET_LINE();
884 if (ulChar == PAGE_BREAK) {
885 vEndOfPage(pDiag, lAfterIndentation,
886 pSection != pSectionNext);
887 } else {
888 vEndOfParagraph(pDiag,
889 pOutput->tFontRef,
890 pOutput->usFontSize,
891 lAfterIndentation);
893 break;
894 default:
895 break;
898 if (bStartFont || (bStartFontNext && ulChar != PAR_END)) {
899 /* Begin of a font found */
900 if (bStartFont) {
901 /* bStartFont takes priority */
902 fail(pFontInfo == NULL);
903 pFontTmp = pFontInfo;
904 } else {
905 pFontTmp = &tFontNext;
907 bAllCapitals = bIsCapitals(pFontTmp->usFontStyle);
908 bHiddenText = bIsHidden(pFontTmp->usFontStyle);
909 bMarkDelText = bIsMarkDel(pFontTmp->usFontStyle);
910 usTmp = pFontTmp->usFontStyle &
911 (FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE|
912 FONT_STRIKE|FONT_MARKDEL|
913 FONT_SUPERSCRIPT|FONT_SUBSCRIPT);
914 if (!bIsTableRow &&
915 (usFontSize != pFontTmp->usFontSize ||
916 ucFontNumber != pFontTmp->ucFontNumber ||
917 usFontStyleMinimal != usTmp ||
918 ucFontColor != pFontTmp->ucFontColor)) {
919 pOutput = pStartNextOutput(pOutput);
920 vCloseFont();
921 pOutput->ucFontColor = pFontTmp->ucFontColor;
922 pOutput->usFontStyle = pFontTmp->usFontStyle;
923 pOutput->usFontSize = pFontTmp->usFontSize;
924 pOutput->tFontRef = tOpenFont(
925 pFontTmp->ucFontNumber,
926 pFontTmp->usFontStyle,
927 pFontTmp->usFontSize);
928 fail(!bCheckDoubleLinkedList(pAnchor));
930 ucFontNumber = pFontTmp->ucFontNumber;
931 usFontSize = pFontTmp->usFontSize;
932 ucFontColor = pFontTmp->ucFontColor;
933 usFontStyle = pFontTmp->usFontStyle;
934 usFontStyleMinimal = usTmp;
935 if (bStartFont) {
936 /* Get the next font info */
937 pFontInfo = pGetNextFontInfoListItem(pFontInfo);
938 NO_DBG_HEX_C(pFontInfo != NULL,
939 pFontInfo->ulFileOffset);
940 DBG_MSG_C(pFontInfo == NULL, "No more fonts");
942 bStartFont = FALSE;
943 bStartFontNext = FALSE;
946 if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) {
947 bFirstLine = TRUE;
948 /* Begin of a style found */
949 if (bStartStyle) {
950 /* bStartStyle takes priority */
951 fail(pStyleInfo == NULL);
952 pStyleTmp = pStyleInfo;
953 } else {
954 pStyleTmp = &tStyleNext;
956 if (!bIsTableRow) {
957 vStoreStyle(pDiag, pOutput, pStyleTmp);
959 usIstdNext = pStyleTmp->usIstdNext;
960 lBeforeIndentation =
961 lTwips2MilliPoints(pStyleTmp->usBeforeIndent);
962 lAfterIndentation =
963 lTwips2MilliPoints(pStyleTmp->usAfterIndent);
964 lLeftIndentation =
965 lTwips2MilliPoints(pStyleTmp->sLeftIndent);
966 lLeftIndentation1 =
967 lTwips2MilliPoints(pStyleTmp->sLeftIndent1);
968 lRightIndentation =
969 lTwips2MilliPoints(pStyleTmp->sRightIndent);
970 bInList = bStyleImpliesList(pStyleTmp, iWordVersion);
971 bNoMarks = !bInList || pStyleTmp->bNumPause;
972 ucNFC = pStyleTmp->ucNFC;
973 szListChar = pStyleTmp->szListChar;
974 ucAlignment = pStyleTmp->ucAlignment;
975 if (bInList && !bWasInList) {
976 /* Start of a list */
977 iListSeqNumber++;
978 vStartOfList(pDiag, ucNFC,
979 bWasTableRow && !bIsTableRow);
981 if (!bInList && bWasInList) {
982 /* End of a list */
983 vEndOfList(pDiag);
985 bWasInList = bInList;
986 if (bStartStyle) {
987 pStyleInfo = pGetNextTextStyle(pStyleInfo);
988 NO_DBG_HEX_C(pStyleInfo != NULL,
989 pStyleInfo->ulFileOffset);
990 DBG_MSG_C(pStyleInfo == NULL,
991 "No more styles");
993 bStartStyle = FALSE;
994 bStartStyleNext = FALSE;
997 if (bWasEndOfParagraph) {
998 vStartOfParagraph1(pDiag, lBeforeIndentation);
1001 if (!bIsTableRow &&
1002 lTotalStringWidth(pAnchor) == 0) {
1003 if (!bNoMarks) {
1004 usListNumber = usGetListValue(iListSeqNumber,
1005 iWordVersion,
1006 pStyleTmp);
1008 if (bInList && bFirstLine) {
1009 vStartOfListItem(pDiag, bNoMarks);
1011 vPutIndentation(pDiag, pAnchor, bNoMarks, bFirstLine,
1012 usListNumber, ucNFC, szListChar,
1013 lLeftIndentation, lLeftIndentation1);
1014 bFirstLine = FALSE;
1015 /* One number or mark per paragraph will do */
1016 bNoMarks = TRUE;
1019 if (bWasEndOfParagraph) {
1020 vStartOfParagraph2(pDiag);
1021 bWasEndOfParagraph = FALSE;
1024 switch (ulChar) {
1025 case PICTURE:
1026 (void)memset(&tImage, 0, sizeof(tImage));
1027 eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage);
1028 switch (eRes) {
1029 case image_no_information:
1030 bSuccess = FALSE;
1031 break;
1032 case image_minimal_information:
1033 case image_full_information:
1034 #if 0
1035 if (bOutputContainsText(pAnchor)) {
1036 OUTPUT_LINE();
1037 } else {
1038 RESET_LINE();
1040 #endif
1041 bSuccess = bTranslateImage(pDiag, pFile,
1042 eRes == image_minimal_information,
1043 ulFileOffsetImage, &tImage);
1044 break;
1045 default:
1046 DBG_DEC(eRes);
1047 bSuccess = FALSE;
1048 break;
1050 if (!bSuccess) {
1051 vStoreString("[pic]", 5, pOutput);
1053 break;
1054 case FOOTNOTE_CHAR:
1055 uiFootnoteNumber++;
1056 if (tOptions.eConversionType == conversion_xml) {
1057 vStoreCharacter((ULONG)FOOTNOTE_OR_ENDNOTE,
1058 pOutput);
1059 break;
1061 vStoreCharacter((ULONG)'[', pOutput);
1062 vStoreNumberAsDecimal(uiFootnoteNumber, pOutput);
1063 vStoreCharacter((ULONG)']', pOutput);
1064 break;
1065 case ENDNOTE_CHAR:
1066 uiEndnoteNumber++;
1067 vStoreCharacter((ULONG)'[', pOutput);
1068 vStoreNumberAsRoman(uiEndnoteNumber, pOutput);
1069 vStoreCharacter((ULONG)']', pOutput);
1070 break;
1071 case UNKNOWN_NOTE_CHAR:
1072 vStoreString("[?]", 3, pOutput);
1073 break;
1074 case PAR_END:
1075 if (bIsTableRow) {
1076 vStoreCharacter((ULONG)'\n', pOutput);
1077 break;
1079 if (bOutputContainsText(pAnchor)) {
1080 OUTPUT_LINE();
1081 } else {
1082 vMove2NextLine(pDiag,
1083 pOutput->tFontRef, pOutput->usFontSize);
1084 RESET_LINE();
1086 vEndOfParagraph(pDiag,
1087 pOutput->tFontRef,
1088 pOutput->usFontSize,
1089 lAfterIndentation);
1090 bWasEndOfParagraph = TRUE;
1091 break;
1092 case HARD_RETURN:
1093 if (bIsTableRow) {
1094 vStoreCharacter((ULONG)'\n', pOutput);
1095 break;
1097 if (bOutputContainsText(pAnchor)) {
1098 OUTPUT_LINE();
1099 } else {
1100 vMove2NextLine(pDiag,
1101 pOutput->tFontRef, pOutput->usFontSize);
1102 RESET_LINE();
1104 break;
1105 case PAGE_BREAK:
1106 case COLUMN_FEED:
1107 pSection = pSectionNext;
1108 break;
1109 case TABLE_SEPARATOR:
1110 if (bIsTableRow) {
1111 vStoreCharacter(ulChar, pOutput);
1112 break;
1114 vStoreCharacter((ULONG)' ', pOutput);
1115 vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR, pOutput);
1116 break;
1117 case TAB:
1118 if (bIsTableRow ||
1119 tOptions.eConversionType == conversion_xml) {
1120 vStoreCharacter((ULONG)' ', pOutput);
1121 break;
1123 if (tOptions.iParagraphBreak == 0 &&
1124 (tOptions.eConversionType == conversion_text ||
1125 tOptions.eConversionType == conversion_fmt_text)) {
1126 /* No logical lines, so no tab expansion */
1127 vStoreCharacter(TAB, pOutput);
1128 break;
1130 lHalfSpaceWidth = (lComputeSpaceWidth(
1131 pOutput->tFontRef,
1132 pOutput->usFontSize) + 1) / 2;
1133 lTmp = lTotalStringWidth(pAnchor);
1134 lTmp += lDrawUnits2MilliPoints(pDiag->lXleft);
1135 lTmp /= lDefaultTabWidth;
1136 do {
1137 vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
1138 lWidthCurr = lTotalStringWidth(pAnchor);
1139 lWidthCurr +=
1140 lDrawUnits2MilliPoints(pDiag->lXleft);
1141 } while (lTmp == lWidthCurr / lDefaultTabWidth &&
1142 lWidthCurr < lWidthMax + lRightIndentation);
1143 break;
1144 default:
1145 if (bHiddenText && tOptions.bHideHiddenText) {
1146 continue;
1148 if (bMarkDelText && tOptions.bRemoveRemovedText) {
1149 continue;
1151 if (ulChar == UNICODE_ELLIPSIS &&
1152 tOptions.eEncoding != encoding_utf_8) {
1153 vStoreString("...", 3, pOutput);
1154 } else {
1155 if (bAllCapitals) {
1156 ulChar = ulToUpper(ulChar);
1158 vStoreCharacter(ulChar, pOutput);
1160 break;
1163 if (bWasTableRow && !bIsTableRow) {
1164 /* End of a table */
1165 vEndOfTable(pDiag);
1166 /* Resume normal font */
1167 NO_DBG_MSG("End of table font");
1168 vCloseFont();
1169 bTableFontClosed = TRUE;
1170 pOutput->ucFontColor = ucFontColor;
1171 pOutput->usFontStyle = usFontStyle;
1172 pOutput->usFontSize = usFontSize;
1173 pOutput->tFontRef = tOpenFont(
1174 ucFontNumber, usFontStyle, usFontSize);
1176 bWasTableRow = bIsTableRow;
1178 if (bIsTableRow) {
1179 fail(pAnchor != pOutput);
1180 if (!bEndRowNorm && !bEndRowFast) {
1181 continue;
1183 /* End of a table row */
1184 if (bEndRowNorm) {
1185 fail(pRowInfo == NULL);
1186 vTableRow2Window(pDiag, pAnchor, pRowInfo,
1187 tOptions.eConversionType,
1188 tOptions.iParagraphBreak);
1189 } else {
1190 fail(!bEndRowFast);
1192 /* Reset */
1193 pAnchor = pStartNewOutput(pAnchor, NULL);
1194 pOutput = pAnchor;
1195 if (bEndRowNorm) {
1196 pRowInfo = pGetNextRowInfoListItem();
1198 bIsTableRow = FALSE;
1199 bEndRowNorm = FALSE;
1200 bEndRowFast = FALSE;
1201 NO_DBG_HEX_C(pRowInfo != NULL,
1202 pRowInfo->ulFileOffsetStart);
1203 NO_DBG_HEX_C(pRowInfo != NULL,
1204 pRowInfo->ulFileOffsetEnd);
1205 continue;
1207 lWidthCurr = lTotalStringWidth(pAnchor);
1208 lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft);
1209 if (lWidthCurr < lWidthMax + lRightIndentation) {
1210 continue;
1212 pLeftOver = pSplitList(pAnchor);
1213 vJustify2Window(pDiag, pAnchor,
1214 lWidthMax, lRightIndentation, ucAlignment);
1215 pAnchor = pStartNewOutput(pAnchor, pLeftOver);
1216 for (pOutput = pAnchor;
1217 pOutput->pNext != NULL;
1218 pOutput = pOutput->pNext)
1219 ; /* EMPTY */
1220 fail(pOutput == NULL);
1221 if (lTotalStringWidth(pAnchor) > 0) {
1222 vSetLeftIndentation(pDiag, lLeftIndentation);
1226 pAnchor = pStartNewOutput(pAnchor, NULL);
1227 pAnchor->szStorage = xfree(pAnchor->szStorage);
1228 pAnchor = xfree(pAnchor);
1229 vCloseFont();
1230 vFreeDocument();
1231 Hourglass_Off();
1232 return TRUE;
1233 } /* end of bWordDecryptor */
1236 * lLastStringWidth - compute the width of the last part of the output string
1238 static long
1239 lLastStringWidth(const output_type *pAnchor)
1241 const output_type *pCurr, *pStart;
1243 pStart = NULL;
1244 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
1245 if (pCurr->tNextFree == 1 &&
1246 (pCurr->szStorage[0] == PAR_END ||
1247 pCurr->szStorage[0] == HARD_RETURN)) {
1248 /* Found a separator. Start after the separator */
1249 pStart = pCurr->pNext;
1252 if (pStart == NULL) {
1253 /* No separators. Use the whole output string */
1254 pStart = pAnchor;
1256 return lTotalStringWidth(pStart);
1257 } /* end of lLastStringWidth */
1260 * pHdrFtrDecryptor - turn a header/footer list element to something useful
1262 output_type *
1263 pHdrFtrDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
1265 output_type *pAnchor, *pOutput, *pLeftOver;
1266 ULONG ulChar, ulFileOffset, ulCharPos;
1267 long lWidthCurr, lWidthMax;
1268 long lRightIndentation;
1269 USHORT usChar;
1270 UCHAR ucAlignment;
1271 BOOL bSkip;
1273 fail(iWordVersion < 0);
1274 fail(tOptions.eConversionType == conversion_unknown);
1275 fail(tOptions.eEncoding == 0);
1277 if (ulCharPosStart == ulCharPosNext) {
1278 /* There are no bytes to decrypt */
1279 return NULL;
1282 lRightIndentation = 0;
1283 ucAlignment = ALIGNMENT_LEFT;
1284 bSkip = FALSE;
1285 lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
1286 pAnchor = pStartNewOutput(NULL, NULL);
1287 pOutput = pAnchor;
1288 pOutput->tFontRef = tOpenFont(0, FONT_REGULAR, DEFAULT_FONT_SIZE);
1289 usChar = usToHdrFtrPosition(pFile, ulCharPosStart);
1290 ulCharPos = ulCharPosStart;
1291 ulFileOffset = ulCharPos2FileOffset(ulCharPos);
1292 while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
1293 /* Skip embedded characters */
1294 if (usChar == START_EMBEDDED) {
1295 bSkip = TRUE;
1296 } else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
1297 bSkip = FALSE;
1299 /* Translate character */
1300 if (bSkip || usChar == END_IGNORE || usChar == END_EMBEDDED) {
1301 ulChar = IGNORE_CHARACTER;
1302 } else {
1303 ulChar = ulTranslateCharacters(usChar,
1304 ulFileOffset,
1305 iWordVersion,
1306 tOptions.eConversionType,
1307 tOptions.eEncoding,
1308 bOldMacFile);
1310 /* Process character */
1311 if (ulChar != IGNORE_CHARACTER) {
1312 switch (ulChar) {
1313 case PICTURE:
1314 vStoreString("[pic]", 5, pOutput);
1315 break;
1316 case PAR_END:
1317 case HARD_RETURN:
1318 case PAGE_BREAK:
1319 case COLUMN_FEED:
1320 /* To the next substring */
1321 pOutput = pStartNextOutput(pOutput);
1322 vCloseFont();
1323 pOutput->tFontRef = tOpenFont(0,
1324 FONT_REGULAR, DEFAULT_FONT_SIZE);
1325 /* A substring with just one character */
1326 if (ulChar == HARD_RETURN) {
1327 vStoreCharacter(HARD_RETURN, pOutput);
1328 } else {
1329 vStoreCharacter(PAR_END, pOutput);
1331 /* To the next substring */
1332 pOutput = pStartNextOutput(pOutput);
1333 vCloseFont();
1334 pOutput->tFontRef = tOpenFont(0,
1335 FONT_REGULAR, DEFAULT_FONT_SIZE);
1336 fail(!bCheckDoubleLinkedList(pAnchor));
1337 break;
1338 case TABLE_SEPARATOR:
1339 vStoreCharacter((ULONG)' ', pOutput);
1340 vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR,
1341 pOutput);
1342 break;
1343 case TAB:
1344 vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
1345 break;
1346 default:
1347 vStoreCharacter(ulChar, pOutput);
1348 break;
1351 lWidthCurr = lLastStringWidth(pAnchor);
1352 if (lWidthCurr >= lWidthMax + lRightIndentation) {
1353 pLeftOver = pSplitList(pAnchor);
1354 for (pOutput = pAnchor;
1355 pOutput->pNext != NULL;
1356 pOutput = pOutput->pNext)
1357 ; /* EMPTY */
1358 fail(pOutput == NULL);
1359 /* To the next substring */
1360 pOutput = pStartNextOutput(pOutput);
1361 /* A substring with just one HARD_RETURN */
1362 vStoreCharacter(HARD_RETURN, pOutput);
1363 /* Put the leftover piece(s) at the end */
1364 pOutput->pNext = pLeftOver;
1365 if (pLeftOver != NULL) {
1366 pLeftOver->pPrev = pOutput;
1368 fail(!bCheckDoubleLinkedList(pAnchor));
1369 for (pOutput = pAnchor;
1370 pOutput->pNext != NULL;
1371 pOutput = pOutput->pNext)
1372 ; /* EMPTY */
1373 fail(pOutput == NULL);
1375 usChar = usNextChar(pFile, hdrftr_list,
1376 &ulFileOffset, &ulCharPos, NULL);
1378 vCloseFont();
1379 if (bOutputContainsText(pAnchor)) {
1380 return pAnchor;
1382 pAnchor = pStartNewOutput(pAnchor, NULL);
1383 pAnchor->szStorage = xfree(pAnchor->szStorage);
1384 pAnchor = xfree(pAnchor);
1385 return NULL;
1386 } /* end of pHdrFtrDecryptor */
1389 * szFootnoteDecryptor - turn a footnote text list element into text
1391 char *
1392 szFootnoteDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
1394 char *szText;
1395 ULONG ulChar, ulFileOffset, ulCharPos;
1396 USHORT usChar;
1397 size_t tLen, tIndex, tNextFree, tStorageSize;
1398 char szResult[6];
1399 BOOL bSkip;
1401 fail(iWordVersion < 0);
1402 fail(tOptions.eConversionType == conversion_unknown);
1403 fail(tOptions.eEncoding == 0);
1405 if (ulCharPosStart == ulCharPosNext) {
1406 /* There are no bytes to decrypt */
1407 return NULL;
1410 if (tOptions.eConversionType != conversion_xml) {
1411 /* Only implemented for XML output */
1412 return NULL;
1415 bSkip = FALSE;
1417 /* Initialise the text buffer */
1418 tStorageSize = INITIAL_SIZE;
1419 szText = xmalloc(tStorageSize);
1420 tNextFree = 0;
1421 szText[tNextFree] = '\0';
1423 /* Goto the start */
1424 usChar = usToFootnotePosition(pFile, ulCharPosStart);
1425 ulCharPos = ulCharPosStart;
1426 ulFileOffset = ulCharPos2FileOffset(ulCharPos);
1427 /* Skip the unwanted starting characters */
1428 while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext &&
1429 (usChar == FOOTNOTE_OR_ENDNOTE ||
1430 usChar == PAR_END ||
1431 usChar == TAB ||
1432 usChar == (USHORT)' ')) {
1433 usChar = usNextChar(pFile, footnote_list,
1434 &ulFileOffset, &ulCharPos, NULL);
1436 /* Process the footnote text */
1437 while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
1438 /* Skip embedded characters */
1439 if (usChar == START_EMBEDDED) {
1440 bSkip = TRUE;
1441 } else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
1442 bSkip = FALSE;
1444 /* Translate character */
1445 if (bSkip ||
1446 usChar == END_IGNORE ||
1447 usChar == END_EMBEDDED ||
1448 usChar == FOOTNOTE_OR_ENDNOTE) {
1449 ulChar = IGNORE_CHARACTER;
1450 } else {
1451 ulChar = ulTranslateCharacters(usChar,
1452 ulFileOffset,
1453 iWordVersion,
1454 tOptions.eConversionType,
1455 tOptions.eEncoding,
1456 bOldMacFile);
1458 /* Process character */
1459 if (ulChar == PICTURE) {
1460 tLen = 5;
1461 strcpy(szResult, "[pic]");
1462 } else if (ulChar == IGNORE_CHARACTER) {
1463 tLen = 0;
1464 szResult[0] = '\0';
1465 } else {
1466 switch (ulChar) {
1467 case PAR_END:
1468 case HARD_RETURN:
1469 case PAGE_BREAK:
1470 case COLUMN_FEED:
1471 ulChar = (ULONG)PAR_END;
1472 break;
1473 case TAB:
1474 ulChar = (ULONG)' ';
1475 break;
1476 default:
1477 break;
1479 tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
1481 /* Add the results to the text */
1482 if (tNextFree + tLen + 1 > tStorageSize) {
1483 tStorageSize += EXTENTION_SIZE;
1484 szText = xrealloc(szText, tStorageSize);
1486 for (tIndex = 0; tIndex < tLen; tIndex++) {
1487 szText[tNextFree++] = szResult[tIndex];
1489 szText[tNextFree] = '\0';
1490 /* Next character */
1491 usChar = usNextChar(pFile, footnote_list,
1492 &ulFileOffset, &ulCharPos, NULL);
1494 /* Remove redundant spaces */
1495 while (tNextFree != 0 && szText[tNextFree - 1] == ' ') {
1496 szText[tNextFree - 1] = '\0';
1497 tNextFree--;
1499 if (tNextFree == 0) {
1500 /* No text */
1501 szText = xfree(szText);
1502 return NULL;
1504 return szText;
1505 } /* end of szFootnoteDecryptor */