Imported from antiword-0.37.tar.gz.
[antiword.git] / stylelist.c
blob09b5ab5a55b4ae5ec19cbc6a32984660481bcdd4
/*
 * stylelist.c
 * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * Build, read and destroy a list of Word style information
 */
9 #include <stdlib.h>
10 #include <stddef.h>
11 #include <ctype.h>
12 #include "antiword.h"
16 * Private structure to hide the way the information
17 * is stored from the rest of the program
19 typedef struct style_mem_tag {
20 style_block_type tInfo;
21 ULONG ulSequenceNumber;
22 struct style_mem_tag *pNext;
23 } style_mem_type;
25 /* Variables needed to write the Style Information List */
26 static style_mem_type *pAnchor = NULL;
27 static style_mem_type *pStyleLast = NULL;
28 /* The type of conversion */
29 static conversion_type eConversionType = conversion_unknown;
30 /* The character set encoding */
31 static encoding_type eEncoding = encoding_neutral;
32 /* Values for efficiency reasons */
33 static const style_mem_type *pMidPtr = NULL;
34 static BOOL bMoveMidPtr = FALSE;
35 static BOOL bInSequence = TRUE;
39 * vDestroyStyleInfoList - destroy the Style Information List
41 void
42 vDestroyStyleInfoList(void)
44 style_mem_type *pCurr, *pNext;
46 DBG_MSG("vDestroyStyleInfoList");
48 /* Free the Style Information List */
49 pCurr = pAnchor;
50 while (pCurr != NULL) {
51 pNext = pCurr->pNext;
52 pCurr = xfree(pCurr);
53 pCurr = pNext;
55 pAnchor = NULL;
56 /* Reset all control variables */
57 pStyleLast = NULL;
58 pMidPtr = NULL;
59 bMoveMidPtr = FALSE;
60 bInSequence = TRUE;
61 } /* end of vDestroyStyleInfoList */
64 * vConvertListCharacter - convert the list character
66 static void
67 vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
69 options_type tOptions;
70 size_t tLen;
72 fail(szListChar == NULL);
73 fail(szListChar[0] != '\0');
75 if (usListChar < 0x80 && isprint((int)usListChar)) {
76 DBG_CHR_C(isalnum((int)usListChar), usListChar);
77 szListChar[0] = (char)usListChar;
78 szListChar[1] = '\0';
79 return;
82 if (ucNFC != LIST_SPECIAL &&
83 ucNFC != LIST_SPECIAL2 &&
84 ucNFC != LIST_BULLETS) {
85 szListChar[0] = '.';
86 szListChar[1] = '\0';
87 return;
90 if (eConversionType == conversion_unknown ||
91 eEncoding == encoding_neutral) {
92 vGetOptions(&tOptions);
93 eConversionType = tOptions.eConversionType;
94 eEncoding = tOptions.eEncoding;
97 switch (usListChar) {
98 case 0x0000: case 0x00b7: case 0x00fe: case 0xf021: case 0xf043:
99 case 0xf06c: case 0xf093: case 0xf0b7:
100 usListChar = 0x2022; /* BULLET */
101 break;
102 case 0x0096: case 0xf02d:
103 usListChar = 0x2013; /* EN DASH */
104 break;
105 case 0x00a8:
106 usListChar = 0x2666; /* BLACK DIAMOND SUIT */
107 break;
108 case 0x00de:
109 usListChar = 0x21d2; /* RIGHTWARDS DOUBLE ARROW */
110 break;
111 case 0x00e0: case 0xf074:
112 usListChar = 0x25ca; /* LOZENGE */
113 break;
114 case 0x00e1:
115 usListChar = 0x2329; /* LEFT ANGLE BRACKET */
116 break;
117 case 0xf020:
118 usListChar = 0x0020; /* SPACE */
119 break;
120 case 0xf041:
121 usListChar = 0x270c; /* VICTORY HAND */
122 break;
123 case 0xf066:
124 usListChar = 0x03d5; /* GREEK PHI SYMBOL */
125 break;
126 case 0xf06e:
127 usListChar = 0x25a0; /* BLACK SQUARE */
128 break;
129 case 0xf06f: case 0xf070: case 0xf0a8:
130 usListChar = 0x25a1; /* WHITE SQUARE */
131 break;
132 case 0xf071:
133 usListChar = 0x2751; /* LOWER RIGHT SHADOWED WHITE SQUARE */
134 break;
135 case 0xf075: case 0xf077:
136 usListChar = 0x25c6; /* BLACK DIAMOND */
137 break;
138 case 0xf076:
139 usListChar = 0x2756; /* BLACK DIAMOND MINUS WHITE X */
140 break;
141 case 0xf0a7:
142 usListChar = 0x25aa; /* BLACK SMALL SQUARE */
143 break;
144 case 0xf0d8:
145 usListChar = 0x27a2; /* RIGHTWARDS ARROWHEAD */
146 break;
147 case 0xf0e5:
148 usListChar = 0x2199; /* SOUTH WEST ARROW */
149 break;
150 case 0xf0f0:
151 usListChar = 0x21e8; /* RIGHTWARDS WHITE ARROW */
152 break;
153 case 0xf0fc:
154 usListChar = 0x2713; /* CHECK MARK */
155 break;
156 default:
157 if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
158 (usListChar < 0x80 && !isprint((int)usListChar))) {
160 * All remaining private area characters and all
161 * remaining non-printable ASCII characters to their
162 * default bullet character
164 DBG_HEX(usListChar);
165 DBG_FIXME();
166 if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
167 usListChar = 0x2190; /* LEFTWARDS ARROW */
168 } else {
169 usListChar = 0x2022; /* BULLET */
172 break;
175 if (eEncoding == encoding_utf_8) {
176 tLen = tUcs2Utf8(usListChar, szListChar, 4);
177 szListChar[tLen] = '\0';
178 } else {
179 switch (usListChar) {
180 case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
181 case 0x2751:
182 szListChar[0] = 'o';
183 break;
184 case 0x2013: case 0x2500:
185 szListChar[0] = '-';
186 break;
187 case 0x2190: case 0x2199: case 0x2329:
188 szListChar[0] = '<';
189 break;
190 case 0x21d2:
191 szListChar[0] = '=';
192 break;
193 case 0x21e8: case 0x27a2:
194 szListChar[0] = '>';
195 break;
196 case 0x25a0: case 0x25aa:
197 szListChar[0] = '.';
198 break;
199 case 0x2666:
200 szListChar[0] = OUR_DIAMOND;
201 break;
202 case 0x270c:
203 szListChar[0] = 'x';
204 break;
205 case 0x2713:
206 szListChar[0] = 'V';
207 break;
208 case 0x2756:
209 szListChar[0] = '*';
210 break;
211 case 0x2022:
212 default:
213 vGetBulletValue(eConversionType, eEncoding,
214 szListChar, 2);
215 break;
217 tLen = 1;
219 szListChar[tLen] = '\0';
220 } /* end of vConvertListCharacter */
223 * eGetNumType - get the level type from the given level number
225 * Returns the level type
227 level_type_enum
228 eGetNumType(UCHAR ucNumLevel)
230 switch (ucNumLevel) {
231 case 1: case 2: case 3: case 4: case 5:
232 case 6: case 7: case 8: case 9:
233 return level_type_outline;
234 case 10:
235 return level_type_numbering;
236 case 11:
237 return level_type_sequence;
238 case 12:
239 return level_type_pause;
240 default:
241 return level_type_none;
243 } /* end of eGetNumType */
246 * vCorrectStyleValues - correct style values that Antiword can't use
248 void
249 vCorrectStyleValues(style_block_type *pStyleBlock)
251 if (pStyleBlock->usBeforeIndent > 0x7fff) {
252 pStyleBlock->usBeforeIndent = 0;
253 } else if (pStyleBlock->usBeforeIndent > 2160) {
254 /* 2160 twips = 1.5 inches or 38.1 mm */
255 DBG_DEC(pStyleBlock->usBeforeIndent);
256 pStyleBlock->usBeforeIndent = 2160;
258 if (pStyleBlock->usIstd >= 1 &&
259 pStyleBlock->usIstd <= 9 &&
260 pStyleBlock->usBeforeIndent < HEADING_GAP) {
261 NO_DBG_DEC(pStyleBlock->usBeforeIndent);
262 pStyleBlock->usBeforeIndent = HEADING_GAP;
265 if (pStyleBlock->usAfterIndent > 0x7fff) {
266 pStyleBlock->usAfterIndent = 0;
267 } else if (pStyleBlock->usAfterIndent > 2160) {
268 /* 2160 twips = 1.5 inches or 38.1 mm */
269 DBG_DEC(pStyleBlock->usAfterIndent);
270 pStyleBlock->usAfterIndent = 2160;
272 if (pStyleBlock->usIstd >= 1 &&
273 pStyleBlock->usIstd <= 9 &&
274 pStyleBlock->usAfterIndent < HEADING_GAP) {
275 NO_DBG_DEC(pStyleBlock->usAfterIndent);
276 pStyleBlock->usAfterIndent = HEADING_GAP;
279 if (pStyleBlock->sLeftIndent < 0) {
280 pStyleBlock->sLeftIndent = 0;
282 if (pStyleBlock->sRightIndent > 0) {
283 pStyleBlock->sRightIndent = 0;
285 vConvertListCharacter(pStyleBlock->ucNFC,
286 pStyleBlock->usListChar,
287 pStyleBlock->szListChar);
288 } /* end of vCorrectStyleValues */
291 * vAdd2StyleInfoList - Add an element to the Style Information List
293 void
294 vAdd2StyleInfoList(const style_block_type *pStyleBlock)
296 style_mem_type *pListMember;
298 fail(pStyleBlock == NULL);
300 NO_DBG_MSG("bAdd2StyleInfoList");
302 if (pStyleBlock->ulFileOffset == FC_INVALID) {
303 NO_DBG_DEC(pStyleBlock->usIstd);
304 return;
307 NO_DBG_HEX(pStyleBlock->ulFileOffset);
308 NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
309 pStyleBlock->sLeftIndent);
310 NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
311 pStyleBlock->sRightIndent);
312 NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
313 NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
314 NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
315 NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
316 pStyleBlock->usAfterIndent);
317 NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
318 NO_DBG_DEC(pStyleBlock->ucNFC);
319 NO_DBG_HEX(pStyleBlock->usListChar);
321 if (pStyleLast != NULL &&
322 pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
324 * If two consecutive styles share the same
325 * offset, remember only the last style
327 fail(pStyleLast->pNext != NULL);
328 pStyleLast->tInfo = *pStyleBlock;
329 /* Correct the values where needed */
330 vCorrectStyleValues(&pStyleLast->tInfo);
331 return;
334 /* Create list member */
335 pListMember = xmalloc(sizeof(style_mem_type));
336 /* Fill the list member */
337 pListMember->tInfo = *pStyleBlock;
338 pListMember->pNext = NULL;
339 /* Add the sequence number */
340 pListMember->ulSequenceNumber =
341 ulGetSeqNumber(pListMember->tInfo.ulFileOffset);
342 /* Correct the values where needed */
343 vCorrectStyleValues(&pListMember->tInfo);
344 /* Add the new member to the list */
345 if (pAnchor == NULL) {
346 pAnchor = pListMember;
347 /* For efficiency */
348 pMidPtr = pAnchor;
349 bMoveMidPtr = FALSE;
350 bInSequence = TRUE;
351 } else {
352 fail(pStyleLast == NULL);
353 pStyleLast->pNext = pListMember;
354 /* For efficiency */
355 if (bMoveMidPtr) {
356 pMidPtr = pMidPtr->pNext;
357 bMoveMidPtr = FALSE;
358 } else {
359 bMoveMidPtr = TRUE;
361 if (bInSequence) {
362 bInSequence = pListMember->ulSequenceNumber >
363 pStyleLast->ulSequenceNumber;
366 pStyleLast = pListMember;
367 } /* end of vAdd2StyleInfoList */
370 * Get the record that follows the given recored in the Style Information List
372 const style_block_type *
373 pGetNextStyleInfoListItem(const style_block_type *pCurr)
375 const style_mem_type *pRecord;
376 size_t tOffset;
378 if (pCurr == NULL) {
379 if (pAnchor == NULL) {
380 /* There are no records */
381 return NULL;
383 /* The first record is the only one without a predecessor */
384 return &pAnchor->tInfo;
386 tOffset = offsetof(style_mem_type, tInfo);
387 /* Many casts to prevent alignment warnings */
388 pRecord = (style_mem_type *)(void *)((char *)pCurr - tOffset);
389 fail(pCurr != &pRecord->tInfo);
390 if (pRecord->pNext == NULL) {
391 /* The last record has no successor */
392 return NULL;
394 return &pRecord->pNext->tInfo;
395 } /* end of pGetNextStyleInfoListItem */
398 * Get the next text style
400 const style_block_type *
401 pGetNextTextStyle(const style_block_type *pCurr)
403 const style_block_type *pRecord;
405 pRecord = pCurr;
406 do {
407 pRecord = pGetNextStyleInfoListItem(pRecord);
408 } while (pRecord != NULL &&
409 (pRecord->eListID == hdrftr_list ||
410 pRecord->eListID == macro_list ||
411 pRecord->eListID == annotation_list));
412 return pRecord;
413 } /* end of pGetNextTextStyle */
416 * usGetIstd - get the istd that belongs to the given file offset
418 USHORT
419 usGetIstd(ULONG ulFileOffset)
421 const style_mem_type *pCurr, *pBest, *pStart;
422 ULONG ulSeq, ulBest;
424 ulSeq = ulGetSeqNumber(ulFileOffset);
425 if (ulSeq == FC_INVALID) {
426 return ISTD_NORMAL;
428 NO_DBG_HEX(ulFileOffset);
429 NO_DBG_DEC(ulSeq);
431 if (bInSequence &&
432 pMidPtr != NULL &&
433 ulSeq > pMidPtr->ulSequenceNumber) {
434 /* The istd is in the second half of the chained list */
435 pStart = pMidPtr;
436 } else {
437 pStart = pAnchor;
440 pBest = NULL;
441 ulBest = 0;
442 for (pCurr = pStart; pCurr != NULL; pCurr = pCurr->pNext) {
443 if (pCurr->ulSequenceNumber != FC_INVALID &&
444 (pBest == NULL || pCurr->ulSequenceNumber > ulBest) &&
445 pCurr->ulSequenceNumber <= ulSeq) {
446 pBest = pCurr;
447 ulBest = pCurr->ulSequenceNumber;
449 if (bInSequence && pCurr->ulSequenceNumber > ulSeq) {
450 break;
453 NO_DBG_DEC(ulBest);
455 if (pBest == NULL) {
456 return ISTD_NORMAL;
459 NO_DBG_DEC(pBest->tInfo.usIstd);
460 return pBest->tInfo.usIstd;
461 } /* end of usGetIstd */
464 * bStyleImpliesList - does style info implies being part of a list
466 * Decide whether the style information implies that the given paragraph is
467 * part of a list
469 * Returns TRUE when the paragraph is part of a list, otherwise FALSE
471 BOOL
472 bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
474 fail(pStyle == NULL);
475 fail(iWordVersion < 0);
477 if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
478 /* These are heading levels */
479 return FALSE;
481 if (iWordVersion < 8) {
482 /* Check for old style lists */
483 return pStyle->ucNumLevel != 0;
485 /* Check for new style lists */
486 return pStyle->usListIndex != 0;
487 } /* end of bStyleImpliesList */