Imported from antiword-0.34.tar.gz.
[antiword.git] / misc.c
blob5f319307f9f42c21d81a4fd3be11e56c5fb4a5fd
1 /*
2 * misc.c
3 * Copyright (C) 1998-2003 A.J. van Os; Released under GNU GPL
5 * Description:
6 * Miscellaneous functions
7 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <time.h>
14 #if defined(__riscos)
15 #include "kernel.h"
16 #include "swis.h"
17 #else
18 #include <errno.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #endif /* __riscos */
22 #if defined(__dos)
23 #define S_ISDIR(x) (((x) & S_IFMT) == S_IFDIR)
24 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
25 #endif /* __dos */
26 #include "antiword.h"
29 #if !defined(__riscos)
/*
 * szGetHomeDirectory - get the name of the home directory
 *
 * Looks up the HOME environment variable (on VMS the value is passed
 * through decc$translate_vms first).
 * When HOME is unset or empty: on DOS "C:" is returned, on all other
 * systems a warning is issued through werr and "" is returned.
 */
const char *
szGetHomeDirectory(void)
{
	const char	*szResult;

#if defined(__vms)
#include <unixlib.h>
	szResult = decc$translate_vms(getenv("HOME"));
#else
	szResult = getenv("HOME");
#endif /* __vms */

	if (szResult != NULL && szResult[0] != '\0') {
		/* HOME holds something usable */
		return szResult;
	}
#if defined(__dos)
	return "C:";
#else
	werr(0, "I can't find the name of your HOME directory");
	return "";
#endif /* __dos */
} /* end of szGetHomeDirectory */
/*
 * szGetAntiwordDirectory - get the name of the Antiword directory
 *
 * Returns the value of the ANTIWORDHOME environment variable (on VMS the
 * value is passed through decc$translate_vms first).
 * No fallback is applied here: when the variable is not set, getenv
 * yields NULL and that is what the caller receives on non-VMS systems.
 */
const char *
szGetAntiwordDirectory(void)
{
	const char	*szDirectory;

#if defined(__vms)
#include <unixlib.h>
	szDirectory = decc$translate_vms(getenv("ANTIWORDHOME"));
#else
	szDirectory = getenv("ANTIWORDHOME");
#endif /* __vms */
	return szDirectory;
} /* end of szGetAntiwordDirectory */
69 #endif /* !__riscos */
/*
 * lGetFilesize - get the size of the specified file
 *
 * Returns the size in bytes.
 * Returns -1 if the file does not exist, is not a proper (regular) file,
 * or (non-RISC OS) is so large that its size does not fit in a long.
 */
long
lGetFilesize(const char *szFilename)
{
#if defined(__riscos)
	_kernel_swi_regs	regs;
	_kernel_oserror		*e;

	/* Call OS_File with reason code 17 for the named object */
	(void)memset(&regs, 0, sizeof(regs));
	regs.r[0] = 17;
	regs.r[1] = (int)szFilename;
	e = _kernel_swi(OS_File, &regs, &regs);
	if (e != NULL) {
		werr(0, "Get Filesize error %d: %s",
			e->errnum, e->errmess);
		return -1;
	}
	if (regs.r[0] != 1) {
		/* It's not a proper file or the file does not exist */
		return -1;
	}
	/* On return R4 is used as the length of the file */
	return (long)regs.r[4];
#else
	struct stat	tBuffer;

	if (stat(szFilename, &tBuffer) != 0) {
		werr(0, "Get Filesize error %d", errno);
		return -1;
	}
	if (!S_ISREG(tBuffer.st_mode)) {
		/* It's not a regular file */
		return -1;
	}
	if (tBuffer.st_size > LONG_MAX) {
		/*
		 * st_size may be wider than long; a blind cast would hand
		 * the caller a truncated or negative size. Treat such a
		 * file as unusable instead.
		 */
		return -1;
	}
	return (long)tBuffer.st_size;
#endif /* __riscos */
} /* end of lGetFilesize */
111 #if defined(DEBUG)
112 void
113 vPrintBlock(const char *szFile, int iLine,
114 const UCHAR *aucBlock, size_t tLength)
116 int i, j;
118 fail(szFile == NULL || iLine < 0 || aucBlock == NULL);
120 fprintf(stderr, "%s[%3d]:\n", szFile, iLine);
121 for (i = 0; i < 32; i++) {
122 if (16 * i >= (int)tLength) {
123 return;
125 fprintf(stderr, "%03x: ", (unsigned int)(16 * i));
126 for (j = 0; j < 16; j++) {
127 if (16 * i + j < (int)tLength) {
128 fprintf(stderr, "%02x ",
129 (unsigned int)aucBlock[16 * i + j]);
132 fprintf(stderr, "\n");
134 } /* end of vPrintBlock */
136 void
137 vPrintUnicode(const char *szFile, int iLine, const UCHAR *aucUni, size_t tLen)
139 char *szASCII;
141 fail(tLen % 2 != 0);
143 tLen /= 2; /* Length in bytes to length in characters */
144 szASCII = xmalloc(tLen + 1);
145 (void)unincpy(szASCII, aucUni, tLen);
146 szASCII[tLen] = '\0';
147 (void)fprintf(stderr, "%s[%3d]: %.*s\n",
148 szFile, iLine, (int)tLen, szASCII);
149 szASCII = xfree(szASCII);
150 } /* end of vPrintUnicode */
152 BOOL
153 bCheckDoubleLinkedList(output_type *pAnchor)
155 output_type *pCurr, *pLast;
156 int iInList;
158 pLast = pAnchor;
159 iInList = 0;
160 for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
161 pLast = pCurr;
162 iInList++;
164 NO_DBG_DEC(iInList);
165 for (pCurr = pLast; pCurr != NULL; pCurr = pCurr->pPrev) {
166 pLast = pCurr;
167 iInList--;
169 DBG_DEC_C(iInList != 0, iInList);
170 return pAnchor == pLast && iInList == 0;
171 } /* end of bCheckDoubleLinkedList */
172 #endif /* DEBUG */
175 * bReadBytes
176 * This function reads the specified number of bytes from the specified file,
177 * starting from the specified offset.
178 * Returns TRUE when successfull, otherwise FALSE
180 BOOL
181 bReadBytes(UCHAR *aucBytes, size_t tMemb, ULONG ulOffset, FILE *pFile)
183 fail(aucBytes == NULL || pFile == NULL || ulOffset > (ULONG)LONG_MAX);
185 if (ulOffset > (ULONG)LONG_MAX) {
186 return FALSE;
188 if (fseek(pFile, (long)ulOffset, SEEK_SET) != 0) {
189 return FALSE;
191 if (fread(aucBytes, sizeof(UCHAR), tMemb, pFile) != tMemb) {
192 return FALSE;
194 return TRUE;
195 } /* end of bReadBytes */
198 * bReadBuffer
199 * This function fills the specified buffer with the specified number of bytes,
200 * starting at the specified offset within the Big/Small Block Depot.
202 * Returns TRUE when successful, otherwise FALSE
204 BOOL
205 bReadBuffer(FILE *pFile, ULONG ulStartBlock,
206 const ULONG *aulBlockDepot, size_t tBlockDepotLen, size_t tBlockSize,
207 UCHAR *aucBuffer, ULONG ulOffset, size_t tToRead)
209 ULONG ulBegin, ulIndex;
210 size_t tLen;
212 fail(pFile == NULL);
213 fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
214 fail(aulBlockDepot == NULL);
215 fail(tBlockSize != BIG_BLOCK_SIZE && tBlockSize != SMALL_BLOCK_SIZE);
216 fail(aucBuffer == NULL);
217 fail(tToRead == 0);
219 for (ulIndex = ulStartBlock;
220 ulIndex != END_OF_CHAIN && tToRead != 0;
221 ulIndex = aulBlockDepot[ulIndex]) {
222 if (ulIndex >= (ULONG)tBlockDepotLen) {
223 DBG_DEC(ulIndex);
224 DBG_DEC(tBlockDepotLen);
225 if (tBlockSize >= BIG_BLOCK_SIZE) {
226 werr(1, "The Big Block Depot is damaged");
227 } else {
228 werr(1, "The Small Block Depot is damaged");
231 if (ulOffset >= (ULONG)tBlockSize) {
232 ulOffset -= tBlockSize;
233 continue;
235 ulBegin = ulDepotOffset(ulIndex, tBlockSize) + ulOffset;
236 tLen = min(tBlockSize - (size_t)ulOffset, tToRead);
237 ulOffset = 0;
238 if (!bReadBytes(aucBuffer, tLen, ulBegin, pFile)) {
239 werr(0, "Read big block 0x%lx not possible", ulBegin);
240 return FALSE;
242 aucBuffer += tLen;
243 tToRead -= tLen;
245 DBG_DEC_C(tToRead != 0, tToRead);
246 return tToRead == 0;
247 } /* end of bReadBuffer */
250 * Translate a Word colornumber into a true color for use in a drawfile
252 * Returns the true color
254 ULONG
255 ulColor2Color(UCHAR ucFontColor)
257 static const ULONG aulColorTable[] = {
258 /* 0 */ 0x00000000UL, /* Automatic */
259 /* 1 */ 0x00000000UL, /* Black */
260 /* 2 */ 0xff000000UL, /* Blue */
261 /* 3 */ 0xffff0000UL, /* Turquoise */
262 /* 4 */ 0x00ff0000UL, /* Bright Green */
263 /* 5 */ 0xff00ff00UL, /* Pink */
264 /* 6 */ 0x0000ff00UL, /* Red */
265 /* 7 */ 0x00ffff00UL, /* Yellow */
266 /* 8 */ 0xffffff00UL, /* White */
267 /* 9 */ 0x80000000UL, /* Dark Blue */
268 /* 10 */ 0x80800000UL, /* Teal */
269 /* 11 */ 0x00800000UL, /* Green */
270 /* 12 */ 0x80008000UL, /* Violet */
271 /* 13 */ 0x00008000UL, /* Dark Red */
272 /* 14 */ 0x00808000UL, /* Dark Yellow */
273 /* 15 */ 0x80808000UL, /* Gray 50% */
274 /* 16 */ 0xc0c0c000UL, /* Gray 25% */
276 if ((size_t)ucFontColor >= elementsof(aulColorTable)) {
277 return aulColorTable[0];
279 return aulColorTable[(int)ucFontColor];
280 } /* end of ulColor2Color */
/*
 * iFindSplit - find a place to split the string
 *
 * The string is searched from the end towards the start for a space, or
 * for a hyphen that is not directly preceded by a space. The character at
 * index 0 is never a candidate.
 *
 * returns the index of the split character or -1 if no split found.
 */
static int
iFindSplit(const char *szString, size_t tStringLen)
{
	size_t	tPos;
	char	cCurr;

	if (tStringLen == 0) {
		return -1;
	}
	for (tPos = tStringLen - 1; tPos > 0; tPos--) {
		cCurr = szString[tPos];
		if (cCurr == ' ') {
			return (int)tPos;
		}
		if (cCurr == '-' && szString[tPos - 1] != ' ') {
			return (int)tPos;
		}
	}
	return -1;
} /* end of iFindSplit */
307 * pSplitList - split the specified list in a printable part and a leftover part
309 * returns the pointer to the leftover part
311 output_type *
312 pSplitList(output_type *pAnchor)
314 output_type *pCurr, *pLeftOver;
315 int iIndex;
317 fail(pAnchor == NULL);
319 for (pCurr = pAnchor; pCurr->pNext != NULL; pCurr = pCurr->pNext)
320 ; /* EMPTY */
321 iIndex = -1;
322 for (; pCurr != NULL; pCurr = pCurr->pPrev) {
323 iIndex = iFindSplit(pCurr->szStorage, pCurr->tNextFree);
324 if (iIndex >= 0) {
325 break;
329 if (pCurr == NULL || iIndex < 0) {
330 /* No split, no leftover */
331 return NULL;
333 /* Split over the iIndex-th character */
334 NO_DBG_MSG("pLeftOver");
335 pLeftOver = xmalloc(sizeof(*pLeftOver));
336 fail(pCurr->tNextFree < (size_t)iIndex);
337 pLeftOver->tStorageSize = pCurr->tNextFree - (size_t)iIndex;
338 pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
339 pLeftOver->tNextFree = pCurr->tNextFree - (size_t)iIndex - 1;
340 (void)strncpy(pLeftOver->szStorage,
341 pCurr->szStorage + iIndex + 1, pLeftOver->tNextFree);
342 pLeftOver->szStorage[pLeftOver->tNextFree] = '\0';
343 NO_DBG_MSG(pLeftOver->szStorage);
344 pLeftOver->ucFontColor = pCurr->ucFontColor;
345 pLeftOver->usFontStyle = pCurr->usFontStyle;
346 pLeftOver->tFontRef = pCurr->tFontRef;
347 pLeftOver->usFontSize = pCurr->usFontSize;
348 pLeftOver->lStringWidth = lComputeStringWidth(
349 pLeftOver->szStorage,
350 pLeftOver->tNextFree,
351 pLeftOver->tFontRef,
352 pLeftOver->usFontSize);
353 pLeftOver->pPrev = NULL;
354 pLeftOver->pNext = pCurr->pNext;
355 if (pLeftOver->pNext != NULL) {
356 pLeftOver->pNext->pPrev = pLeftOver;
358 fail(!bCheckDoubleLinkedList(pLeftOver));
360 NO_DBG_MSG("pAnchor");
361 NO_DBG_HEX(pCurr->szStorage[iIndex]);
362 while (iIndex >= 0 && isspace((int)(UCHAR)pCurr->szStorage[iIndex])) {
363 iIndex--;
365 pCurr->tNextFree = (size_t)iIndex + 1;
366 pCurr->szStorage[pCurr->tNextFree] = '\0';
367 NO_DBG_MSG(pCurr->szStorage);
368 pCurr->lStringWidth = lComputeStringWidth(
369 pCurr->szStorage,
370 pCurr->tNextFree,
371 pCurr->tFontRef,
372 pCurr->usFontSize);
373 pCurr->pNext = NULL;
374 fail(!bCheckDoubleLinkedList(pAnchor));
376 return pLeftOver;
377 } /* end of pSplitList */
380 * tNumber2Roman - convert a number to Roman Numerals
382 * returns the number of characters written
384 size_t
385 tNumber2Roman(UINT uiNumber, BOOL bUpperCase, char *szOutput)
387 char *outp, *p, *q;
388 UINT uiNextVal, uiValue;
390 fail(szOutput == NULL);
392 uiNumber %= 4000; /* Very high numbers can't be represented */
393 if (uiNumber == 0) {
394 szOutput[0] = '\0';
395 return 0;
398 outp = szOutput;
399 p = bUpperCase ? "M\2D\5C\2L\5X\2V\5I" : "m\2d\5c\2l\5x\2v\5i";
400 uiValue = 1000;
401 for (;;) {
402 while (uiNumber >= uiValue) {
403 *outp++ = *p;
404 uiNumber -= uiValue;
406 if (uiNumber == 0) {
407 *outp = '\0';
408 fail(outp < szOutput);
409 return (size_t)(outp - szOutput);
411 q = p + 1;
412 uiNextVal = uiValue / (UINT)(UCHAR)*q;
413 if ((int)*q == 2) { /* magic */
414 uiNextVal /= (UINT)(UCHAR)*(q += 2);
416 if (uiNumber + uiNextVal >= uiValue) {
417 *outp++ = *++q;
418 uiNumber += uiNextVal;
419 } else {
420 p++;
421 uiValue /= (UINT)(UCHAR)(*p++);
424 } /* end of tNumber2Roman */
427 * iNumber2Alpha - convert a number to alphabetic "numbers"
429 * returns the number of characters written
431 size_t
432 tNumber2Alpha(UINT uiNumber, BOOL bUpperCase, char *szOutput)
434 char *outp;
435 UINT uiTmp;
437 fail(szOutput == NULL);
439 if (uiNumber == 0) {
440 szOutput[0] = '\0';
441 return 0;
444 outp = szOutput;
445 uiTmp = (UINT)(bUpperCase ? 'A': 'a');
446 if (uiNumber <= 26) {
447 uiNumber -= 1;
448 *outp++ = (char)(uiTmp + uiNumber);
449 } else if (uiNumber <= 26U + 26U*26U) {
450 uiNumber -= 26 + 1;
451 *outp++ = (char)(uiTmp + uiNumber / 26);
452 *outp++ = (char)(uiTmp + uiNumber % 26);
453 } else if (uiNumber <= 26U + 26U*26U + 26U*26U*26U) {
454 uiNumber -= 26 + 26*26 + 1;
455 *outp++ = (char)(uiTmp + uiNumber / (26*26));
456 *outp++ = (char)(uiTmp + uiNumber / 26 % 26);
457 *outp++ = (char)(uiTmp + uiNumber % 26);
459 *outp = '\0';
460 fail(outp < szOutput);
461 return (size_t)(outp - szOutput);
462 } /* end of tNumber2Alpha */
465 * unincpy - copy a counted Unicode string to an single-byte string
467 char *
468 unincpy(char *s1, const UCHAR *s2, size_t n)
470 char *dest;
471 ULONG ulChar;
472 size_t tLen;
473 USHORT usUni;
475 for (dest = s1, tLen = 0; tLen < n; dest++, tLen++) {
476 usUni = usGetWord(tLen * 2, s2);
477 if (usUni == 0) {
478 break;
480 ulChar = ulTranslateCharacters(usUni, 0, 8,
481 conversion_unknown, encoding_neutral, FALSE);
482 if (ulChar == IGNORE_CHARACTER) {
483 ulChar = (ULONG)'?';
485 *dest = (char)ulChar;
487 for (; tLen < n; tLen++) {
488 *dest++ = '\0';
490 return s1;
491 } /* end of unincpy */
494 * unilen - calculate the length of a Unicode string
496 * returns the length in bytes
498 size_t
499 unilen(const UCHAR *s)
501 size_t tLen;
502 USHORT usUni;
504 tLen = 0;
505 for (;;) {
506 usUni = usGetWord(tLen, s);
507 if (usUni == 0) {
508 return tLen;
510 tLen += 2;
512 } /* end of unilen */
515 * szBaseName - get the basename of the specified filename
517 const char *
518 szBasename(const char *szFilename)
520 const char *szTmp;
522 fail(szFilename == NULL);
524 if (szFilename == NULL || szFilename[0] == '\0') {
525 return "null";
528 szTmp = strrchr(szFilename, FILE_SEPARATOR[0]);
529 if (szTmp == NULL) {
530 return szFilename;
532 return ++szTmp;
533 } /* end of szBasename */
536 * lComputeLeading - compute the leading
538 * NOTE: the fontsize is specified in half points
540 * Returns the leading in drawunits
542 long
543 lComputeLeading(USHORT usFontSize)
545 long lLeading;
547 lLeading = (long)usFontSize * 500L;
548 if (usFontSize < 18) { /* Small text: 112% */
549 lLeading *= 112;
550 } else if (usFontSize < 28) { /* Normal text: 124% */
551 lLeading *= 124;
552 } else if (usFontSize < 48) { /* Small headlines: 104% */
553 lLeading *= 104;
554 } else { /* Large headlines: 100% */
555 lLeading *= 100;
557 lLeading = lMilliPoints2DrawUnits(lLeading);
558 lLeading += 50;
559 lLeading /= 100;
560 return lLeading;
561 } /* end of lComputeLeading */
564 * Convert a UCS character to an UTF-8 string
566 * Returns the string length of the result
568 size_t
569 tUcs2Utf8(ULONG ulChar, char *szResult, size_t tMaxResultLen)
571 if (szResult == NULL || tMaxResultLen == 0) {
572 return 0;
575 if (ulChar < 0x80 && tMaxResultLen >= 2) {
576 szResult[0] = (char)ulChar;
577 szResult[1] = '\0';
578 return 1;
580 if (ulChar < 0x800 && tMaxResultLen >= 3) {
581 szResult[0] = (char)(0xc0 | ulChar >> 6);
582 szResult[1] = (char)(0x80 | (ulChar & 0x3f));
583 szResult[2] = '\0';
584 return 2;
586 if (ulChar < 0x10000 && tMaxResultLen >= 4) {
587 szResult[0] = (char)(0xe0 | ulChar >> 12);
588 szResult[1] = (char)(0x80 | (ulChar >> 6 & 0x3f));
589 szResult[2] = (char)(0x80 | (ulChar & 0x3f));
590 szResult[3] = '\0';
591 return 3;
593 if (ulChar < 0x200000 && tMaxResultLen >= 5) {
594 szResult[0] = (char)(0xf0 | ulChar >> 18);
595 szResult[1] = (char)(0x80 | (ulChar >> 12 & 0x3f));
596 szResult[2] = (char)(0x80 | (ulChar >> 6 & 0x3f));
597 szResult[3] = (char)(0x80 | (ulChar & 0x3f));
598 szResult[4] = '\0';
599 return 4;
601 szResult[0] = '\0';
602 return 0;
603 } /* end of tUcs2Utf8 */
606 * vGetBulletValue - get the bullet value for the conversing type and encoding
608 void
609 vGetBulletValue(conversion_type eConversionType, encoding_type eEncoding,
610 char *szResult, size_t tMaxResultLen)
612 fail(szResult == NULL);
613 fail(tMaxResultLen < 2);
615 if (eEncoding == encoding_utf8) {
616 (void)tUcs2Utf8(UNICODE_BULLET, szResult, tMaxResultLen);
617 } else if (eEncoding == encoding_iso_8859_1 &&
618 eConversionType == conversion_ps) {
619 szResult[0] = OUR_BULLET_PS;
620 szResult[1] = '\0';
621 } else {
622 szResult[0] = OUR_BULLET_TEXT;
623 szResult[1] = '\0';
625 } /* end of vGetBulletValue */
628 * bAllZero - are all bytes zero?
630 BOOL
631 bAllZero(const UCHAR *aucBytes, size_t tLength)
633 size_t tIndex;
635 if (aucBytes == NULL || tLength == 0) {
636 return TRUE;
639 for (tIndex = 0; tIndex < tLength; tIndex++) {
640 if (aucBytes[tIndex] != 0) {
641 return FALSE;
644 return TRUE;
645 } /* end of bAllZero */