prop0.c

   1 /*
   2  * prop0.c
   3  * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
   4  *
   5  * Description:
   6  * Read the property information from a Word for DOS file
   7  */
   8
   9 #include <string.h>
  10 #include <time.h>
  11 #include "antiword.h"
  12
  13
  14 /*
  15  * tConvertDosDate - convert DOS date format
  16  *
  17  * returns Unix time_t or -1
  18  */
  19 static time_t
  20 tConvertDosDate(const char *szDosDate)
  21 {
  22         struct tm       tTime;
  23         const char      *pcTmp;
  24         time_t          tResult;
  25
  26         memset(&tTime, 0, sizeof(tTime));
  27         pcTmp = szDosDate;
  28         /* Get the month */
  29         if (!isdigit(*pcTmp)) {
  30                 return (time_t)-1;
  31         }
  32         tTime.tm_mon = (int)(*pcTmp - '0');
  33         pcTmp++;
  34         if (isdigit(*pcTmp)) {
  35                 tTime.tm_mon *= 10;
  36                 tTime.tm_mon += (int)(*pcTmp - '0');
  37                 pcTmp++;
  38         }
  39         /* Get the first separater */
  40         if (isalnum(*pcTmp)) {
  41                 return (time_t)-1;
  42         }
  43         pcTmp++;
  44         /* Get the day */
  45         if (!isdigit(*pcTmp)) {
  46                 return (time_t)-1;
  47         }
  48         tTime.tm_mday = (int)(*pcTmp - '0');
  49         pcTmp++;
  50         if (isdigit(*pcTmp)) {
  51                 tTime.tm_mday *= 10;
  52                 tTime.tm_mday += (int)(*pcTmp - '0');
  53                 pcTmp++;
  54         }
  55         /* Get the second separater */
  56         if (isalnum(*pcTmp)) {
  57                 return (time_t)-1;
  58         }
  59         pcTmp++;
  60         /* Get the year */
  61         if (!isdigit(*pcTmp)) {
  62                 return (time_t)-1;
  63         }
  64         tTime.tm_year = (int)(*pcTmp - '0');
  65         pcTmp++;
  66         if (isdigit(*pcTmp)) {
  67                 tTime.tm_year *= 10;
  68                 tTime.tm_year += (int)(*pcTmp - '0');
  69                 pcTmp++;
  70         }
  71         /* Check the values */
  72         if (tTime.tm_mon == 0 || tTime.tm_mday == 0 || tTime.tm_mday > 31) {
  73                 return (time_t)-1;
  74         }
  75         /* Correct the values */
  76         tTime.tm_mon--;         /* From 01-12 to 00-11 */
  77         if (tTime.tm_year < 80) {
  78                 tTime.tm_year += 100;   /* 00 means 2000 is 100 */
  79         }
  80         tTime.tm_isdst = -1;
  81         tResult = mktime(&tTime);
  82         NO_DBG_MSG(ctime(&tResult));
  83         return tResult;
  84 } /* end of tConvertDosDate */
  85
  86 /*
  87  * Build the lists with Document Property Information for Word for DOS files
  88  */
  89 void
  90 vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
  91 {
  92         document_block_type     tDocument;
  93         UCHAR   *aucBuffer;
  94         ULONG   ulBeginSumdInfo, ulBeginNextBlock;
  95         size_t  tLen;
  96         USHORT  usOffset;
  97
  98         tDocument.ucHdrFtrSpecification = 0;
  99         tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
 100         tDocument.tCreateDate = (time_t)-1;
 101         tDocument.tRevisedDate = (time_t)-1;
 102
 103         ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
 104         DBG_HEX(ulBeginSumdInfo);
 105         ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
 106         DBG_HEX(ulBeginNextBlock);
 107
 108         if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
 109                 /* There is a summary information block */
 110                 tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
 111                 aucBuffer = xmalloc(tLen);
 112                 /* Read the summary information block */
 113                 if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
 114                         usOffset = usGetWord(12, aucBuffer);
 115                         if (aucBuffer[usOffset] != 0) {
 116                                 NO_DBG_STRN(aucBuffer + usOffset, 8);
 117                                 tDocument.tRevisedDate =
 118                                 tConvertDosDate((char *)aucBuffer + usOffset);
 119                         }
 120                         usOffset = usGetWord(14, aucBuffer);
 121                         if (aucBuffer[usOffset] != 0) {
 122                                 NO_DBG_STRN(aucBuffer + usOffset, 8);
 123                                 tDocument.tCreateDate =
 124                                 tConvertDosDate((char *)aucBuffer + usOffset);
 125                         }
 126                 }
 127                 aucBuffer = xfree(aucBuffer);
 128         }
 129         vCreateDocumentInfoList(&tDocument);
 130 } /* end of vGet0DopInfo */
 131
 132 /*
 133  * Fill the section information block with information
 134  * from a Word for DOS file.
 135  */
 136 static void
 137 vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
 138                 section_block_type *pSection)
 139 {
 140         USHORT  usCcol;
 141         UCHAR   ucTmp;
 142
 143         fail(aucGrpprl == NULL || pSection == NULL);
 144
 145         if (tBytes < 2) {
 146                 return;
 147         }
 148         /* bkc */
 149         ucTmp = ucGetByte(1, aucGrpprl);
 150         DBG_HEX(ucTmp);
 151         ucTmp &= 0x07;
 152         DBG_HEX(ucTmp);
 153         pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
 154         if (tBytes < 18) {
 155                 return;
 156         }
 157         /* ccolM1 */
 158         usCcol = (USHORT)ucGetByte(17, aucGrpprl);
 159         DBG_DEC(usCcol);
 160 } /* end of vGet0SectionInfo */
 161
 162 /*
 163  * Build the lists with Section Property Information for Word for DOS files
 164  */
 165 void
 166 vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
 167 {
 168         section_block_type      tSection;
 169         UCHAR   *aucBuffer;
 170         ULONG   ulBeginOfText, ulTextOffset, ulBeginSectInfo;
 171         ULONG   ulCharPos, ulSectPage, ulBeginNextBlock;
 172         size_t  tSectInfoLen, tIndex, tSections, tBytes;
 173         UCHAR   aucTmp[2], aucFpage[35];
 174
 175         fail(pFile == NULL || aucHeader == NULL);
 176
 177         ulBeginOfText = 128;
 178         NO_DBG_HEX(ulBeginOfText);
 179         ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
 180         DBG_HEX(ulBeginSectInfo);
 181         ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
 182         DBG_HEX(ulBeginNextBlock);
 183         if (ulBeginSectInfo == ulBeginNextBlock) {
 184                 /* There is no section information block */
 185                 return;
 186         }
 187
 188         /* Get the the number of sections */
 189         if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
 190                 return;
 191         }
 192         tSections = (size_t)usGetWord(0, aucTmp);
 193         NO_DBG_DEC(tSections);
 194
 195         /* Read the Section Descriptors */
 196         tSectInfoLen = 10 * tSections;
 197         NO_DBG_DEC(tSectInfoLen);
 198         aucBuffer = xmalloc(tSectInfoLen);
 199         if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
 200                 aucBuffer = xfree(aucBuffer);
 201                 return;
 202         }
 203         NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
 204
 205         /* Read the Section Properties */
 206         for (tIndex = 0; tIndex < tSections; tIndex++) {
 207                 ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
 208                 NO_DBG_HEX(ulTextOffset);
 209                 ulCharPos = ulBeginOfText + ulTextOffset;
 210                 NO_DBG_HEX(ulTextOffset);
 211                 ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
 212                 NO_DBG_HEX(ulSectPage);
 213                 if (ulSectPage == FC_INVALID ||         /* Must use defaults */
 214                     ulSectPage < 128 ||                 /* Should not happen */
 215                     ulSectPage >= ulBeginSectInfo) {    /* Should not happen */
 216                         DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
 217                         vDefault2SectionInfoList(ulCharPos);
 218                         continue;
 219                 }
 220                 /* Get the number of bytes to read */
 221                 if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
 222                         continue;
 223                 }
 224                 tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
 225                 NO_DBG_DEC(tBytes);
 226                 if (tBytes > sizeof(aucFpage)) {
 227                         DBG_DEC(tBytes);
 228                         tBytes = sizeof(aucFpage);
 229                 }
 230                 /* Read the bytes */
 231                 if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
 232                         continue;
 233                 }
 234                 NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
 235                 /* Process the bytes */
 236                 vGetDefaultSection(&tSection);
 237                 vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
 238                 vAdd2SectionInfoList(&tSection, ulCharPos);
 239         }
 240         /* Clean up before you leave */
 241         aucBuffer = xfree(aucBuffer);
 242 } /* end of vGet0SepInfo */
 243
 244 /*
 245  * Fill the style information block with information
 246  * from a Word for DOS file.
 247  */
 248 static void
 249 vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
 250 {
 251         int     iBytes;
 252         UCHAR   ucTmp;
 253
 254         fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
 255
 256         pStyle->usIstdNext = ISTD_NORMAL;
 257
 258         iBytes = (int)ucGetByte(iFodo, aucGrpprl);
 259         if (iBytes < 1) {
 260                 return;
 261         }
 262         /* stc if styled */
 263         ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
 264         if ((ucTmp & BIT(0)) != 0) {
 265                 ucTmp >>= 1;
 266                 if (ucTmp >= 88 && ucTmp <= 94) {
 267                         /* Header levels 1 through 7 */
 268                         pStyle->usIstd = ucTmp - 87;
 269                         pStyle->ucNumLevel = 1;
 270                 }
 271         }
 272         if (iBytes < 2) {
 273                 return;
 274         }
 275         /* jc */
 276         ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
 277         pStyle->ucAlignment = ucTmp & 0x02;
 278         if (iBytes < 3) {
 279                 return;
 280         }
 281         /* stc */
 282         ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
 283         ucTmp &= 0x7f;
 284         if (ucTmp >= 88 && ucTmp <= 94) {
 285                 /* Header levels 1 through 7 */
 286                 pStyle->usIstd = ucTmp - 87;
 287                 pStyle->ucNumLevel = 1;
 288         }
 289         if (iBytes < 6) {
 290                 return;
 291         }
 292         /* dxaRight */
 293         pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
 294         NO_DBG_DEC(pStyle->sRightIndent);
 295         if (iBytes < 8) {
 296                 return;
 297         }
 298         /* dxaLeft */
 299         pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
 300         NO_DBG_DEC(pStyle->sLeftIndent);
 301         if (iBytes < 10) {
 302                 return;
 303         }
 304         /* dxaLeft1 */
 305         pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
 306         NO_DBG_DEC(pStyle->sLeftIndent1);
 307         if (iBytes < 14) {
 308                 return;
 309         }
 310         /* dyaBefore */
 311         pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
 312         NO_DBG_DEC(pStyle->usBeforeIndent);
 313         if (iBytes < 16) {
 314                 return;
 315         }
 316         /* dyaAfter */
 317         pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
 318         NO_DBG_DEC(pStyle->usAfterIndent);
 319 } /* end of vGet0StyleInfo */
 320
 321 /*
 322  * Build the lists with Paragraph Information for Word for DOS files
 323  */
 324 void
 325 vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
 326 {
 327         style_block_type        tStyle;
 328         ULONG   ulBeginParfInfo, ulCharPos, ulCharPosNext;
 329         int     iIndex, iRun, iFodo;
 330         UCHAR   aucFpage[128];
 331
 332         fail(pFile == NULL || aucHeader == NULL);
 333
 334         ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
 335         NO_DBG_HEX(ulBeginParfInfo);
 336
 337         do {
 338                 if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
 339                         return;
 340                 }
 341                 NO_DBG_PRINT_BLOCK(aucFpage, 128);
 342                 ulCharPosNext = ulGetLong(0, aucFpage);
 343                 iRun = (int)ucGetByte(0x7f, aucFpage);
 344                 NO_DBG_DEC(iRun);
 345                 for (iIndex = 0; iIndex < iRun; iIndex++) {
 346                         iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
 347                         if (iFodo <= 0 || iFodo > 0x79) {
 348                                 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
 349                                 continue;
 350                         }
 351                         vFillStyleFromStylesheet(0, &tStyle);
 352                         vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
 353                         ulCharPos = ulCharPosNext;
 354                         ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
 355                         tStyle.ulFileOffset = ulCharPos;
 356                         vAdd2StyleInfoList(&tStyle);
 357                 }
 358                 ulBeginParfInfo += 128;
 359         } while (ulCharPosNext == ulBeginParfInfo);
 360 } /* end of vGet0PapInfo */
 361
 362 /*
 363  * Fill the font information block with information
 364  * from a Word for DOS file.
 365  */
 366 static void
 367 vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
 368 {
 369         int     iBytes;
 370         UCHAR   ucTmp;
 371
 372         fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
 373
 374         iBytes = (int)ucGetByte(iFodo, aucGrpprl);
 375         if (iBytes < 2) {
 376                 return;
 377         }
 378         /* fBold, fItalic, cFtc */
 379         ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
 380         if ((ucTmp & BIT(0)) != 0) {
 381                 pFont->usFontStyle |= FONT_BOLD;
 382         }
 383         if ((ucTmp & BIT(1)) != 0) {
 384                 pFont->usFontStyle |= FONT_ITALIC;
 385         }
 386         pFont->ucFontNumber = ucTmp >> 2;
 387         NO_DBG_DEC(pFont->ucFontNumber);
 388         if (iBytes < 3) {
 389                 return;
 390         }
 391         /* cHps */
 392         pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
 393         NO_DBG_DEC(pFont->usFontSize);
 394         if (iBytes < 4) {
 395                 return;
 396         }
 397         /* cKul, fStrike, fCaps, fSmallCaps, fVanish */
 398         ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
 399         if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
 400                 pFont->usFontStyle |= FONT_UNDERLINE;
 401         }
 402         if ((ucTmp & BIT(1)) != 0) {
 403                 pFont->usFontStyle |= FONT_STRIKE;
 404         }
 405         if ((ucTmp & BIT(4)) != 0) {
 406                 pFont->usFontStyle |= FONT_CAPITALS;
 407         }
 408         if ((ucTmp & BIT(5)) != 0) {
 409                 pFont->usFontStyle |= FONT_SMALL_CAPITALS;
 410         }
 411         if ((ucTmp & BIT(7)) != 0) {
 412                 pFont->usFontStyle |= FONT_HIDDEN;
 413         }
 414         DBG_HEX(pFont->usFontStyle);
 415         if (iBytes < 6) {
 416                 return;
 417         }
 418         /* cIss */
 419         ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
 420         if (ucTmp != 0) {
 421                 if (ucTmp < 128) {
 422                         pFont->usFontStyle |= FONT_SUPERSCRIPT;
 423                         DBG_MSG("Superscript");
 424                 } else {
 425                         pFont->usFontStyle |= FONT_SUBSCRIPT;
 426                         DBG_MSG("Subscript");
 427                 }
 428         }
 429         if (iBytes < 7) {
 430                 return;
 431         }
 432         /* cIco */
 433         ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
 434         switch (ucTmp & 0x07) {
 435         case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
 436         case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
 437         case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
 438         case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
 439         case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
 440         case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
 441         case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
 442         case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
 443         default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
 444         }
 445         NO_DBG_DEC(pFont->ucFontColor);
 446 } /* end of vGet0FontInfo */
 447
 448 /*
 449  * Build the lists with Character Information for Word for DOS files
 450  */
 451 void
 452 vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
 453 {
 454         font_block_type         tFont;
 455         ULONG   ulBeginCharInfo, ulCharPos, ulCharPosNext;
 456         int     iIndex, iRun, iFodo;
 457         UCHAR   aucFpage[128];
 458
 459         fail(pFile == NULL || aucHeader == NULL);
 460
 461         ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
 462         NO_DBG_HEX(ulBeginCharInfo);
 463         ulBeginCharInfo = ROUND128(ulBeginCharInfo);
 464         NO_DBG_HEX(ulBeginCharInfo);
 465
 466         do {
 467                 if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
 468                         return;
 469                 }
 470                 NO_DBG_PRINT_BLOCK(aucFpage, 128);
 471                 ulCharPosNext = ulGetLong(0, aucFpage);
 472                 iRun = (int)ucGetByte(0x7f, aucFpage);
 473                 NO_DBG_DEC(iRun);
 474                 for (iIndex = 0; iIndex < iRun; iIndex++) {
 475                         iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
 476                         if (iFodo <= 0 || iFodo > 0x79) {
 477                                 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
 478                                 continue;
 479                         }
 480                         vFillFontFromStylesheet(0, &tFont);
 481                         vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
 482                         ulCharPos = ulCharPosNext;
 483                         ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
 484                         tFont.ulFileOffset = ulCharPos;
 485                         vAdd2FontInfoList(&tFont);
 486                 }
 487                 ulBeginCharInfo += 128;
 488         } while (ulCharPosNext == ulBeginCharInfo);
 489 } /* end of vGet0ChrInfo */