mimelib/dw_date.cpp

   1 //=============================================================================
   2 // File:       dw_date.cpp
   3 // Contents:   Date parsing function
   4 // Maintainer: Doug Sauder <dwsauder@fwb.gulf.net>
   5 // WWW:        http://www.fwb.gulf.net/~dwsauder/mimepp.html
   6 // $Revision$
   7 // $Date$
   8 //
   9 // Copyright (c) 1996, 1997 Douglas W. Sauder
  10 // All rights reserved.
  11 //
  12 // IN NO EVENT SHALL DOUGLAS W. SAUDER BE LIABLE TO ANY PARTY FOR DIRECT,
  13 // INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
  14 // THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF DOUGLAS W. SAUDER
  15 // HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  16 //
  17 // DOUGLAS W. SAUDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT
  18 // NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  19 // PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
  20 // BASIS, AND DOUGLAS W. SAUDER HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
  21 // SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  22 //
  23 //=============================================================================
  24
  25 /*
  26  * For maximum code reuse, the functions in this file are written in C.
  27  */
  28
  29 #include <mimelib/config.h>
  30 #include <mimelib/debug.h>
  31 #include <ctype.h>
  32 #include <time.h>
  33
  34
  35 static int CommentLength(const char *str)
  36 {
  37     int ch, pos, level, quoteNext, done, len;
  38
  39     level = 0;
  40     quoteNext = 0;
  41     pos = 0;
  42     len = 0;
  43     ch = str[pos];
  44     done = 0;
  45     while (1) {
  46         switch (ch) {
  47         case 0:
  48             len = pos;
  49             done = 1;
  50             break;
  51         case '\\':
  52             quoteNext = 1;
  53             break;
  54         case '(':
  55             if (!quoteNext) {
  56                 ++level;
  57             }
  58             quoteNext = 0;
  59             break;
  60         case ')':
  61             if (!quoteNext) {
  62                 --level;
  63                 if (level == 0) {
  64                     len = pos + 1;
  65                     done = 1;
  66                 }
  67             }
  68             quoteNext = 0;
  69             break;
  70         default:
  71             quoteNext = 0;
  72         }
  73         if (done) {
  74             break;
  75         }
  76         ++pos;
  77         ch = str[pos];
  78     }
  79     return len;
  80 }
  81
  82
  83 /*
  84  * ParseRfc822Date() -- Parse a date in RFC-822 (RFC-1123) format
  85  *
  86  * If the parsing succeeds:
  87  *  - tms is set to contain the year, month, day, hour, minute, and second
  88  *  - z is set to contain the time zone in minutes offset from UTC
  89  *  - 0 is returned
  90  * If the parsing fails:
  91  *  - (-1) is returned
  92  *  - the information in tms and z is undefined
  93  */
  94 #ifdef __cplusplus
  95 extern "C"
  96 #endif
  97 int ParseRfc822Date(const char *str, struct tm *tms, int *z)
  98 {
  99     int pos, ch, n, sgn, numZoneDigits;
 100     int day=1, month=0, year=1970, hour=0, minute=0, second=0, zone=0;
 101     int isValid = 1;
 102
 103     if (!str) {
 104         return -1;
 105     }
 106     /*
 107      * Ignore optional day of the week.
 108      */
 109
 110     /*
 111      * Day -- one or two digits
 112      */
 113     /* -- skip over non-digits */
 114     pos = 0;
 115     ch = str[pos];
 116     while (ch && !('0' <= ch && ch <= '9')) {
 117         if (ch == '(') {
 118             pos += CommentLength(&str[pos]);
 119         }
 120         else {
 121             ++pos;
 122         }
 123         ch = str[pos];
 124     }
 125     /* -- convert next one or two digits */
 126     n = -1;
 127     if ('0' <= ch && ch <= '9') {
 128         n = ch - '0';
 129         ++pos;
 130         ch = str[pos];
 131     }
 132     if ('0' <= ch && ch <= '9') {
 133         n *= 10;
 134         n += ch - '0';
 135         ++pos;
 136         ch = str[pos];
 137     }
 138     if (1 <= n && n <= 31) {
 139         day = n;
 140     }
 141     else {
 142         isValid = 0;
 143     }
 144     /*
 145      * Month.  Use case-insensitive string compare for added robustness
 146      */
 147     /* -- skip over chars to first possible month char */
 148     while (ch && !('A' <= ch && ch <= 'S') && !('a' <= ch && ch <= 's')) {
 149         if (ch == '(') {
 150             pos += CommentLength(&str[pos]);
 151         }
 152         else {
 153             ++pos;
 154         }
 155         ch = str[pos];
 156     }
 157     /* -- convert the month name */
 158     n = -1;
 159     switch (ch) {
 160     case 'A':
 161     case 'a':
 162         /* Apr */
 163         if ((str[pos+1] == 'p' || str[pos+1] == 'P')
 164             && (str[pos+2] == 'r' || str[pos+2] == 'R')) {
 165             n = 3;
 166             pos += 3;
 167             ch = str[pos];
 168         }
 169         /* Aug */
 170         else if ((str[pos+1] == 'u' || str[pos+1] == 'U')
 171             && (str[pos+2] == 'g' || str[pos+2] == 'G')) {
 172             n = 7;
 173             pos += 3;
 174             ch = str[pos];
 175         }
 176         break;
 177     case 'D':
 178     case 'd':
 179         /* Dec */
 180         if ((str[pos+1] == 'e' || str[pos+1] == 'E')
 181             && (str[pos+2] == 'c' || str[pos+2] == 'C')) {
 182             n = 11;
 183             pos += 3;
 184             ch = str[pos];
 185         }
 186         break;
 187     case 'F':
 188     case 'f':
 189         /* Feb */
 190         if ((str[pos+1] == 'e' || str[pos+1] == 'E')
 191             && (str[pos+2] == 'b' || str[pos+2] == 'B')) {
 192             n = 1;
 193             pos += 3;
 194             ch = str[pos];
 195         }
 196         break;
 197     case 'J':
 198     case 'j':
 199         /* Jan */
 200         if ((str[pos+1] == 'a' || str[pos+1] == 'A')
 201             && (str[pos+2] == 'n' || str[pos+2] == 'N')) {
 202             n = 0;
 203             pos += 3;
 204             ch = str[pos];
 205         }
 206         /* Jul */
 207         else if ((str[pos+1] == 'u' || str[pos+1] == 'U')
 208             && (str[pos+2] == 'l' || str[pos+2] == 'L')) {
 209             n = 6;
 210             pos += 3;
 211             ch = str[pos];
 212         }
 213         /* Jun */
 214         else if ((str[pos+1] == 'u' || str[pos+1] == 'U')
 215             && (str[pos+2] == 'n' || str[pos+2] == 'N')) {
 216             n = 5;
 217             pos += 3;
 218             ch = str[pos];
 219         }
 220         break;
 221     case 'M':
 222     case 'm':
 223         /* Mar */
 224         if ((str[pos+1] == 'a' || str[pos+1] == 'A')
 225             && (str[pos+2] == 'r' || str[pos+2] == 'R')) {
 226             n = 2;
 227             pos += 3;
 228             ch = str[pos];
 229         }
 230         /* May */
 231         else if ((str[pos+1] == 'a' || str[pos+1] == 'A')
 232             && (str[pos+2] == 'y' || str[pos+2] == 'Y')) {
 233             n = 4;
 234             pos += 3;
 235             ch = str[pos];
 236         }
 237         break;
 238     case 'N':
 239     case 'n':
 240         /* Nov */
 241         if ((str[pos+1] == 'o' || str[pos+1] == 'O')
 242             && (str[pos+2] == 'v' || str[pos+2] == 'V')) {
 243             n = 10;
 244             pos += 3;
 245             ch = str[pos];
 246         }
 247         break;
 248     case 'O':
 249     case 'o':
 250         /* Oct */
 251         if ((str[pos+1] == 'c' || str[pos+1] == 'c')
 252             && (str[pos+2] == 't' || str[pos+2] == 'T')) {
 253             n = 9;
 254             pos += 3;
 255             ch = str[pos];
 256         }
 257         break;
 258     case 'S':
 259     case 's':
 260         /* Sep */
 261         if ((str[pos+1] == 'e' || str[pos+1] == 'E')
 262             && (str[pos+2] == 'p' || str[pos+2] == 'P')) {
 263             n = 8;
 264             pos += 3;
 265             ch = str[pos];
 266         }
 267         break;
 268     }
 269     if (0 <= n && n <= 11) {
 270         month = n;
 271     }
 272     else {
 273         isValid = 0;
 274     }
 275     /*
 276      * Year -- two or four digits (four preferred)
 277      */
 278     /* -- skip over non-digits */
 279     while (ch && !('0' <= ch && ch <= '9')) {
 280         if (ch == '(') {
 281             pos += CommentLength(&str[pos]);
 282         }
 283         else {
 284             ++pos;
 285         }
 286         ch = str[pos];
 287     }
 288     /* -- convert up to four digits */
 289     n = -1;
 290     if ('0' <= ch && ch <= '9') {
 291         n = ch - '0';
 292         ++pos;
 293         ch = str[pos];
 294     }
 295     if ('0' <= ch && ch <= '9') {
 296         n *= 10;
 297         n += ch - '0';
 298         ++pos;
 299         ch = str[pos];
 300     }
 301     if ('0' <= ch && ch <= '9') {
 302         n *= 10;
 303         n += ch - '0';
 304         ++pos;
 305         ch = str[pos];
 306     }
 307     if ('0' <= ch && ch <= '9') {
 308         n *= 10;
 309         n += ch - '0';
 310         ++pos;
 311         ch = str[pos];
 312     }
 313     if (n != -1) {
 314         /* Fixed year 2000 problem (fix by tony@lasernet.globalnet.co.uk) */
 315         if (n < 70)
 316                 n += 2000; /* When less than 70 assume after year 2000 */
 317         else if (n <= 99)
 318                 n += 1900; /* When >69 and <100 assume 1970 to 1999 */
 319         /* Additional check to limit valid range to 1970 to 2037 */
 320         if ((n >= 1970) && (n < 2038))
 321                 year = n;
 322         else
 323                 isValid = 0;
 324     }
 325     else {
 326         isValid = 0;
 327     }
 328     /*
 329      * Hour -- two digits
 330      */
 331     /* -- skip over non-digits */
 332     while (ch && !('0' <= ch && ch <= '9')) {
 333         if (ch == '(') {
 334             pos += CommentLength(&str[pos]);
 335         }
 336         else {
 337             ++pos;
 338         }
 339         ch = str[pos];
 340     }
 341     /* -- convert next one or two digits */
 342     n = -1;
 343     if ('0' <= ch && ch <= '9') {
 344         n = ch - '0';
 345         ++pos;
 346         ch = str[pos];
 347     }
 348     if ('0' <= ch && ch <= '9') {
 349         n *= 10;
 350         n += ch - '0';
 351         ++pos;
 352         ch = str[pos];
 353     }
 354     if (0 <= n && n <= 23) {
 355         hour = n;
 356     }
 357     else {
 358         isValid = 0;
 359     }
 360     /*
 361      * Minute -- two digits
 362      */
 363     /* -- scan for ':' */
 364     while (ch && ch != ':') {
 365         if (ch == '(') {
 366             pos += CommentLength(&str[pos]);
 367         }
 368         else {
 369             ++pos;
 370         }
 371         ch = str[pos];
 372     }
 373     /* -- skip over non-digits */
 374     while (ch && !('0' <= ch && ch <= '9')) {
 375         if (ch == '(') {
 376             pos += CommentLength(&str[pos]);
 377         }
 378         else {
 379             ++pos;
 380         }
 381         ch = str[pos];
 382     }
 383     /* -- convert next one or two digits */
 384     n = -1;
 385     if ('0' <= ch && ch <= '9') {
 386         n = ch - '0';
 387         ++pos;
 388         ch = str[pos];
 389     }
 390     if ('0' <= ch && ch <= '9') {
 391         n *= 10;
 392         n += ch - '0';
 393         ++pos;
 394         ch = str[pos];
 395     }
 396     if (0 <= n && n <= 59) {
 397         minute = n;
 398     }
 399     else {
 400         isValid = 0;
 401     }
 402     /*
 403      * Second (optional) -- two digits
 404      */
 405     /* -- scan for ':' or start of time zone */
 406     while (ch && !(ch == ':' || ch == '+' || ch == '-' || isalpha(ch))) {
 407         if (ch == '(') {
 408             pos += CommentLength(&str[pos]);
 409         }
 410         else {
 411             ++pos;
 412         }
 413         ch = str[pos];
 414     }
 415     /* -- get the seconds, if it's there */
 416     if (ch == ':') {
 417         ++pos;
 418         /* -- skip non-digits */
 419         ch = str[pos];
 420         while (ch && !('0' <= ch && ch <= '9')) {
 421             if (ch == '(') {
 422                 pos += CommentLength(&str[pos]);
 423             }
 424             else {
 425                 ++pos;
 426             }
 427             ch = str[pos];
 428         }
 429         /* -- convert next one or two digits */
 430         n = -1;
 431         if ('0' <= ch && ch <= '9') {
 432             n = ch - '0';
 433             ++pos;
 434             ch = str[pos];
 435         }
 436         if ('0' <= ch && ch <= '9') {
 437             n *= 10;
 438             n += ch - '0';
 439             ++pos;
 440             ch = str[pos];
 441         }
 442         if (0 <= n && n <= 59) {
 443             second = n;
 444         }
 445         else {
 446             isValid = 0;
 447         }
 448         /* -- scan for start of time zone */
 449         while (ch && !(ch == '+' || ch == '-' || isalpha(ch))) {
 450             if (ch == '(') {
 451                 pos += CommentLength(&str[pos]);
 452             }
 453             else {
 454                 ++pos;
 455             }
 456             ch = str[pos];
 457         }
 458     }
 459     else /* if (ch != ':') */ {
 460         second = 0;
 461     }
 462     /*
 463      * Time zone
 464      *
 465      * Note: According to RFC-1123, the military time zones are specified
 466      * incorrectly in RFC-822.  RFC-1123 then states that "military time
 467      * zones in RFC-822 headers carry no information."
 468      * Here, we follow the specification in RFC-822.  What else could we
 469      * do?  Military time zones should *never* be used!
 470      */
 471     sgn = 1;
 472     numZoneDigits = 0;
 473     switch (ch) {
 474     case '-':
 475         sgn = -1;
 476         /* fall through */
 477     case '+':
 478         ++pos;
 479         /* -- skip non-digits */
 480         ch = str[pos];
 481         while (ch && !('0' <= ch && ch <= '9')) {
 482             ++pos;
 483             ch = str[pos];
 484         }
 485         while( str[pos + numZoneDigits] && isdigit(str[pos + numZoneDigits] ) )
 486             ++numZoneDigits;
 487         /* -- convert next four digits */
 488         n = 0;
 489         while ( numZoneDigits ) {
 490             switch(numZoneDigits) {
 491             case 4:
 492                 if ('0' <= ch && ch <= '9') {
 493                     n = (ch - '0')*600;
 494                     ++pos;
 495                     ch = str[pos];
 496                 }
 497                 break;
 498             case 3:
 499                 if ('0' <= ch && ch <= '9') {
 500                     n += (ch - '0')*60;
 501                     ++pos;
 502                     ch = str[pos];
 503                 }
 504                 break;
 505             case 2:
 506                 if ('0' <= ch && ch <= '9') {
 507                     n += (ch - '0')*10;
 508                     ++pos;
 509                     ch = str[pos];
 510                 }
 511                 break;
 512             case 1:
 513                 if ('0' <= ch && ch <= '9') {
 514                     n += ch - '0';
 515                 }
 516                 break;
 517             default:
 518                 break;
 519             }
 520             --numZoneDigits;
 521         }
 522         zone = sgn*n;
 523         break;
 524     case 'U':
 525     case 'u':
 526         if (str[pos+1] == 'T' || str[pos+1] == 't') {
 527             zone = 0;
 528         }
 529         else {
 530             /* Military time zone */
 531             zone = 480;
 532         }
 533         break;
 534     case 'G':
 535     case 'g':
 536         if ((str[pos+1] == 'M' || str[pos+1] == 'm')
 537             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 538             zone = 0;
 539         }
 540         else {
 541             /* Military time zone */
 542             zone = -420;
 543         }
 544         break;
 545     case 'E':
 546     case 'e':
 547         if ((str[pos+1] == 'S' || str[pos+1] == 's')
 548             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 549             zone = -300;
 550         }
 551         else if ((str[pos+1] == 'D' || str[pos+1] == 'd')
 552             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 553             zone = -240;
 554         }
 555         else {
 556             /* Military time zone */
 557             zone = -300;
 558         }
 559         break;
 560     case 'C':
 561     case 'c':
 562         if ((str[pos+1] == 'S' || str[pos+1] == 's')
 563             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 564             zone = -360;
 565         }
 566         else if ((str[pos+1] == 'D' || str[pos+1] == 'd')
 567             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 568             zone = -300;
 569         }
 570         else {
 571             /* Military time zone */
 572             zone = -180;
 573         }
 574         break;
 575     case 'M':
 576     case 'm':
 577         if ((str[pos+1] == 'S' || str[pos+1] == 's')
 578             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 579             zone = -420;
 580         }
 581         else if ((str[pos+1] == 'D' || str[pos+1] == 'd')
 582             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 583             zone = -360;
 584         }
 585         else {
 586             /* Military time zone */
 587             zone = -720;
 588         }
 589         break;
 590     case 'P':
 591     case 'p':
 592         if ((str[pos+1] == 'S' || str[pos+1] == 's')
 593             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 594             zone = -480;
 595         }
 596         else if ((str[pos+1] == 'D' || str[pos+1] == 'd')
 597             && (str[pos+2] == 'T' || str[pos+2] == 't')) {
 598             zone = -420;
 599         }
 600         else {
 601             /* Military time zone */
 602             zone = 180;
 603         }
 604         break;
 605     case 'Z':
 606         /* Military time zone */
 607         zone = 0;
 608         break;
 609     default:
 610         /* Military time zone */
 611         if ('A' <= ch && ch <= 'I') {
 612             zone = 'A' - 1 - ch;
 613         }
 614         else if ('K' <= ch && ch <= 'M') {
 615             zone = 'A' - ch;
 616         }
 617         else if ('N' <= ch && ch <= 'Y') {
 618             zone = ch - 'N' + 1;
 619         }
 620         /* Some software doesn't set the timezone, so we default
 621            to +/-0 so KMail isn't too strict. --dnaber@mini.gt.owl.de, 2000-06-11
 622         else {
 623             isValid = 0;
 624         } */
 625         break;
 626     }
 627     if (isValid) {
 628         if (tms) {
 629             tms->tm_year = year - 1900;
 630             tms->tm_mon  = month;
 631             tms->tm_mday = day;
 632             tms->tm_hour = hour;
 633             tms->tm_min  = minute;
 634             tms->tm_sec  = second;
 635         }
 636         if (z) {
 637             *z = zone;
 638         }
 639     }
 640     else {
 641         if (tms) {
 642             tms->tm_year = 70;
 643             tms->tm_mon  = 0;
 644             tms->tm_mday = 1;
 645             tms->tm_hour = 0;
 646             tms->tm_min  = 0;
 647             tms->tm_sec  = 0;
 648         }
 649         if (z) {
 650             *z = 0;
 651         }
 652     }
 653     return isValid ? 0 : -1;
 654 }
 655
 656
 657 #ifdef DW_TESTING_DATEPARSER
 658
 659 #include <stdio.h>
 660 #include <stdlib.h>
 661 #include <limits.h>
 662
 663 const char* testStr[] = {
 664     ""
 665 };
 666
 667 const char* wdays[] = {
 668     "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
 669 };
 670
 671 const char* months[] = {
 672     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
 673     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
 674 };
 675
 676 int main()
 677 {
 678     struct tm *ptms, tms1, tms2;
 679     time_t tt;
 680     int i, zone1, zone2;
 681     char buf[100], sgn;
 682
 683     /* try a bunch of random dates */
 684     srand(100);
 685     for (i=0; i < 1000; ++i) {
 686         tt = rand()*((double)0x7fffffff/RAND_MAX);
 687         zone1 = (rand()%49 - 24)*30;
 688         gmtime(&tt, &ptms);
 689         tms1 = *ptms;
 690         sgn = (zone1 >= 0) ? '+' : '-';
 691         sprintf(buf, "%s, %2d %s %d %d%d:%d%d:%d%d %c%d%d%d%d",
 692             wdays[tms1.tm_wday], tms1.tm_mday, months[tms1.tm_mon],
 693             tms1.tm_year+1900,
 694             tms1.tm_hour/10, tms1.tm_hour%10,
 695             tms1.tm_min/10, tms1.tm_min%10,
 696             tms1.tm_sec/10, tms1.tm_sec%10,
 697             sgn, abs(zone1)/60/10, abs(zone1)/60%10,
 698             abs(zone1)%60/10, abs(zone1)%60%10);
 699         ParseRfc822Date(buf, &tms2, &zone2);
 700         if (tms1.tm_year != tms2.tm_year) {
 701             fprintf(stderr, "Bad year\n");
 702         }
 703         if (tms1.tm_mon != tms2.tm_mon) {
 704             fprintf(stderr, "Bad month\n");
 705         }
 706         if (tms1.tm_mday != tms2.tm_mday) {
 707             fprintf(stderr, "Bad day\n");
 708         }
 709         if (tms1.tm_hour != tms2.tm_hour) {
 710             fprintf(stderr, "Bad hour\n");
 711         }
 712         if (tms1.tm_min != tms2.tm_min) {
 713             fprintf(stderr, "Bad minute\n");
 714         }
 715         if (tms1.tm_sec != tms2.tm_sec) {
 716             fprintf(stderr, "Bad second\n");
 717         }
 718         if (zone1 != zone2) {
 719             fprintf(stderr, "Bad zone\n");
 720         }
 721     }
 722     return 0;
 723 }
 724
 725 #endif